1. 增加一次性输出多个视频

2. 增加背景音乐音量设置
3. 增加字幕位置设置
4. UI 优化
5. 其他 Bug 修复和优化
This commit is contained in:
harry
2024-03-23 15:31:34 +08:00
parent ce4b3771b6
commit 0771b3268c
7 changed files with 146 additions and 65 deletions

View File

@@ -28,9 +28,9 @@ imagemagick_path = app.get("imagemagick_path", "")
if imagemagick_path and os.path.isfile(imagemagick_path): if imagemagick_path and os.path.isfile(imagemagick_path):
os.environ["IMAGEMAGICK_BINARY"] = imagemagick_path os.environ["IMAGEMAGICK_BINARY"] = imagemagick_path
__cfg = { # __cfg = {
"hostname": hostname, # "hostname": hostname,
"listen_host": listen_host, # "listen_host": listen_host,
"listen_port": listen_port, # "listen_port": listen_port,
} # }
logger.info(__cfg) # logger.info(__cfg)

View File

@@ -93,12 +93,14 @@ class VideoParams:
video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
video_clip_duration: Optional[int] = 5 video_clip_duration: Optional[int] = 5
video_count: Optional[int] = 1
voice_name: Optional[str] = VoiceNames[0] voice_name: Optional[str] = VoiceNames[0]
bgm_type: Optional[str] = "random" bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = "" bgm_file: Optional[str] = ""
bgm_volume: Optional[float] = 0.2
subtitle_enabled: Optional[bool] = True subtitle_enabled: Optional[bool] = True
subtitle_position: Optional[str] = "bottom" # top, bottom, center
font_name: Optional[str] = "STHeitiMedium.ttc" font_name: Optional[str] = "STHeitiMedium.ttc"
text_fore_color: Optional[str] = "#FFFFFF" text_fore_color: Optional[str] = "#FFFFFF"
text_background_color: Optional[str] = "transparent" text_background_color: Optional[str] = "transparent"

View File

@@ -105,7 +105,7 @@ def create(audio_file, subtitle_file: str = ""):
lines.append(utils.text_to_srt(idx, text, subtitle.get("start_time"), subtitle.get("end_time"))) lines.append(utils.text_to_srt(idx, text, subtitle.get("start_time"), subtitle.get("end_time")))
idx += 1 idx += 1
sub = "\n".join(lines) sub = "\n".join(lines) + "\n"
with open(subtitle_file, "w", encoding="utf-8") as f: with open(subtitle_file, "w", encoding="utf-8") as f:
f.write(sub) f.write(sub)
logger.info(f"subtitle file created: {subtitle_file}") logger.info(f"subtitle file created: {subtitle_file}")

View File

@@ -1,3 +1,4 @@
import math
import os.path import os.path
import re import re
from os import path from os import path
@@ -5,7 +6,7 @@ from os import path
from loguru import logger from loguru import logger
from app.config import config from app.config import config
from app.models.schema import VideoParams, VoiceNames from app.models.schema import VideoParams, VoiceNames, VideoConcatMode
from app.services import llm, material, voice, video, subtitle from app.services import llm, material, voice, video, subtitle
from app.utils import utils from app.utils import utils
@@ -78,6 +79,8 @@ def start(task_id, params: VideoParams):
return return
audio_duration = voice.get_audio_duration(sub_maker) audio_duration = voice.get_audio_duration(sub_maker)
audio_duration = math.ceil(audio_duration)
subtitle_path = "" subtitle_path = ""
if params.subtitle_enabled: if params.subtitle_enabled:
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt") subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
@@ -110,7 +113,7 @@ def start(task_id, params: VideoParams):
search_terms=video_terms, search_terms=video_terms,
video_aspect=params.video_aspect, video_aspect=params.video_aspect,
video_contact_mode=params.video_concat_mode, video_contact_mode=params.video_concat_mode,
audio_duration=audio_duration, audio_duration=audio_duration * params.video_count,
max_clip_duration=max_clip_duration, max_clip_duration=max_clip_duration,
) )
if not downloaded_videos: if not downloaded_videos:
@@ -118,19 +121,26 @@ def start(task_id, params: VideoParams):
"failed to download videos, maybe the network is not available. if you are in China, please use a VPN.") "failed to download videos, maybe the network is not available. if you are in China, please use a VPN.")
return return
logger.info("\n\n## combining videos") final_video_paths = []
combined_video_path = path.join(utils.task_dir(task_id), f"combined.mp4") video_concat_mode = params.video_concat_mode
if params.video_count > 1:
video_concat_mode = VideoConcatMode.random
for i in range(params.video_count):
index = i + 1
combined_video_path = path.join(utils.task_dir(task_id), f"combined-{index}.mp4")
logger.info(f"\n\n## combining video: {index} => {combined_video_path}")
video.combine_videos(combined_video_path=combined_video_path, video.combine_videos(combined_video_path=combined_video_path,
video_paths=downloaded_videos, video_paths=downloaded_videos,
audio_file=audio_file, audio_file=audio_file,
video_aspect=params.video_aspect, video_aspect=params.video_aspect,
video_concat_mode=params.video_concat_mode, video_concat_mode=video_concat_mode,
max_clip_duration=max_clip_duration, max_clip_duration=max_clip_duration,
threads=n_threads) threads=n_threads)
final_video_path = path.join(utils.task_dir(task_id), f"final.mp4") final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
logger.info("\n\n## generating video") logger.info(f"\n\n## generating video: {index} => {final_video_path}")
# Put everything together # Put everything together
video.generate_video(video_path=combined_video_path, video.generate_video(video_path=combined_video_path,
audio_path=audio_file, audio_path=audio_file,
@@ -138,7 +148,10 @@ def start(task_id, params: VideoParams):
output_file=final_video_path, output_file=final_video_path,
params=params, params=params,
) )
logger.start(f"task {task_id} finished") final_video_paths.append(final_video_path)
logger.success(f"task {task_id} finished, generated {len(final_video_paths)} videos.")
return { return {
"video_file": final_video_path, "videos": final_video_paths,
} }

View File

@@ -34,31 +34,30 @@ def combine_videos(combined_video_path: str,
max_clip_duration: int = 5, max_clip_duration: int = 5,
threads: int = 2, threads: int = 2,
) -> str: ) -> str:
logger.info(f"combining {len(video_paths)} videos into one file: {combined_video_path}")
audio_clip = AudioFileClip(audio_file) audio_clip = AudioFileClip(audio_file)
max_duration = audio_clip.duration audio_duration = audio_clip.duration
logger.info(f"max duration of audio: {max_duration} seconds") logger.info(f"max duration of audio: {audio_duration} seconds")
# Required duration of each clip # Required duration of each clip
req_dur = max_duration / len(video_paths) req_dur = audio_duration / len(video_paths)
req_dur = max_clip_duration
logger.info(f"each clip will be maximum {req_dur} seconds long") logger.info(f"each clip will be maximum {req_dur} seconds long")
aspect = VideoAspect(video_aspect) aspect = VideoAspect(video_aspect)
video_width, video_height = aspect.to_resolution() video_width, video_height = aspect.to_resolution()
clips = [] clips = []
tot_dur = 0 video_duration = 0
# Add downloaded clips over and over until the duration of the audio (max_duration) has been reached # Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
while tot_dur < max_duration: while video_duration < audio_duration:
# random video_paths order # random video_paths order
if video_concat_mode.value == VideoConcatMode.random.value: if video_concat_mode.value == VideoConcatMode.random.value:
random.shuffle(video_paths) random.shuffle(video_paths)
for video_path in video_paths: for video_path in video_paths:
clip = VideoFileClip(video_path) clip = VideoFileClip(video_path).without_audio()
clip = clip.without_audio()
# Check if clip is longer than the remaining audio # Check if clip is longer than the remaining audio
if (max_duration - tot_dur) < clip.duration: if (audio_duration - video_duration) < clip.duration:
clip = clip.subclip(0, (max_duration - tot_dur)) clip = clip.subclip(0, (audio_duration - video_duration))
# Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image # Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
elif req_dur < clip.duration: elif req_dur < clip.duration:
clip = clip.subclip(0, req_dur) clip = clip.subclip(0, req_dur)
@@ -88,7 +87,7 @@ def combine_videos(combined_video_path: str,
clip = clip.subclip(0, max_clip_duration) clip = clip.subclip(0, max_clip_duration)
clips.append(clip) clips.append(clip)
tot_dur += clip.duration video_duration += clip.duration
final_clip = concatenate_videoclips(clips) final_clip = concatenate_videoclips(clips)
final_clip = final_clip.set_fps(30) final_clip = final_clip.set_fps(30)
@@ -125,7 +124,7 @@ def wrap_text(text, max_width, font='Arial', fontsize=60):
_wrapped_lines_.append(_txt_) _wrapped_lines_.append(_txt_)
_txt_ = '' _txt_ = ''
_wrapped_lines_.append(_txt_) _wrapped_lines_.append(_txt_)
return '\n'.join(_wrapped_lines_) return '\n'.join(_wrapped_lines_).strip()
def generate_video(video_path: str, def generate_video(video_path: str,
@@ -153,11 +152,23 @@ def generate_video(video_path: str,
logger.info(f"using font: {font_path}") logger.info(f"using font: {font_path}")
def generator(txt): if params.subtitle_position == "top":
wrapped_txt = wrap_text(txt, max_width=video_width - 100, position_height = video_height * 0.1
elif params.subtitle_position == "bottom":
position_height = video_height * 0.9
else:
position_height = "center"
def generator(txt, **kwargs):
max_width = video_width * 0.9
# logger.debug(f"rendering text: {txt}")
wrapped_txt = wrap_text(txt,
max_width=max_width,
font=font_path, font=font_path,
fontsize=params.font_size) # 调整max_width以适应你的视频 fontsize=params.font_size
return TextClip( ) # 调整max_width以适应你的视频
clip = TextClip(
wrapped_txt, wrapped_txt,
font=font_path, font=font_path,
fontsize=params.font_size, fontsize=params.font_size,
@@ -167,18 +178,16 @@ def generate_video(video_path: str,
stroke_width=params.stroke_width, stroke_width=params.stroke_width,
print_cmd=False, print_cmd=False,
) )
return clip
position_height = video_height - 200
if params.video_aspect == VideoAspect.landscape:
position_height = video_height - 100
clips = [ clips = [
VideoFileClip(video_path), VideoFileClip(video_path),
] ]
if subtitle_path and os.path.exists(subtitle_path): if subtitle_path and os.path.exists(subtitle_path):
subtitles = SubtitlesClip(subtitles=subtitle_path, make_textclip=generator, encoding='utf-8') sub = SubtitlesClip(subtitles=subtitle_path, make_textclip=generator, encoding='utf-8')
clips.append(subtitles.set_position(lambda _t: ('center', position_height))) sub_clip = sub.set_position(lambda _t: ('center', position_height))
clips.append(sub_clip)
result = CompositeVideoClip(clips) result = CompositeVideoClip(clips)
@@ -199,7 +208,7 @@ def generate_video(video_path: str,
original_audio = video_clip.audio original_audio = video_clip.audio
song_clip = AudioFileClip(bgm_file).set_fps(44100) song_clip = AudioFileClip(bgm_file).set_fps(44100)
# Set the volume of the song to 10% of the original volume # Set the volume of the song to 10% of the original volume
song_clip = song_clip.volumex(0.2).set_fps(44100) song_clip = song_clip.volumex(params.bgm_volume)
# Add the song to the video # Add the song to the video
comp_audio = CompositeAudioClip([original_audio, song_clip]) comp_audio = CompositeAudioClip([original_audio, song_clip])
video_clip = video_clip.set_audio(comp_audio) video_clip = video_clip.set_audio(comp_audio)
@@ -211,3 +220,38 @@ def generate_video(video_path: str,
os.remove(temp_output_file) os.remove(temp_output_file)
logger.success(f"completed") logger.success(f"completed")
if __name__ == "__main__":
txt = "hello 幸福经常被描述为最终人生目标和人类追求的核心 但它通常涉及对个人生活中意义和目的的深刻感悟"
font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
t = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
print(t)
task_id = "69232dfa-f6c5-4b5e-80ba-be3098d3f930"
task_dir = utils.task_dir(task_id)
video_file = f"{task_dir}/combined-1.mp4"
audio_file = f"{task_dir}/audio.mp3"
subtitle_file = f"{task_dir}/subtitle.srt"
output_file = f"{task_dir}/final.mp4"
cfg = VideoParams()
cfg.video_aspect = VideoAspect.portrait
cfg.font_name = "STHeitiMedium.ttc"
cfg.font_size = 60
cfg.stroke_color = "#000000"
cfg.stroke_width = 1.5
cfg.text_fore_color = "#FFFFFF"
cfg.text_background_color = "transparent"
cfg.bgm_file = ""
cfg.bgm_volume = 0.2
cfg.subtitle_enabled = True
cfg.subtitle_position = "bottom"
cfg.n_threads = 2
cfg.paragraph_number = 1
generate_video(video_path=video_file,
audio_path=audio_file,
subtitle_path=subtitle_file,
output_file=output_file,
params=cfg
)

View File

@@ -78,7 +78,7 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
sub_line = "" sub_line = ""
with open(subtitle_file, "w", encoding="utf-8") as file: with open(subtitle_file, "w", encoding="utf-8") as file:
file.write("\n".join(sub_items)) file.write("\n".join(sub_items) + "\n")
def get_audio_duration(sub_maker: submaker.SubMaker): def get_audio_duration(sub_maker: submaker.SubMaker):

View File

@@ -1,16 +1,23 @@
import asyncio import streamlit as st
st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide",
initial_sidebar_state="auto")
import sys import sys
import os import os
import time
from uuid import uuid4 from uuid import uuid4
import streamlit as st
from loguru import logger from loguru import logger
from app.models.schema import VideoParams, VideoAspect, VoiceNames, VideoConcatMode from app.models.schema import VideoParams, VideoAspect, VoiceNames, VideoConcatMode
from app.services import task as tm, llm from app.services import task as tm, llm
st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide", hide_streamlit_style = """
initial_sidebar_state="auto") <style>#root > div:nth-child(1) > div > div > div > div > section > div {padding-top: 0rem;}</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
st.title("MoneyPrinterTurbo") st.title("MoneyPrinterTurbo")
st.write(
"⚠️ 先在 **config.toml** 中设置 `pexels_api_keys` 和 `llm_provider` 参数,根据不同的 llm_provider配置对应的 **API KEY**"
)
root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
font_dir = os.path.join(root_dir, "resource", "fonts") font_dir = os.path.join(root_dir, "resource", "fonts")
@@ -99,7 +106,7 @@ with left_panel:
cfg.video_script = st.text_area( cfg.video_script = st.text_area(
"视频文案(:blue[①可不填使用AI生成 ②合理使用标点断句,有助于生成字幕]", "视频文案(:blue[①可不填使用AI生成 ②合理使用标点断句,有助于生成字幕]",
value=st.session_state['video_script'], value=st.session_state['video_script'],
height=190 height=280
) )
if st.button("点击使用AI根据**文案**生成【视频关键词】", key="auto_generate_terms"): if st.button("点击使用AI根据**文案**生成【视频关键词】", key="auto_generate_terms"):
if not cfg.video_script: if not cfg.video_script:
@@ -114,14 +121,14 @@ with left_panel:
cfg.video_terms = st.text_area( cfg.video_terms = st.text_area(
"视频关键词(:blue[①可不填使用AI生成 ②用**英文逗号**分隔,只支持英文]", "视频关键词(:blue[①可不填使用AI生成 ②用**英文逗号**分隔,只支持英文]",
value=st.session_state['video_terms'], value=st.session_state['video_terms'],
height=40) height=50)
with middle_panel: with middle_panel:
with st.container(border=True): with st.container(border=True):
st.write("**视频设置**") st.write("**视频设置**")
video_concat_modes = [ video_concat_modes = [
("顺序拼接", "sequential"), ("顺序拼接", "sequential"),
("随机拼接", "random"), ("随机拼接(推荐)", "random"),
] ]
selected_index = st.selectbox("视频拼接模式", selected_index = st.selectbox("视频拼接模式",
index=1, index=1,
@@ -141,8 +148,8 @@ with middle_panel:
) )
cfg.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1]) cfg.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])
cfg.video_clip_duration = st.slider("视频片段最大时长(秒)", 2, 5, 3) cfg.video_clip_duration = st.selectbox("视频片段最大时长(秒)", options=[2, 3, 4, 5, 6], index=1)
cfg.video_count = st.selectbox("同时生成视频数量", options=[1, 2, 3, 4, 5], index=0)
with st.container(border=True): with st.container(border=True):
st.write("**音频设置**") st.write("**音频设置**")
# 创建一个映射字典,将原始值映射到友好名称 # 创建一个映射字典,将原始值映射到友好名称
@@ -179,6 +186,8 @@ with middle_panel:
if custom_bgm_file and os.path.exists(custom_bgm_file): if custom_bgm_file and os.path.exists(custom_bgm_file):
cfg.bgm_file = custom_bgm_file cfg.bgm_file = custom_bgm_file
# st.write(f":red[已选择自定义背景音乐]**{custom_bgm_file}**") # st.write(f":red[已选择自定义背景音乐]**{custom_bgm_file}**")
cfg.bgm_volume = st.selectbox("背景音乐音量0.2表示20%,背景声音不宜过高)",
options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], index=2)
with right_panel: with right_panel:
with st.container(border=True): with st.container(border=True):
@@ -186,6 +195,19 @@ with right_panel:
cfg.subtitle_enabled = st.checkbox("生成字幕(若取消勾选,下面的设置都将不生效)", value=True) cfg.subtitle_enabled = st.checkbox("生成字幕(若取消勾选,下面的设置都将不生效)", value=True)
font_names = get_all_fonts() font_names = get_all_fonts()
cfg.font_name = st.selectbox("字体", font_names) cfg.font_name = st.selectbox("字体", font_names)
subtitle_positions = [
("顶部top", "top"),
("居中center", "center"),
("底部bottom推荐", "bottom"),
]
selected_index = st.selectbox("字幕位置",
index=2,
options=range(len(subtitle_positions)), # 使用索引作为内部选项值
format_func=lambda x: subtitle_positions[x][0] # 显示给用户的是标签
)
cfg.subtitle_position = subtitle_positions[selected_index][1]
font_cols = st.columns([0.3, 0.7]) font_cols = st.columns([0.3, 0.7])
with font_cols[0]: with font_cols[0]:
cfg.text_fore_color = st.color_picker("字幕颜色", "#FFFFFF") cfg.text_fore_color = st.color_picker("字幕颜色", "#FFFFFF")