1. Optimize subtitle generation in edge mode.

2. Optimize the LLM prompt so that the response uses the same language as the video subject.
This commit is contained in:
harry
2024-03-24 17:50:50 +08:00
parent 296a1370d3
commit b471a272b6
4 changed files with 20 additions and 9 deletions

View File

@@ -89,7 +89,7 @@ class VideoParams:
""" """
video_subject: str video_subject: str
video_script: str = "" # 用于生成视频的脚本 video_script: str = "" # 用于生成视频的脚本
video_terms: str = "" # 用于生成视频的关键词 video_terms: Optional[str | list] = None # 用于生成视频的关键词
video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
video_clip_duration: Optional[int] = 5 video_clip_duration: Optional[int] = 5

View File

@@ -77,13 +77,13 @@ Generate a script for a video, depending on the subject of the video.
5. only return the raw content of the script. 5. only return the raw content of the script.
6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line. 6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line.
7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script. 7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script.
8. respond in the same language as the video subject.
## Output Example: ## Output Example:
What is the meaning of life. This question has puzzled philosophers. What is the meaning of life. This question has puzzled philosophers.
# Initialization: # Initialization:
- video subject: {video_subject} - video subject: {video_subject}
- output language: {language}
- number of paragraphs: {paragraph_number} - number of paragraphs: {paragraph_number}
""".strip() """.strip()

View File

@@ -58,7 +58,13 @@ def start(task_id, params: VideoParams):
if not video_terms: if not video_terms:
video_terms = llm.generate_terms(video_subject=video_subject, video_script=video_script, amount=5) video_terms = llm.generate_terms(video_subject=video_subject, video_script=video_script, amount=5)
else: else:
if isinstance(video_terms, str):
video_terms = [term.strip() for term in re.split(r'[,]', video_terms)] video_terms = [term.strip() for term in re.split(r'[,]', video_terms)]
elif isinstance(video_terms, list):
video_terms = [term.strip() for term in video_terms]
else:
raise ValueError("video_terms must be a string or a list of strings.")
logger.debug(f"video terms: {utils.to_json(video_terms)}") logger.debug(f"video terms: {utils.to_json(video_terms)}")
script_file = path.join(utils.task_dir(task_id), f"script.json") script_file = path.join(utils.task_dir(task_id), f"script.json")
@@ -95,7 +101,7 @@ def start(task_id, params: VideoParams):
else: else:
subtitle_lines = subtitle.file_to_subtitles(subtitle_path) subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
if not subtitle_lines: if not subtitle_lines:
logger.warning(f"subtitle file is invalid: {subtitle_path}") logger.warning(f"subtitle file is invalid, fallback to whisper : {subtitle_path}")
subtitle_fallback = True subtitle_fallback = True
if subtitle_provider == "whisper" or subtitle_fallback: if subtitle_provider == "whisper" or subtitle_fallback:

View File

@@ -57,6 +57,8 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
sub_index = 0 sub_index = 0
script_lines = utils.split_string_by_punctuations(text) script_lines = utils.split_string_by_punctuations(text)
# remove space in every word
script_lines_without_space = [line.replace(" ", "") for line in script_lines]
sub_line = "" sub_line = ""
for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)): for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
@@ -66,14 +68,17 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
sub = unescape(sub) sub = unescape(sub)
sub_line += sub sub_line += sub
if sub_line == script_lines[sub_index]: if sub_line == script_lines[sub_index] or sub_line == script_lines_without_space[sub_index]:
sub_text = script_lines[sub_index]
sub_index += 1 sub_index += 1
sub_items.append(formatter( line = formatter(
idx=sub_index, idx=sub_index,
start_time=start_time, start_time=start_time,
end_time=end_time, end_time=end_time,
sub_text=sub_line, sub_text=sub_text,
)) )
# logger.debug(line.strip())
sub_items.append(line)
start_time = -1.0 start_time = -1.0
sub_line = "" sub_line = ""