This commit is contained in:
harry
2024-03-11 16:37:49 +08:00
parent d4f7b53b84
commit 06df797234
71 changed files with 2725 additions and 1 deletion

0
app/services/__init__.py Normal file
View File

152
app/services/gpt.py Normal file
View File

@@ -0,0 +1,152 @@
import logging
import re
import json
import openai
from typing import List
from loguru import logger
from app.config import config
# --- OpenAI client configuration (import-time side effect) ---
# API key and model name are mandatory; fail fast at import time so a
# misconfigured config.toml is reported before any task starts.
openai_api_key = config.app.get("openai_api_key")
if not openai_api_key:
    raise ValueError("openai_api_key is not set, please set it in the config.toml file.")
openai_model_name = config.app.get("openai_model_name")
if not openai_model_name:
    raise ValueError("openai_model_name is not set, please set it in the config.toml file.")
# Optional custom endpoint (e.g. proxy / OpenAI-compatible gateway).
openai_base_url = config.app.get("openai_base_url")
openai.api_key = openai_api_key
# fix: removed the no-op self-assignment `openai_model_name = openai_model_name`
if openai_base_url:
    openai.base_url = openai_base_url
def _generate_response(prompt: str) -> str:
    """Send a single-turn chat completion request and return the reply text."""
    completion = openai.chat.completions.create(
        model=openai_model_name,
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
def generate_script(video_subject: str, language: str = "zh-CN", paragraph_number: int = 1) -> str:
    """
    Generate a voice-over script for a video subject via the chat model.

    :param video_subject: topic the script should cover
    :param language: output language requested in the prompt (e.g. "zh-CN")
    :param paragraph_number: number of paragraphs to keep from the response
    :return: cleaned script text, or "" when the model returned nothing
    """
    prompt = f"""
# Role: Video Script Generator
## Goals:
Generate a script for a video, depending on the subject of the video.
## Constrains:
1. the script is to be returned as a string with the specified number of paragraphs.
2. do not under any circumstance reference this prompt in your response.
3. get straight to the point, don't start with unnecessary things like, "welcome to this video".
4. you must not include any type of markdown or formatting in the script, never use a title.
5. only return the raw content of the script.
6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line.
7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script.
## Output Example:
What is the meaning of life. This question has puzzled philosophers.
# Initialization:
- video subject: {video_subject}
- output language: {language}
- number of paragraphs: {paragraph_number}
""".strip()

    final_script = ""
    logger.info(f"subject: {video_subject}")
    logger.debug(f"prompt: \n{prompt}")
    response = _generate_response(prompt=prompt)

    if response:
        # Strip markdown artifacts the model sometimes emits despite the constraints:
        # asterisks/hashes, then bracketed/parenthesized annotations.
        response = response.replace("*", "")
        response = response.replace("#", "")
        response = re.sub(r"\[.*\]", "", response)
        response = re.sub(r"\(.*\)", "", response)
        # Keep only the requested number of paragraphs.
        paragraphs = response.split("\n\n")
        selected_paragraphs = paragraphs[:paragraph_number]
        final_script = "\n\n".join(selected_paragraphs)
    else:
        # fix: was `logging.error` — the rest of this module logs via loguru
        logger.error("gpt returned an empty response")

    logger.success(f"completed: \n{final_script}")
    return final_script
def generate_terms(video_subject: str, video_script: str, amount: int = 5) -> List[str]:
    """
    Ask the model for English stock-video search terms as a JSON array.

    :param video_subject: subject of the video (always included in the terms)
    :param video_script: full script, given to the model as context
    :param amount: number of terms requested
    :return: list of search-term strings; [] when the response is unusable
    """
    prompt = f"""
# Role: Video Search Terms Generator
## Goals:
Generate {amount} search terms for stock videos, depending on the subject of a video.
## Constrains:
1. the search terms are to be returned as a json-array of strings.
2. each search term should consist of 1-3 words, always add the main subject of the video.
3. you must only return the json-array of strings. you must not return anything else. you must not return the script.
4. the search terms must be related to the subject of the video.
5. reply with english search terms only.
## Output Example:
["search term 1", "search term 2", "search term 3","search term 4","search term 5"]
## Context:
### Video Subject
{video_subject}
### Video Script
{video_script}
""".strip()

    logger.info(f"subject: {video_subject}")
    logger.debug(f"prompt: \n{prompt}")
    response = _generate_response(prompt)

    search_terms = []
    try:
        search_terms = json.loads(response)
        if not isinstance(search_terms, list) or not all(isinstance(term, str) for term in search_terms):
            raise ValueError("response is not a list of strings.")
    except (json.JSONDecodeError, ValueError):
        # fix: discard the invalid parse result — previously a well-formed JSON
        # value that was NOT a list of strings leaked through to the caller
        # when the regex fallback below found no match.
        search_terms = []
        # Attempt to extract a list-like substring and convert it to a list.
        match = re.search(r'\["(?:[^"\\]|\\.)*"(?:,\s*"[^"\\]*")*\]', response)
        if match:
            try:
                search_terms = json.loads(match.group())
            except json.JSONDecodeError:
                logger.error(f"could not parse response: {response}")
                return []

    logger.success(f"completed: \n{search_terms}")
    return search_terms
if __name__ == "__main__":
    # Ad-hoc smoke test: generate a script and its search terms for one subject.
    video_subject = "生命的意义是什么"
    script = generate_script(video_subject=video_subject, language="zh-CN", paragraph_number=1)
    # print("######################")
    # print(script)
    search_terms = generate_terms(video_subject=video_subject, video_script=script, amount=5)
    # print("######################")
    # print(search_terms)

112
app/services/material.py Normal file
View File

@@ -0,0 +1,112 @@
import time
import requests
from typing import List
from loguru import logger
from app.config import config
from app.models.schema import VideoAspect
from app.utils import utils
# Monotonic request counter used by round_robin_api_key() to rotate keys.
requested_count = 0
# One or more Pexels API keys; mandatory — fail fast at import time.
pexels_api_keys = config.app.get("pexels_api_keys")
if not pexels_api_keys:
    raise ValueError("pexels_api_keys is not set, please set it in the config.toml file.")
def round_robin_api_key():
    """Rotate through the configured Pexels API keys to spread rate limits."""
    global requested_count
    requested_count += 1
    key_index = requested_count % len(pexels_api_keys)
    return pexels_api_keys[key_index]
def search_videos(search_term: str,
                  wanted_count: int,
                  minimum_duration: int,
                  video_aspect: VideoAspect = VideoAspect.portrait,
                  locale: str = "zh-CN"
                  ) -> List[str]:
    """
    Search Pexels for stock videos matching search_term.

    :param search_term: query string sent to the Pexels video search API
    :param wanted_count: cap on how many result entries to consider
    :param minimum_duration: skip videos shorter than this (seconds)
    :param video_aspect: target aspect; only files matching its exact
                         resolution are returned
    :param locale: Pexels locale parameter
    :return: list of direct download URLs (possibly empty on error)
    """
    aspect = VideoAspect(video_aspect)
    video_orientation = aspect.name
    video_width, video_height = aspect.to_resolution()
    headers = {
        "Authorization": round_robin_api_key()
    }
    query_url = f"https://api.pexels.com/videos/search?query={search_term}&per_page=15&orientation={video_orientation}&locale={locale}"
    logger.info(f"searching videos: {query_url}")

    video_urls = []
    try:
        # fix: request/parse moved inside try and given a timeout — previously a
        # network failure or stalled connection escaped this function and could
        # kill (or hang) the whole download loop.
        r = requests.get(query_url, headers=headers, timeout=30)
        response = r.json()
        videos_count = min(len(response["videos"]), wanted_count)
        for i in range(videos_count):
            # Skip videos that are shorter than the desired minimum duration.
            if response["videos"][i]["duration"] < minimum_duration:
                continue
            video_files = response["videos"][i]["video_files"]
            # Pick the first file whose resolution matches the target exactly.
            for video in video_files:
                w = int(video["width"])
                h = int(video["height"])
                if w == video_width and h == video_height:
                    video_urls.append(video["link"])
                    break
    except Exception as e:
        logger.error(f"search videos failed: {e}")
    return video_urls
def save_video(video_url: str, save_dir: str) -> str:
    """
    Download video_url into save_dir and return the local file path.
    The file name is a millisecond-timestamp id, e.g. "vid-1710000000000.mp4".
    """
    video_id = f"vid-{str(int(time.time() * 1000))}"
    video_path = f"{save_dir}/{video_id}.mp4"
    # fix: stream in chunks with a timeout instead of buffering the whole video
    # in memory with a bare requests.get(); also surface HTTP errors explicitly.
    with requests.get(video_url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(video_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    return video_path
def download_videos(task_id: str,
                    search_terms: List[str],
                    video_aspect: VideoAspect = VideoAspect.portrait,
                    wanted_count: int = 15,
                    minimum_duration: int = 5,
                    max_per_term: int = 3,
                    ) -> List[str]:
    """
    Search and download stock videos for each term into the task directory.

    :param task_id: id used to resolve the per-task save directory
    :param search_terms: list of query strings
    :param video_aspect: target aspect passed to search_videos
    :param wanted_count: per-search result cap passed to search_videos
    :param minimum_duration: minimum clip length (seconds)
    :param max_per_term: cap on how many new (deduplicated) urls each term may
                         contribute (generalized — was a hard-coded 3)
    :return: local paths of successfully downloaded videos
    """
    valid_video_urls = []
    for search_term in search_terms:
        video_urls = search_videos(search_term=search_term,
                                   wanted_count=wanted_count,
                                   minimum_duration=minimum_duration,
                                   video_aspect=video_aspect)
        logger.info(f"found {len(video_urls)} videos for '{search_term}'")
        added = 0
        for url in video_urls:
            # Deduplicate across search terms.
            if url not in valid_video_urls:
                valid_video_urls.append(url)
                added += 1
            if added >= max_per_term:
                break

    logger.info(f"downloading videos: {len(valid_video_urls)}")
    video_paths = []
    save_dir = utils.task_dir(task_id)
    for video_url in valid_video_urls:
        try:
            saved_video_path = save_video(video_url, save_dir)
            video_paths.append(saved_video_path)
        except Exception as e:
            # Best-effort: one failed download must not abort the task.
            logger.error(f"failed to download video: {video_url}, {e}")
    logger.success(f"downloaded {len(video_paths)} videos")
    return video_paths

167
app/services/subtitle.py Normal file
View File

@@ -0,0 +1,167 @@
import json
import re
from faster_whisper import WhisperModel
from timeit import default_timer as timer
from loguru import logger
from app.config import config
from app.models import const
from app.utils import utils
# Whisper configuration (config.toml, [whisper] section), with safe defaults.
model_size = config.whisper.get("model_size", "large-v3")
device = config.whisper.get("device", "cpu")
compute_type = config.whisper.get("compute_type", "int8")
# NOTE: heavy import-time side effect — loads (and may download) model weights.
model = WhisperModel(model_size_or_path=model_size, device=device, compute_type=compute_type)
def create(audio_file, subtitle_file: str = ""):
    """
    Transcribe audio_file with faster-whisper and write an SRT subtitle file.

    Segments are re-split at punctuation using word-level timestamps so each
    subtitle cue is one sentence-like chunk.

    :param audio_file: path to the audio to transcribe
    :param subtitle_file: output SRT path; defaults to "<audio_file>.srt"
    """
    logger.info(f"start, output file: {subtitle_file}")
    if not subtitle_file:
        subtitle_file = f"{audio_file}.srt"
    segments, info = model.transcribe(
        audio_file,
        beam_size=5,
        word_timestamps=True,  # needed for the punctuation-based re-splitting below
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=500),
    )
    logger.info(f"detected language: '{info.language}', probability: {info.language_probability:.2f}")
    start = timer()
    subtitles = []

    def recognized(seg_text, seg_start, seg_end):
        # Record one finished subtitle chunk (skips whitespace-only text).
        seg_text = seg_text.strip()
        if not seg_text:
            return
        msg = "[%.2fs -> %.2fs] %s" % (seg_start, seg_end, seg_text)
        logger.debug(msg)
        subtitles.append({
            "msg": seg_text,
            "start_time": seg_start,
            "end_time": seg_end
        })

    for segment in segments:
        words_idx = 0
        words_len = len(segment.words)
        seg_start = 0
        seg_end = 0
        seg_text = ""
        if segment.words:
            is_segmented = False
            for word in segment.words:
                if not is_segmented:
                    # First word of a new chunk: anchor the start time.
                    seg_start = word.start
                    is_segmented = True
                seg_end = word.end
                # Split the segment into a new cue whenever a word contains punctuation.
                seg_text += word.word
                if utils.str_contains_punctuation(word.word):
                    # Drop the punctuation character itself (last char).
                    seg_text = seg_text[:-1]
                    if not seg_text:
                        continue
                    recognized(seg_text, seg_start, seg_end)
                    is_segmented = False
                    seg_text = ""
                # Widen the chunk bounds with the segment bounds at the edges.
                if words_idx == 0 and segment.start < word.start:
                    seg_start = word.start
                if words_idx == (words_len - 1) and segment.end > word.end:
                    seg_end = word.end
                words_idx += 1
        if not seg_text:
            continue
        # Flush the trailing chunk that ended without punctuation.
        recognized(seg_text, seg_start, seg_end)

    end = timer()
    diff = end - start
    logger.info(f"complete, elapsed: {diff:.2f} s")

    idx = 1
    lines = []
    for subtitle in subtitles:
        text = subtitle.get("msg")
        if text:
            lines.append(utils.text_to_srt(idx, text, subtitle.get("start_time"), subtitle.get("end_time")))
            idx += 1
    sub = "\n".join(lines)
    with open(subtitle_file, "w") as f:
        f.write(sub)
    logger.info(f"subtitle file created: {subtitle_file}")
def file_to_subtitles(filename):
    """
    Parse an SRT file into a list of (index, timestamp_line, text) tuples.

    Index numbers are re-generated sequentially; the original cue numbers in
    the file are ignored (a bare number line has no timestamp and is skipped).

    :param filename: path to the SRT file
    :return: list of (int, str, str) tuples
    """
    times_texts = []
    current_times = None
    current_text = ""
    index = 0
    with open(filename, 'r', encoding="utf-8") as f:
        for line in f:
            times = re.findall("([0-9]*:[0-9]*:[0-9]*,[0-9]*)", line)
            if times:
                current_times = line
            elif line.strip() == '' and current_times:
                index += 1
                times_texts.append((index, current_times.strip(), current_text.strip()))
                current_times, current_text = None, ""
            elif current_times:
                current_text += line
    # fix: an SRT that does not end with a blank line used to drop its last cue
    if current_times:
        index += 1
        times_texts.append((index, current_times.strip(), current_text.strip()))
    return times_texts
def correct(subtitle_file, video_script):
    """
    Align subtitle text with the original script, line by line.

    Whisper occasionally mis-transcribes words; when the subtitle file has
    exactly one cue per script line, each cue's text is replaced by the
    corresponding script line and the file is rewritten.

    :param subtitle_file: path to the SRT file produced by create()
    :param video_script: the original narration script
    """
    subtitle_items = file_to_subtitles(subtitle_file)
    script_lines = utils.split_string_by_punctuations(video_script)

    # fix: a count mismatch used to fall through and be logged as
    # "subtitle is correct" — report it honestly and skip correction.
    if len(subtitle_items) != len(script_lines):
        logger.warning(
            f"line count mismatch, subtitle: {len(subtitle_items)}, script: {len(script_lines)}, skip correction")
        return

    corrected = False
    for i in range(len(script_lines)):
        script_line = script_lines[i].strip()
        subtitle_line = subtitle_items[i][2]
        if script_line != subtitle_line:
            logger.warning(f"line {i + 1}, script: {script_line}, subtitle: {subtitle_line}")
            subtitle_items[i] = (subtitle_items[i][0], subtitle_items[i][1], script_line)
            corrected = True

    if corrected:
        with open(subtitle_file, "w", encoding="utf-8") as fd:
            for item in subtitle_items:
                fd.write(f"{item[0]}\n{item[1]}\n{item[2]}\n\n")
        logger.info("subtitle corrected")
    else:
        logger.success("subtitle is correct")
if __name__ == "__main__":
    # Ad-hoc check: parse an existing task's subtitle and re-run correction
    # against its saved script.
    task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
    task_dir = utils.task_dir(task_id)
    subtitle_file = f"{task_dir}/subtitle.srt"

    subtitles = file_to_subtitles(subtitle_file)
    print(subtitles)

    script_file = f"{task_dir}/script.json"
    with open(script_file, "r") as f:
        script_content = f.read()
    s = json.loads(script_content)
    script = s.get("script")

    correct(subtitle_file, script)

113
app/services/task.py Normal file
View File

@@ -0,0 +1,113 @@
from os import path
from loguru import logger
from app.config import config
from app.models.schema import VideoParams, VoiceNames
from app.services import gpt, material, voice, video, subtitle
from app.utils import utils
def _parse_voice(name: str):
    """
    Split a UI voice name like "female-zh-CN-XiaoxiaoNeural" into
    (voice, lang) == ("zh-CN-XiaoxiaoNeural", "zh-CN").
    Unknown names fall back to the first entry in VoiceNames.
    """
    if name not in VoiceNames:
        name = VoiceNames[0]
    segments = name.split("-")
    # segments: [gender, lang_major, lang_minor, voice_id]
    _lang = f"{segments[1]}-{segments[2]}"
    _voice = f"{_lang}-{segments[3]}"
    return _voice, _lang
def start(task_id, params: VideoParams):
    """
    Run the full video-generation pipeline for one task:
    script -> search terms -> TTS audio -> subtitle -> stock-video download
    -> combine -> final render.

    Example params payload:
    {
        "video_subject": "",
        "video_aspect": "横屏 16:9西瓜视频",
        "voice_name": "女生-晓晓",
        "enable_bgm": false,
        "font_name": "STHeitiMedium 黑体-中",
        "text_color": "#FFFFFF",
        "font_size": 60,
        "stroke_color": "#000000",
        "stroke_width": 1.5
    }

    :return: dict with the final video file path
    """
    logger.info(f"start task: {task_id}")
    video_subject = params.video_subject
    voice_name, language = _parse_voice(params.voice_name)
    paragraph_number = params.paragraph_number
    n_threads = params.n_threads

    logger.info("\n\n## generating video script")
    script = gpt.generate_script(video_subject=video_subject, language=language, paragraph_number=paragraph_number)
    logger.info("\n\n## generating video terms")
    search_terms = gpt.generate_terms(video_subject=video_subject, video_script=script, amount=5)

    # Persist script + terms so the task can be inspected or resumed later.
    script_file = path.join(utils.task_dir(task_id), "script.json")
    script_data = {
        "script": script,
        "search_terms": search_terms
    }
    with open(script_file, "w") as f:
        f.write(utils.to_json(script_data))

    audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
    subtitle_path = path.join(utils.task_dir(task_id), "subtitle.srt")

    logger.info("\n\n## generating audio")
    sub_maker = voice.tts(text=script, voice_name=voice_name, voice_file=audio_file)

    subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
    logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
    if subtitle_provider == "edge":
        # edge-tts already produced word timings; no transcription needed.
        voice.create_subtitle(text=script, sub_maker=sub_maker, subtitle_file=subtitle_path)
    if subtitle_provider == "whisper":
        subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
        logger.info("\n\n## correcting subtitle")
        subtitle.correct(subtitle_file=subtitle_path, video_script=script)

    logger.info("\n\n## downloading videos")
    video_paths = material.download_videos(task_id=task_id, search_terms=search_terms, video_aspect=params.video_aspect,
                                           wanted_count=20,
                                           minimum_duration=5)

    logger.info("\n\n## combining videos")
    combined_video_path = path.join(utils.task_dir(task_id), "combined.mp4")
    video.combine_videos(combined_video_path=combined_video_path,
                         video_paths=video_paths,
                         audio_file=audio_file,
                         video_aspect=params.video_aspect,
                         max_clip_duration=5,
                         threads=n_threads)

    final_video_path = path.join(utils.task_dir(task_id), "final.mp4")
    bgm_file = video.get_bgm_file(bgm_name=params.bgm_name)

    logger.info("\n\n## generating video")
    # Put everything together
    video.generate_video(video_path=combined_video_path,
                         audio_path=audio_file,
                         subtitle_path=subtitle_path,
                         output_file=final_video_path,
                         video_aspect=params.video_aspect,
                         threads=n_threads,
                         font_name=params.font_name,
                         fontsize=params.font_size,
                         text_fore_color=params.text_fore_color,
                         stroke_color=params.stroke_color,
                         stroke_width=params.stroke_width,
                         bgm_file=bgm_file
                         )
    # fix: was `logger.start(...)` — loguru has no such logging method (it was
    # only ever a deprecated alias of add(), which would treat the message as a sink).
    logger.success(f"task {task_id} finished")
    return {
        "video_file": final_video_path,
    }

246
app/services/video.py Normal file
View File

@@ -0,0 +1,246 @@
import glob
import random
from typing import List
from PIL import ImageFont
from loguru import logger
from moviepy.editor import *
from moviepy.video.fx.crop import crop
from moviepy.video.tools.subtitles import SubtitlesClip
from app.models.schema import VideoAspect
from app.utils import utils
def get_bgm_file(bgm_name: str = "random"):
    """
    Resolve the background-music selection to a file path.

    :param bgm_name: "" disables bgm; "random" picks a random mp3 from the
                     song directory; anything else is treated as a file name
                     inside the song directory.
    :return: absolute path to an mp3, or "" when nothing applies
    """
    if not bgm_name:
        return ""
    if bgm_name == "random":
        suffix = "*.mp3"
        song_dir = utils.song_dir()
        files = glob.glob(os.path.join(song_dir, suffix))
        # fix: random.choice raises IndexError on an empty song directory
        if not files:
            logger.warning(f"no mp3 files found in {song_dir}, skip bgm")
            return ""
        return random.choice(files)
    file = os.path.join(utils.song_dir(), bgm_name)
    if os.path.exists(file):
        return file
    return ""
def combine_videos(combined_video_path: str,
                   video_paths: List[str],
                   audio_file: str,
                   video_aspect: VideoAspect = VideoAspect.portrait,
                   max_clip_duration: int = 5,
                   threads: int = 2,
                   ) -> str:
    """
    Concatenate the downloaded clips (muted, center-cropped and resized to the
    target aspect) until they cover the narration audio's duration.

    :param combined_video_path: output file path
    :param video_paths: local clip paths (cycled if their total is too short)
    :param audio_file: narration audio; its duration sets the target length
    :param video_aspect: target output aspect/resolution
    :param max_clip_duration: hard cap per clip, in seconds
    :param threads: ffmpeg writer threads
    :return: combined_video_path
    """
    logger.info(f"combining {len(video_paths)} videos into one file: {combined_video_path}")
    audio_clip = AudioFileClip(audio_file)
    max_duration = audio_clip.duration
    logger.info(f"max duration of audio: {max_duration} seconds")
    # Required duration of each clip
    req_dur = max_duration / len(video_paths)
    logger.info(f"each clip will be maximum {req_dur} seconds long")

    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()
    # fix: crop ratio was hard-coded to 0.5625 (9:16 portrait); derive it from
    # the requested aspect so landscape output is cropped correctly too.
    target_ratio = video_width / video_height

    clips = []
    tot_dur = 0
    # Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
    while tot_dur < max_duration:
        for video_path in video_paths:
            clip = VideoFileClip(video_path)
            clip = clip.without_audio()
            # Check if clip is longer than the remaining audio
            if (max_duration - tot_dur) < clip.duration:
                clip = clip.subclip(0, (max_duration - tot_dur))
            # Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
            elif req_dur < clip.duration:
                clip = clip.subclip(0, req_dur)
            clip = clip.set_fps(30)

            # Not all videos are same size, so we need to crop + resize them
            if clip.w != video_width or clip.h != video_height:
                if round((clip.w / clip.h), 4) < round(target_ratio, 4):
                    # clip is narrower than the target: keep width, crop height
                    clip = crop(clip,
                                width=clip.w,
                                height=round(clip.w / target_ratio),
                                x_center=clip.w / 2,
                                y_center=clip.h / 2
                                )
                else:
                    # clip is wider than the target: keep height, crop width
                    clip = crop(clip,
                                width=round(target_ratio * clip.h),
                                height=clip.h,
                                x_center=clip.w / 2,
                                y_center=clip.h / 2
                                )
                logger.info(f"resizing video to {video_width} x {video_height}")
                clip = clip.resize((video_width, video_height))

            if clip.duration > max_clip_duration:
                clip = clip.subclip(0, max_clip_duration)

            clips.append(clip)
            tot_dur += clip.duration

    final_clip = concatenate_videoclips(clips)
    final_clip = final_clip.set_fps(30)
    logger.info("writing")
    final_clip.write_videofile(combined_video_path, threads=threads)
    logger.success("completed")
    return combined_video_path
def wrap_text(text, max_width, font='Arial', fontsize=60):
    """
    Wrap text character-by-character so each rendered line fits max_width px.

    :param text: the string to wrap
    :param max_width: maximum rendered line width in pixels
    :param font: path to (or name of) a truetype font file
    :param fontsize: font size in points
    :return: text with '\n' inserted between wrapped lines
    """
    # Avoid shadowing the `font` parameter (the original rebound it).
    font_obj = ImageFont.truetype(font, fontsize)

    def get_text_size(inner_text):
        left, top, right, bottom = font_obj.getbbox(inner_text)
        return right - left, bottom - top

    width, height = get_text_size(text)
    if width <= max_width:
        return text

    logger.warning(f"wrapping text, max_width: {max_width}, text_width: {width}, text: {text}")
    wrapped_lines = []
    current = ''
    for char in text:
        candidate = current + char
        cand_width, _ = get_text_size(candidate)
        # fix: the overflowing character used to stay on the current line, so
        # wrapped lines could exceed max_width; now it starts the next line.
        # (`not current` keeps a single over-wide char from looping forever.)
        if cand_width <= max_width or not current:
            current = candidate
        else:
            wrapped_lines.append(current)
            current = char
    wrapped_lines.append(current)
    return '\n'.join(wrapped_lines)
def generate_video(video_path: str,
                   audio_path: str,
                   subtitle_path: str,
                   output_file: str,
                   video_aspect: VideoAspect = VideoAspect.portrait,
                   threads: int = 2,
                   font_name: str = "",
                   fontsize: int = 60,
                   stroke_color: str = "#000000",
                   stroke_width: float = 1.5,
                   text_fore_color: str = "white",
                   text_background_color: str = "transparent",
                   bgm_file: str = "",
                   ):
    """
    Render the final video: burn subtitles into video_path, attach the
    narration audio, optionally mix in background music, and write output_file.

    :param video_path: the combined (muted) stock-video file
    :param audio_path: narration audio track
    :param subtitle_path: SRT file; skipped if empty or missing
    :param output_file: final mp4 path
    :param video_aspect: used only to pick the subtitle baseline position here
    :param threads: ffmpeg writer threads
    :param font_name: font file name inside the fonts dir ("" -> default)
    :param fontsize / stroke_* / text_*: subtitle styling
    :param bgm_file: background-music mp3 ("" disables bgm)
    """
    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()
    logger.info(f"start, video size: {video_width} x {video_height}")
    logger.info(f" ① video: {video_path}")
    logger.info(f" ② audio: {audio_path}")
    logger.info(f" ③ subtitle: {subtitle_path}")
    logger.info(f" ④ output: {output_file}")
    if not font_name:
        font_name = "STHeitiMedium.ttc"
    font_path = os.path.join(utils.font_dir(), font_name)
    logger.info(f"using font: {font_path}")

    # Custom TextClip generator that applies line-wrapping first.
    def generator(txt):
        # Wrap long lines to fit the video width, leaving a 100 px margin;
        # adjust max_width to fit your video.
        wrapped_txt = wrap_text(txt, max_width=video_width - 100,
                                font=font_path,
                                fontsize=fontsize)
        return TextClip(
            wrapped_txt,
            font=font_path,
            fontsize=fontsize,
            color=text_fore_color,
            bg_color=text_background_color,
            stroke_color=stroke_color,
            stroke_width=stroke_width,
            print_cmd=False,
        )

    # Subtitle baseline: 200 px above the bottom edge (100 px for landscape).
    position_height = video_height - 200
    if video_aspect == VideoAspect.landscape:
        position_height = video_height - 100

    clips = [
        VideoFileClip(video_path),
        # subtitles.set_position(lambda _t: ('center', position_height))
    ]
    # Burn the subtitles into the video
    if subtitle_path and os.path.exists(subtitle_path):
        subtitles = SubtitlesClip(subtitle_path, generator)
        clips.append(subtitles.set_position(lambda _t: ('center', position_height)))
    result = CompositeVideoClip(clips)

    # Add the audio
    audio = AudioFileClip(audio_path)
    result = result.set_audio(audio)

    temp_output_file = f"{output_file}.temp.mp4"
    logger.info(f"writing to temp file: {temp_output_file}")
    result.write_videofile(temp_output_file, threads=threads or 2)

    video_clip = VideoFileClip(temp_output_file)
    if bgm_file:
        logger.info(f"adding background music: {bgm_file}")
        original_duration = video_clip.duration
        original_audio = video_clip.audio
        song_clip = AudioFileClip(bgm_file).set_fps(44100)
        # Mix the song in at 20% volume under the narration.
        song_clip = song_clip.volumex(0.2).set_fps(44100)
        # Add the song to the video
        comp_audio = CompositeAudioClip([original_audio, song_clip])
        video_clip = video_clip.set_audio(comp_audio)
        video_clip = video_clip.set_fps(30)
        video_clip = video_clip.set_duration(original_duration)
    # Re-encode the audio as AAC, otherwise playback fails on iPhone.
    logger.info(f"encoding audio codec to aac")
    video_clip.write_videofile(output_file, audio_codec="aac", threads=threads)
    # delete the temp file
    os.remove(temp_output_file)
    logger.success(f"completed")
if __name__ == "__main__":
    # Ad-hoc checks: exercise text wrapping, then re-render an existing task.
    txt = "hello 幸福经常被描述为最终人生目标和人类追求的核心 但它通常涉及对个人生活中意义和目的的深刻感悟"
    font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
    t = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
    print(t)

    task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
    task_dir = utils.task_dir(task_id)
    video_file = f"{task_dir}/combined.mp4"
    audio_file = f"{task_dir}/audio.mp3"
    subtitle_file = f"{task_dir}/subtitle.srt"
    output_file = f"{task_dir}/final.mp4"
    generate_video(video_path=video_file,
                   audio_path=audio_file,
                   subtitle_path=subtitle_file,
                   output_file=output_file,
                   video_aspect=VideoAspect.portrait,
                   threads=2,
                   font_name="STHeitiMedium.ttc",
                   fontsize=60,
                   stroke_color="#000000",
                   stroke_width=1.5,
                   text_fore_color="white",
                   text_background_color="transparent",
                   bgm_file=""
                   )

101
app/services/voice.py Normal file
View File

@@ -0,0 +1,101 @@
import asyncio
from xml.sax.saxutils import unescape
from edge_tts.submaker import mktimestamp
from loguru import logger
from edge_tts import submaker, SubMaker
import edge_tts
from app.utils import utils
def tts(text: str, voice_name: str, voice_file: str) -> SubMaker:
    """
    Synthesize speech with edge-tts, write the mp3 to voice_file, and return
    a SubMaker holding the word-boundary timings for subtitle generation.
    """
    logger.info(f"start, voice name: {voice_name}")

    async def _synthesize() -> SubMaker:
        communicate = edge_tts.Communicate(text, voice_name)
        maker = edge_tts.SubMaker()
        with open(voice_file, "wb") as audio_out:
            async for chunk in communicate.stream():
                chunk_type = chunk["type"]
                if chunk_type == "audio":
                    audio_out.write(chunk["data"])
                elif chunk_type == "WordBoundary":
                    maker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
        return maker

    sub_maker = asyncio.run(_synthesize())
    logger.info(f"completed, output file: {voice_file}")
    return sub_maker
def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
    """
    Build an optimized SRT file from edge-tts word boundaries:
    1. split the script into lines at punctuation
    2. accumulate word boundaries until they match one script line
    3. write one SRT cue per matched line
    """
    def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
        """
        1
        00:00:00,000 --> 00:00:02,360
        跑步是一项简单易行的运动
        """
        start_t = mktimestamp(start_time).replace(".", ",")
        end_t = mktimestamp(end_time).replace(".", ",")
        return (
            f"{idx}\n"
            f"{start_t} --> {end_t}\n"
            f"{sub_text}\n"
        )

    start_time = -1.0
    sub_items = []
    sub_index = 0
    script_lines = utils.split_string_by_punctuations(text)
    sub_line = ""
    for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
        _start_time, end_time = offset
        if start_time < 0:
            start_time = _start_time
        sub = unescape(sub)
        sub_line += sub
        # fix: guard the index — when the TTS output and the script diverge,
        # sub_index could run past script_lines and raise IndexError.
        if sub_index < len(script_lines) and sub_line == script_lines[sub_index]:
            sub_index += 1
            sub_items.append(formatter(
                idx=sub_index,
                start_time=start_time,
                end_time=end_time,
                sub_text=sub_line,
            ))
            start_time = -1.0
            sub_line = ""
    with open(subtitle_file, "w", encoding="utf-8") as file:
        file.write("\n".join(sub_items))
if __name__ == "__main__":
    # Ad-hoc smoke test: synthesize the same text with several zh-CN voices
    # and generate a subtitle file for each run.
    temp_dir = utils.storage_dir("temp")
    voice_names = [
        # female voices
        "zh-CN-XiaoxiaoNeural",
        "zh-CN-XiaoyiNeural",
        # male voices
        "zh-CN-YunyangNeural",
        "zh-CN-YunxiNeural",
    ]
    text = """
    预计未来3天深圳冷空气活动频繁未来两天持续阴天有小雨出门带好雨具
    10-11日持续阴天有小雨日温差小气温在13-17℃之间体感阴凉
    12日天气短暂好转早晚清凉
    """
    for voice_name in voice_names:
        voice_file = f"{temp_dir}/tts-{voice_name}.mp3"
        subtitle_file = f"{temp_dir}/tts.mp3.srt"
        sub_maker = tts(text=text, voice_name=voice_name, voice_file=voice_file)
        create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)