init
This commit is contained in:
0
app/services/__init__.py
Normal file
0
app/services/__init__.py
Normal file
152
app/services/gpt.py
Normal file
152
app/services/gpt.py
Normal file
@@ -0,0 +1,152 @@
|
||||
import logging
|
||||
import re
|
||||
import json
|
||||
import openai
|
||||
from typing import List
|
||||
from loguru import logger
|
||||
|
||||
from app.config import config
|
||||
|
||||
# --- OpenAI client configuration (read once at import time; fail fast) ---

# API key is mandatory.
openai_api_key = config.app.get("openai_api_key")
if not openai_api_key:
    raise ValueError("openai_api_key is not set, please set it in the config.toml file.")

# Chat model name (e.g. "gpt-3.5-turbo") is mandatory as well.
openai_model_name = config.app.get("openai_model_name")
if not openai_model_name:
    raise ValueError("openai_model_name is not set, please set it in the config.toml file.")

# Optional custom endpoint (proxy or OpenAI-compatible server).
openai_base_url = config.app.get("openai_base_url")

# BUGFIX: removed the no-op self-assignment `openai_model_name = openai_model_name`.
openai.api_key = openai_api_key
if openai_base_url:
    openai.base_url = openai_base_url
|
||||
|
||||
|
||||
def _generate_response(prompt: str) -> str:
    """Send *prompt* as a single user message to the configured OpenAI chat
    model and return the reply text."""
    completion = openai.chat.completions.create(
        model=openai_model_name,
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
|
||||
|
||||
|
||||
def generate_script(video_subject: str, language: str = "zh-CN", paragraph_number: int = 1) -> str:
    """Generate a voice-over script for a video about *video_subject*.

    Args:
        video_subject: Topic of the video.
        language: Output language for the script (e.g. "zh-CN").
        paragraph_number: Number of paragraphs to keep from the model output.

    Returns:
        The cleaned script text, or "" when the model returned nothing.
    """
    prompt = f"""
# Role: Video Script Generator

## Goals:
Generate a script for a video, depending on the subject of the video.

## Constrains:
1. the script is to be returned as a string with the specified number of paragraphs.
2. do not under any circumstance reference this prompt in your response.
3. get straight to the point, don't start with unnecessary things like, "welcome to this video".
4. you must not include any type of markdown or formatting in the script, never use a title.
5. only return the raw content of the script.
6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line.
7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script.

## Output Example:
What is the meaning of life. This question has puzzled philosophers.

# Initialization:
- video subject: {video_subject}
- output language: {language}
- number of paragraphs: {paragraph_number}
""".strip()

    final_script = ""
    logger.info(f"subject: {video_subject}")
    logger.debug(f"prompt: \n{prompt}")
    response = _generate_response(prompt=prompt)

    if response:
        # Strip markdown artifacts the model sometimes emits despite the prompt.
        response = response.replace("*", "")
        response = response.replace("#", "")

        # Drop any bracketed/parenthesized stage directions or markdown links.
        response = re.sub(r"\[.*\]", "", response)
        response = re.sub(r"\(.*\)", "", response)

        # Keep only the requested number of paragraphs.
        paragraphs = response.split("\n\n")
        selected_paragraphs = paragraphs[:paragraph_number]
        final_script = "\n\n".join(selected_paragraphs)
    else:
        # BUGFIX: use the loguru logger consistently (was stdlib `logging`,
        # which is not configured here and bypasses the app's log handling).
        logger.error("gpt returned an empty response")

    logger.success(f"completed: \n{final_script}")
    return final_script
|
||||
|
||||
|
||||
def generate_terms(video_subject: str, video_script: str, amount: int = 5) -> List[str]:
    """Generate English stock-footage search terms for the given script.

    Args:
        video_subject: Topic of the video.
        video_script: The generated voice-over script.
        amount: Number of search terms to request.

    Returns:
        A list of search-term strings; empty list on any parsing failure.
    """
    prompt = f"""
# Role: Video Search Terms Generator

## Goals:
Generate {amount} search terms for stock videos, depending on the subject of a video.

## Constrains:
1. the search terms are to be returned as a json-array of strings.
2. each search term should consist of 1-3 words, always add the main subject of the video.
3. you must only return the json-array of strings. you must not return anything else. you must not return the script.
4. the search terms must be related to the subject of the video.
5. reply with english search terms only.

## Output Example:
["search term 1", "search term 2", "search term 3","search term 4","search term 5"]

## Context:
### Video Subject
{video_subject}

### Video Script
{video_script}
""".strip()

    logger.info(f"subject: {video_subject}")
    logger.debug(f"prompt: \n{prompt}")
    response = _generate_response(prompt)
    search_terms = []

    try:
        # BUGFIX: also catch TypeError so a None response from the model
        # doesn't crash json.loads with an uncaught exception.
        search_terms = json.loads(response)
        if not isinstance(search_terms, list) or not all(isinstance(term, str) for term in search_terms):
            raise ValueError("response is not a list of strings.")

    except (json.JSONDecodeError, TypeError, ValueError):
        # The model sometimes wraps the array in prose; try to extract a
        # JSON-array-looking substring and parse that instead.
        match = re.search(r'\["(?:[^"\\]|\\.)*"(?:,\s*"[^"\\]*")*\]', response or "")
        if match:
            try:
                search_terms = json.loads(match.group())
            except json.JSONDecodeError:
                logger.error(f"could not parse response: {response}")
                return []
        else:
            # BUGFIX: previously an unparseable response fell through with a
            # misleading "completed" success log; report it explicitly.
            logger.error(f"could not parse response: {response}")
            return []

    logger.success(f"completed: \n{search_terms}")
    return search_terms
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: generate a script and matching stock-footage terms.
    subject = "生命的意义是什么"
    generated_script = generate_script(video_subject=subject, language="zh-CN", paragraph_number=1)
    generated_terms = generate_terms(video_subject=subject, video_script=generated_script, amount=5)
|
||||
112
app/services/material.py
Normal file
112
app/services/material.py
Normal file
@@ -0,0 +1,112 @@
|
||||
import time
|
||||
|
||||
import requests
|
||||
from typing import List
|
||||
from loguru import logger
|
||||
|
||||
from app.config import config
|
||||
from app.models.schema import VideoAspect
|
||||
from app.utils import utils
|
||||
|
||||
# Counter driving the round-robin rotation over the configured API keys.
requested_count = 0

# Pexels API keys are mandatory for searching stock footage; fail fast.
pexels_api_keys = config.app.get("pexels_api_keys")
if not pexels_api_keys:
    raise ValueError("pexels_api_keys is not set, please set it in the config.toml file.")
|
||||
|
||||
|
||||
def round_robin_api_key():
    """Return the next Pexels API key, cycling through the configured list.

    Spreads requests across keys to avoid hitting per-key rate limits.
    """
    global requested_count
    requested_count += 1
    return pexels_api_keys[requested_count % len(pexels_api_keys)]
|
||||
|
||||
|
||||
def search_videos(search_term: str,
                  wanted_count: int,
                  minimum_duration: int,
                  video_aspect: VideoAspect = VideoAspect.portrait,
                  locale: str = "zh-CN"
                  ) -> List[str]:
    """Search Pexels for stock videos matching *search_term*.

    Args:
        search_term: Query string sent to the Pexels API.
        wanted_count: Maximum number of result videos to inspect.
        minimum_duration: Skip videos shorter than this many seconds.
        video_aspect: Desired orientation/resolution of the videos.
        locale: Pexels search locale.

    Returns:
        Direct download URLs of renditions whose resolution exactly matches
        the target aspect's resolution. Empty list on failure.
    """
    aspect = VideoAspect(video_aspect)
    video_orientation = aspect.name
    video_width, video_height = aspect.to_resolution()

    headers = {
        "Authorization": round_robin_api_key()
    }

    # BUGFIX: per_page was hard-coded to 15, silently capping larger
    # wanted_count values (callers pass 20). Pexels allows at most 80.
    per_page = min(max(wanted_count, 1), 80)
    query_url = (f"https://api.pexels.com/videos/search"
                 f"?query={search_term}&per_page={per_page}"
                 f"&orientation={video_orientation}&locale={locale}")
    logger.info(f"searching videos: {query_url}")
    # Timeout so one stuck request cannot hang the whole pipeline.
    r = requests.get(query_url, headers=headers, timeout=30)

    response = r.json()
    video_urls = []
    try:
        videos_count = min(len(response["videos"]), wanted_count)
        for i in range(videos_count):
            # Skip clips that are too short to be useful.
            if response["videos"][i]["duration"] < minimum_duration:
                continue
            video_files = response["videos"][i]["video_files"]
            # Pick the first rendition that exactly matches the target resolution.
            for video in video_files:
                w = int(video["width"])
                h = int(video["height"])
                if w == video_width and h == video_height:
                    video_urls.append(video["link"])
                    break
    except Exception as e:
        # Best-effort: a malformed API response yields an empty result,
        # never a crash of the caller.
        logger.error(f"search videos failed: {e}")

    return video_urls
|
||||
|
||||
|
||||
def save_video(video_url: str, save_dir: str) -> str:
    """Download *video_url* into *save_dir* and return the local file path.

    The response is streamed in chunks so large videos are not buffered
    fully in memory. Raises ``requests.HTTPError`` on a non-2xx response so
    the caller (``download_videos``) can log and skip the broken URL instead
    of silently saving an error page as an .mp4.
    """
    # Millisecond timestamp keeps file names unique within a task.
    video_id = f"vid-{str(int(time.time() * 1000))}"
    video_path = f"{save_dir}/{video_id}.mp4"
    with requests.get(video_url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(video_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    return video_path
|
||||
|
||||
|
||||
def download_videos(task_id: str,
                    search_terms: List[str],
                    video_aspect: VideoAspect = VideoAspect.portrait,
                    wanted_count: int = 15,
                    minimum_duration: int = 5
                    ) -> List[str]:
    """Search every term on Pexels, keep up to 3 new URLs per term, and
    download them all into the task directory.

    Returns the local file paths of the successfully downloaded videos.
    """
    valid_video_urls = []
    for term in search_terms:
        found_urls = search_videos(search_term=term,
                                   wanted_count=wanted_count,
                                   minimum_duration=minimum_duration,
                                   video_aspect=video_aspect)
        logger.info(f"found {len(found_urls)} videos for '{term}'")
        # Take at most 3 previously-unseen URLs per search term.
        added = 0
        for url in found_urls:
            if url in valid_video_urls:
                continue
            valid_video_urls.append(url)
            added += 1
            if added >= 3:
                break

    logger.info(f"downloading videos: {len(valid_video_urls)}")
    video_paths = []
    save_dir = utils.task_dir(task_id)
    for url in valid_video_urls:
        try:
            video_paths.append(save_video(url, save_dir))
        except Exception as e:
            # Best-effort: a single failed download must not abort the task.
            logger.error(f"failed to download video: {url}, {e}")
    logger.success(f"downloaded {len(video_paths)} videos")
    return video_paths
|
||||
167
app/services/subtitle.py
Normal file
167
app/services/subtitle.py
Normal file
@@ -0,0 +1,167 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from faster_whisper import WhisperModel
|
||||
from timeit import default_timer as timer
|
||||
from loguru import logger
|
||||
|
||||
from app.config import config
|
||||
from app.models import const
|
||||
from app.utils import utils
|
||||
|
||||
# faster-whisper configuration, read from config.toml with defaults.
model_size = config.whisper.get("model_size", "large-v3")  # e.g. "large-v3", "base"
device = config.whisper.get("device", "cpu")  # "cpu" or "cuda"
compute_type = config.whisper.get("compute_type", "int8")  # quantization mode

# Loaded once at import time; first use may download the model, which is slow.
model = WhisperModel(model_size_or_path=model_size, device=device, compute_type=compute_type)
|
||||
|
||||
|
||||
def create(audio_file, subtitle_file: str = ""):
    """Transcribe *audio_file* with faster-whisper and write an SRT file.

    Words are re-grouped into subtitle lines by splitting on punctuation,
    so each line is a single clause. If *subtitle_file* is empty,
    "<audio_file>.srt" is used.
    """
    logger.info(f"start, output file: {subtitle_file}")
    if not subtitle_file:
        subtitle_file = f"{audio_file}.srt"

    segments, info = model.transcribe(
        audio_file,
        beam_size=5,
        word_timestamps=True,  # needed for per-word re-segmentation below
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=500),
    )

    logger.info(f"detected language: '{info.language}', probability: {info.language_probability:.2f}")

    start = timer()
    subtitles = []

    def recognized(seg_text, seg_start, seg_end):
        # Record one finished subtitle line (empty text is dropped).
        seg_text = seg_text.strip()
        if not seg_text:
            return

        msg = "[%.2fs -> %.2fs] %s" % (seg_start, seg_end, seg_text)
        logger.debug(msg)

        subtitles.append({
            "msg": seg_text,
            "start_time": seg_start,
            "end_time": seg_end
        })

    for segment in segments:
        words_idx = 0
        words_len = len(segment.words)

        seg_start = 0
        seg_end = 0
        seg_text = ""

        if segment.words:
            is_segmented = False
            for word in segment.words:
                if not is_segmented:
                    # First word of a new line: remember its start time.
                    seg_start = word.start
                    is_segmented = True

                seg_end = word.end
                # If the word carries punctuation, end the line there.
                seg_text += word.word

                if utils.str_contains_punctuation(word.word):
                    # remove last char (the punctuation itself)
                    seg_text = seg_text[:-1]
                    if not seg_text:
                        continue

                    recognized(seg_text, seg_start, seg_end)

                    is_segmented = False
                    seg_text = ""

                # Clamp boundary timestamps to the word timings at the
                # segment edges.
                if words_idx == 0 and segment.start < word.start:
                    seg_start = word.start
                if words_idx == (words_len - 1) and segment.end > word.end:
                    seg_end = word.end
                words_idx += 1

        # Flush any text left after the last punctuation mark.
        if not seg_text:
            continue

        recognized(seg_text, seg_start, seg_end)

    end = timer()

    diff = end - start
    logger.info(f"complete, elapsed: {diff:.2f} s")

    # Render the collected lines as a sequentially numbered SRT file.
    idx = 1
    lines = []
    for subtitle in subtitles:
        text = subtitle.get("msg")
        if text:
            lines.append(utils.text_to_srt(idx, text, subtitle.get("start_time"), subtitle.get("end_time")))
            idx += 1

    sub = "\n".join(lines)
    with open(subtitle_file, "w") as f:
        f.write(sub)
    logger.info(f"subtitle file created: {subtitle_file}")
|
||||
|
||||
|
||||
def file_to_subtitles(filename):
    """Parse an SRT file into a list of (index, timestamp_line, text) tuples.

    Blocks are delimited by blank lines. The numeric index lines of the SRT
    are ignored and entries are renumbered sequentially from 1. A trailing
    block not followed by a blank line is dropped (same as the original
    behavior).
    """
    entries = []
    pending_times = None
    pending_text = ""
    counter = 0
    with open(filename, 'r') as srt:
        for raw_line in srt:
            if re.findall("([0-9]*:[0-9]*:[0-9]*,[0-9]*)", raw_line):
                # Timestamp line: starts a new block.
                pending_times = raw_line
            elif pending_times:
                if raw_line.strip() == '':
                    # Blank line: flush the finished block.
                    counter += 1
                    entries.append((counter, pending_times.strip(), pending_text.strip()))
                    pending_times, pending_text = None, ""
                else:
                    # Text line belonging to the current block.
                    pending_text += raw_line
    return entries
|
||||
|
||||
|
||||
def correct(subtitle_file, video_script):
    """Overwrite subtitle text with the original script lines where they differ.

    Only applies when the subtitle file has exactly as many entries as the
    script has punctuation-delimited lines; otherwise the file is untouched.
    """
    subtitle_items = file_to_subtitles(subtitle_file)
    script_lines = utils.split_string_by_punctuations(video_script)

    corrected = False
    if len(subtitle_items) == len(script_lines):
        for idx, raw_line in enumerate(script_lines):
            expected = raw_line.strip()
            actual = subtitle_items[idx][2]
            if expected != actual:
                logger.warning(f"line {idx + 1}, script: {expected}, subtitle: {actual}")
                # Replace only the text; keep index and timestamps.
                subtitle_items[idx] = (subtitle_items[idx][0], subtitle_items[idx][1], expected)
                corrected = True

    if corrected:
        with open(subtitle_file, "w") as fd:
            for item in subtitle_items:
                fd.write(f"{item[0]}\n{item[1]}\n{item[2]}\n\n")
        logger.info(f"subtitle corrected")
    else:
        logger.success(f"subtitle is correct")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual check against a previously generated task directory.
    task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
    task_dir = utils.task_dir(task_id)
    subtitle_file = f"{task_dir}/subtitle.srt"

    print(file_to_subtitles(subtitle_file))

    # Load the original script and verify/correct the subtitle against it.
    script_file = f"{task_dir}/script.json"
    with open(script_file, "r") as f:
        s = json.loads(f.read())
    correct(subtitle_file, s.get("script"))
|
||||
113
app/services/task.py
Normal file
113
app/services/task.py
Normal file
@@ -0,0 +1,113 @@
|
||||
from os import path
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from app.config import config
|
||||
from app.models.schema import VideoParams, VoiceNames
|
||||
from app.services import gpt, material, voice, video, subtitle
|
||||
from app.utils import utils
|
||||
|
||||
|
||||
def _parse_voice(name: str):
    """Split a UI voice name like "female-zh-CN-XiaoxiaoNeural" into
    (voice, language), e.g. ("zh-CN-XiaoxiaoNeural", "zh-CN").

    Unknown names fall back to the first entry of VoiceNames.
    """
    if name not in VoiceNames:
        name = VoiceNames[0]

    # Drop the leading gender tag; rebuild "<lang>-<region>" and the full
    # edge-tts voice identifier from the remaining pieces.
    pieces = name.split("-")
    _lang = f"{pieces[1]}-{pieces[2]}"
    return f"{_lang}-{pieces[3]}", _lang
|
||||
|
||||
|
||||
def start(task_id, params: VideoParams):
    """Run the full video-generation pipeline for one task.

    Steps: generate script -> generate search terms -> persist them ->
    synthesize audio -> create/correct subtitle -> download stock footage ->
    combine clips -> render the final video with subtitles and optional BGM.

    Args:
        task_id: Unique id; all artifacts go into utils.task_dir(task_id).
        params: UI parameters, e.g.
            {
                "video_subject": "",
                "video_aspect": "横屏 16:9(西瓜视频)",
                "voice_name": "女生-晓晓",
                "enable_bgm": false,
                "font_name": "STHeitiMedium 黑体-中",
                "text_color": "#FFFFFF",
                "font_size": 60,
                "stroke_color": "#000000",
                "stroke_width": 1.5
            }

    Returns:
        Dict with the path of the final video file.
    """
    logger.info(f"start task: {task_id}")
    video_subject = params.video_subject
    voice_name, language = _parse_voice(params.voice_name)
    paragraph_number = params.paragraph_number
    n_threads = params.n_threads

    logger.info("\n\n## generating video script")
    script = gpt.generate_script(video_subject=video_subject, language=language, paragraph_number=paragraph_number)

    logger.info("\n\n## generating video terms")
    search_terms = gpt.generate_terms(video_subject=video_subject, video_script=script, amount=5)

    # Persist script + terms so the task can be inspected or re-run later.
    script_file = path.join(utils.task_dir(task_id), "script.json")
    script_data = {
        "script": script,
        "search_terms": search_terms
    }
    with open(script_file, "w") as f:
        f.write(utils.to_json(script_data))

    audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
    subtitle_path = path.join(utils.task_dir(task_id), "subtitle.srt")

    logger.info("\n\n## generating audio")
    sub_maker = voice.tts(text=script, voice_name=voice_name, voice_file=audio_file)

    subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
    logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
    if subtitle_provider == "edge":
        # Edge-TTS already produced word timings; derive the subtitle from them.
        voice.create_subtitle(text=script, sub_maker=sub_maker, subtitle_file=subtitle_path)
    if subtitle_provider == "whisper":
        # Re-transcribe the audio, then align the text back to the script.
        subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
        logger.info("\n\n## correcting subtitle")
        subtitle.correct(subtitle_file=subtitle_path, video_script=script)

    logger.info("\n\n## downloading videos")
    video_paths = material.download_videos(task_id=task_id, search_terms=search_terms,
                                           video_aspect=params.video_aspect,
                                           wanted_count=20,
                                           minimum_duration=5)

    logger.info("\n\n## combining videos")
    combined_video_path = path.join(utils.task_dir(task_id), "combined.mp4")
    video.combine_videos(combined_video_path=combined_video_path,
                         video_paths=video_paths,
                         audio_file=audio_file,
                         video_aspect=params.video_aspect,
                         max_clip_duration=5,
                         threads=n_threads)

    final_video_path = path.join(utils.task_dir(task_id), "final.mp4")

    bgm_file = video.get_bgm_file(bgm_name=params.bgm_name)
    logger.info("\n\n## generating video")
    # Put everything together
    video.generate_video(video_path=combined_video_path,
                         audio_path=audio_file,
                         subtitle_path=subtitle_path,
                         output_file=final_video_path,
                         video_aspect=params.video_aspect,
                         threads=n_threads,
                         font_name=params.font_name,
                         fontsize=params.font_size,
                         text_fore_color=params.text_fore_color,
                         stroke_color=params.stroke_color,
                         stroke_width=params.stroke_width,
                         bgm_file=bgm_file)
    # BUGFIX: loguru has no `logger.start` logging method (the old alias was
    # deprecated and removed), so the original line raised AttributeError at
    # the very end of every task; use `success` instead.
    logger.success(f"task {task_id} finished")
    return {
        "video_file": final_video_path,
    }
|
||||
246
app/services/video.py
Normal file
246
app/services/video.py
Normal file
@@ -0,0 +1,246 @@
|
||||
import glob
|
||||
import random
|
||||
from typing import List
|
||||
from PIL import ImageFont
|
||||
from loguru import logger
|
||||
from moviepy.editor import *
|
||||
from moviepy.video.fx.crop import crop
|
||||
from moviepy.video.tools.subtitles import SubtitlesClip
|
||||
|
||||
from app.models.schema import VideoAspect
|
||||
from app.utils import utils
|
||||
|
||||
|
||||
def get_bgm_file(bgm_name: str = "random"):
    """Resolve a background-music file path.

    - falsy name: background music disabled, return "".
    - "random": pick a random *.mp3 from the song directory ("" if none exist).
    - otherwise: return the named file from the song directory if it exists,
      else "".
    """
    if not bgm_name:
        return ""
    if bgm_name == "random":
        # Collect all mp3 files in the song directory and pick one at random.
        files = glob.glob(os.path.join(utils.song_dir(), "*.mp3"))
        # BUGFIX: random.choice raises IndexError on an empty list; an empty
        # song directory now simply means "no background music".
        if not files:
            return ""
        return random.choice(files)

    file = os.path.join(utils.song_dir(), bgm_name)
    if os.path.exists(file):
        return file
    return ""
|
||||
|
||||
|
||||
def combine_videos(combined_video_path: str,
                   video_paths: List[str],
                   audio_file: str,
                   video_aspect: VideoAspect = VideoAspect.portrait,
                   max_clip_duration: int = 5,
                   threads: int = 2,
                   ) -> str:
    """Concatenate the downloaded clips into one silent video that matches
    the audio duration and the target resolution.

    Clips are cycled until the total duration covers the audio length; each
    clip is trimmed, center-cropped to the target aspect ratio and resized.

    Returns:
        The path of the combined video file.
    """
    logger.info(f"combining {len(video_paths)} videos into one file: {combined_video_path}")
    audio_clip = AudioFileClip(audio_file)
    max_duration = audio_clip.duration
    logger.info(f"max duration of audio: {max_duration} seconds")
    # Required duration of each clip so one pass over all clips fills the audio.
    req_dur = max_duration / len(video_paths)
    logger.info(f"each clip will be maximum {req_dur} seconds long")

    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()
    # BUGFIX: the crop ratio was hard-coded to 0.5625 (9:16 portrait), which
    # produced wrong crops for landscape/square targets; derive it from the
    # requested resolution instead.
    target_ratio = video_width / video_height

    clips = []
    tot_dur = 0
    # Add downloaded clips over and over until the audio duration is covered.
    while tot_dur < max_duration:
        for video_path in video_paths:
            clip = VideoFileClip(video_path)
            clip = clip.without_audio()
            # Trim to whichever is shorter: the remaining audio or req_dur.
            if (max_duration - tot_dur) < clip.duration:
                clip = clip.subclip(0, (max_duration - tot_dur))
            # Only shorten clips if req_dur is shorter than the actual clip,
            # to prevent a frozen last frame.
            elif req_dur < clip.duration:
                clip = clip.subclip(0, req_dur)
            clip = clip.set_fps(30)

            # Not all source videos share the target size: center-crop to the
            # target aspect ratio, then resize to the exact resolution.
            if clip.w != video_width or clip.h != video_height:
                if round((clip.w / clip.h), 4) < target_ratio:
                    # Too tall: keep full width, crop the height.
                    clip = crop(clip,
                                width=clip.w,
                                height=round(clip.w / target_ratio),
                                x_center=clip.w / 2,
                                y_center=clip.h / 2
                                )
                else:
                    # Too wide: keep full height, crop the width.
                    clip = crop(clip,
                                width=round(target_ratio * clip.h),
                                height=clip.h,
                                x_center=clip.w / 2,
                                y_center=clip.h / 2
                                )
                logger.info(f"resizing video to {video_width} x {video_height}")
                clip = clip.resize((video_width, video_height))

            if clip.duration > max_clip_duration:
                clip = clip.subclip(0, max_clip_duration)

            clips.append(clip)
            tot_dur += clip.duration

    final_clip = concatenate_videoclips(clips)
    final_clip = final_clip.set_fps(30)
    logger.info(f"writing")
    final_clip.write_videofile(combined_video_path, threads=threads)
    logger.success(f"completed")
    return combined_video_path
|
||||
|
||||
|
||||
def wrap_text(text, max_width, font='Arial', fontsize=60):
    """Wrap *text* so every rendered line fits within *max_width* pixels.

    Width is measured with the given TrueType font file. Returns the text
    unchanged when it already fits, otherwise with '\\n' inserted between
    lines.
    """
    ttf = ImageFont.truetype(font, fontsize)

    def get_text_size(inner_text):
        # Measure the rendered bounding box of *inner_text*.
        left, top, right, bottom = ttf.getbbox(inner_text)
        return right - left, bottom - top

    width, height = get_text_size(text)
    if width <= max_width:
        return text

    logger.warning(f"wrapping text, max_width: {max_width}, text_width: {width}, text: {text}")
    wrapped_lines = []
    line = ''
    # Greedy per-character wrap (works for CJK text with no word breaks).
    for char in text:
        candidate = line + char
        candidate_width, _ = get_text_size(candidate)
        if candidate_width <= max_width:
            line = candidate
        else:
            # BUGFIX: the overflowing character used to be kept on the
            # current line, so emitted lines could exceed max_width; start
            # the next line with it instead.
            wrapped_lines.append(line)
            line = char
    # BUGFIX: only append a non-empty remainder (the original appended an
    # empty trailing line, producing a stray newline at the end).
    if line:
        wrapped_lines.append(line)
    return '\n'.join(wrapped_lines)
|
||||
|
||||
|
||||
def generate_video(video_path: str,
                   audio_path: str,
                   subtitle_path: str,
                   output_file: str,
                   video_aspect: VideoAspect = VideoAspect.portrait,
                   threads: int = 2,
                   font_name: str = "",
                   fontsize: int = 60,
                   stroke_color: str = "#000000",
                   stroke_width: float = 1.5,
                   text_fore_color: str = "white",
                   text_background_color: str = "transparent",
                   bgm_file: str = "",
                   ):
    """Render the final video.

    Burns the SRT subtitles from *subtitle_path* into *video_path*, attaches
    the voice-over from *audio_path*, optionally mixes in background music at
    reduced volume, and writes the result to *output_file* with the audio
    re-encoded as AAC.
    """
    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()

    logger.info(f"start, video size: {video_width} x {video_height}")
    logger.info(f" ① video: {video_path}")
    logger.info(f" ② audio: {audio_path}")
    logger.info(f" ③ subtitle: {subtitle_path}")
    logger.info(f" ④ output: {output_file}")

    # Fall back to the bundled Chinese-capable font when none was chosen.
    if not font_name:
        font_name = "STHeitiMedium.ttc"
    font_path = os.path.join(utils.font_dir(), font_name)
    logger.info(f"using font: {font_path}")

    # Custom generator for SubtitlesClip that applies line wrapping first.
    def generator(txt):
        # Apply automatic line wrapping so the text fits the video width.
        wrapped_txt = wrap_text(txt, max_width=video_width - 100,
                                font=font_path,
                                fontsize=fontsize)  # adjust max_width to fit your video
        return TextClip(
            wrapped_txt,
            font=font_path,
            fontsize=fontsize,
            color=text_fore_color,
            bg_color=text_background_color,
            stroke_color=stroke_color,
            stroke_width=stroke_width,
            print_cmd=False,
        )

    # Vertical subtitle position, measured from the top of the frame.
    position_height = video_height - 200
    if video_aspect == VideoAspect.landscape:
        position_height = video_height - 100

    clips = [
        VideoFileClip(video_path),
        # subtitles.set_position(lambda _t: ('center', position_height))
    ]
    # Burn the subtitles into the video
    if subtitle_path and os.path.exists(subtitle_path):
        subtitles = SubtitlesClip(subtitle_path, generator)
        clips.append(subtitles.set_position(lambda _t: ('center', position_height)))

    result = CompositeVideoClip(clips)

    # Add the audio
    audio = AudioFileClip(audio_path)
    result = result.set_audio(audio)

    # First pass: write video + voice-over to a temp file, then (optionally)
    # add BGM in a second pass.
    temp_output_file = f"{output_file}.temp.mp4"
    logger.info(f"writing to temp file: {temp_output_file}")
    result.write_videofile(temp_output_file, threads=threads or 2)

    video_clip = VideoFileClip(temp_output_file)
    if bgm_file:
        logger.info(f"adding background music: {bgm_file}")
        # Add song to video at reduced volume using moviepy
        original_duration = video_clip.duration
        original_audio = video_clip.audio
        song_clip = AudioFileClip(bgm_file).set_fps(44100)
        # Lower the BGM volume so the voice-over stays audible
        song_clip = song_clip.volumex(0.2).set_fps(44100)
        # Add the song to the video
        comp_audio = CompositeAudioClip([original_audio, song_clip])
        video_clip = video_clip.set_audio(comp_audio)
        video_clip = video_clip.set_fps(30)
        video_clip = video_clip.set_duration(original_duration)
    # Encode the audio as AAC, otherwise iPhones cannot play the file.
    logger.info(f"encoding audio codec to aac")
    video_clip.write_videofile(output_file, audio_codec="aac", threads=threads)
    # delete the temp file
    os.remove(temp_output_file)
    logger.success(f"completed")
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo 1: text wrapping with the bundled Chinese font.
    txt = "hello 幸福经常被描述为最终人生目标和人类追求的核心 但它通常涉及对个人生活中意义和目的的深刻感悟"
    font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
    print(wrap_text(text=txt, max_width=1000, font=font, fontsize=60))

    # Demo 2: render a final video from a previously generated task directory.
    task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
    task_dir = utils.task_dir(task_id)
    generate_video(video_path=f"{task_dir}/combined.mp4",
                   audio_path=f"{task_dir}/audio.mp3",
                   subtitle_path=f"{task_dir}/subtitle.srt",
                   output_file=f"{task_dir}/final.mp4",
                   video_aspect=VideoAspect.portrait,
                   threads=2,
                   font_name="STHeitiMedium.ttc",
                   fontsize=60,
                   stroke_color="#000000",
                   stroke_width=1.5,
                   text_fore_color="white",
                   text_background_color="transparent",
                   bgm_file="")
|
||||
101
app/services/voice.py
Normal file
101
app/services/voice.py
Normal file
@@ -0,0 +1,101 @@
|
||||
import asyncio
|
||||
from xml.sax.saxutils import unescape
|
||||
from edge_tts.submaker import mktimestamp
|
||||
from loguru import logger
|
||||
from edge_tts import submaker, SubMaker
|
||||
import edge_tts
|
||||
from app.utils import utils
|
||||
|
||||
|
||||
def tts(text: str, voice_name: str, voice_file: str) -> SubMaker:
    """Synthesize *text* with edge-tts into *voice_file* (mp3).

    Returns the SubMaker holding word-boundary timings, which
    ``create_subtitle`` later uses to build the SRT file.
    """
    logger.info(f"start, voice name: {voice_name}")

    async def _synthesize() -> SubMaker:
        communicate = edge_tts.Communicate(text, voice_name)
        maker = edge_tts.SubMaker()
        with open(voice_file, "wb") as audio_out:
            async for chunk in communicate.stream():
                chunk_type = chunk["type"]
                if chunk_type == "audio":
                    audio_out.write(chunk["data"])
                elif chunk_type == "WordBoundary":
                    maker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
        return maker

    result = asyncio.run(_synthesize())
    logger.info(f"completed, output file: {voice_file}")
    return result
|
||||
|
||||
|
||||
def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
    """Build an SRT subtitle file from edge-tts word-boundary data.

    1. Split the script into lines on punctuation.
    2. Greedily accumulate the word fragments from *sub_maker* and emit one
       SRT block each time the accumulated text matches the next script line.
    3. Write the blocks to *subtitle_file* (UTF-8).
    """

    def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
        """Format one SRT block, e.g.:

        1
        00:00:00,000 --> 00:00:02,360
        跑步是一项简单易行的运动
        """
        start_t = mktimestamp(start_time).replace(".", ",")
        end_t = mktimestamp(end_time).replace(".", ",")
        return (
            f"{idx}\n"
            f"{start_t} --> {end_t}\n"
            f"{sub_text}\n"
        )

    start_time = -1.0
    sub_items = []
    sub_index = 0

    script_lines = utils.split_string_by_punctuations(text)

    sub_line = ""
    for offset, sub in zip(sub_maker.offset, sub_maker.subs):
        _start_time, end_time = offset
        # Remember the start of the current (not yet emitted) line.
        if start_time < 0:
            start_time = _start_time

        sub = unescape(sub)
        sub_line += sub
        # BUGFIX: guard the index so a mismatch between the synthesized words
        # and the script lines can no longer raise IndexError once every
        # script line has been matched.
        if sub_index < len(script_lines) and sub_line == script_lines[sub_index]:
            sub_index += 1
            sub_items.append(formatter(
                idx=sub_index,
                start_time=start_time,
                end_time=end_time,
                sub_text=sub_line,
            ))
            start_time = -1.0
            sub_line = ""

    with open(subtitle_file, "w", encoding="utf-8") as file:
        file.write("\n".join(sub_items))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: synthesize the same text with several voices and
    # build a subtitle file for each run.
    temp_dir = utils.storage_dir("temp")

    voice_names = [
        # female voices
        "zh-CN-XiaoxiaoNeural",
        "zh-CN-XiaoyiNeural",
        # male voices
        "zh-CN-YunyangNeural",
        "zh-CN-YunxiNeural",
    ]
    text = """
预计未来3天深圳冷空气活动频繁,未来两天持续阴天有小雨,出门带好雨具;
10-11日持续阴天有小雨,日温差小,气温在13-17℃之间,体感阴凉;
12日天气短暂好转,早晚清凉;
"""

    for name in voice_names:
        audio_path = f"{temp_dir}/tts-{name}.mp3"
        srt_path = f"{temp_dir}/tts.mp3.srt"
        maker = tts(text=text, voice_name=name, voice_file=audio_path)
        create_subtitle(sub_maker=maker, text=text, subtitle_file=srt_path)
|
||||
Reference in New Issue
Block a user