This commit is contained in:
harry
2024-03-11 16:37:49 +08:00
parent d4f7b53b84
commit 06df797234
71 changed files with 2725 additions and 1 deletions

0
app/__init__.py Normal file
View File

60
app/asgi.py Normal file
View File

@@ -0,0 +1,60 @@
"""Application implementation - ASGI."""
from fastapi import FastAPI, Request
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from loguru import logger
from fastapi.staticfiles import StaticFiles
from app.config import config
from app.models.exception import HttpException
from app.router import root_api_router
from app.utils import utils
def exception_handler(request: Request, e: HttpException):
return JSONResponse(
status_code=e.status_code,
content=utils.get_response(e.status_code, e.data, e.message),
)
def validation_exception_handler(request: Request, e: RequestValidationError):
return JSONResponse(
status_code=400,
content=utils.get_response(status=400, data=e.errors(), message='field required'),
)
def get_application() -> FastAPI:
"""Initialize FastAPI application.
Returns:
FastAPI: Application object instance.
"""
instance = FastAPI(
title=config.project_name,
description=config.project_description,
version=config.project_version,
debug=False,
)
instance.include_router(root_api_router)
instance.add_exception_handler(HttpException, exception_handler)
instance.add_exception_handler(RequestValidationError, validation_exception_handler)
return instance
app = get_application()
public_dir = utils.public_dir()
app.mount("/", StaticFiles(directory=public_dir, html=True), name="")
@app.on_event("shutdown")
def shutdown_event():
logger.info("shutdown event")
@app.on_event("startup")
def startup_event():
logger.info("startup event")

51
app/config/__init__.py Normal file
View File

@@ -0,0 +1,51 @@
import os
import sys
from loguru import logger
from app.config import config
from app.utils import utils
def __init_logger():
_log_file = utils.storage_dir("logs/server.log")
_lvl = config.log_level
root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
def format_record(record):
# 获取日志记录中的文件全路径
file_path = record["file"].path
# 将绝对路径转换为相对于项目根目录的路径
relative_path = os.path.relpath(file_path, root_dir)
# 更新记录中的文件路径
record["file"].path = f"./{relative_path}"
# 返回修改后的格式字符串
# 您可以根据需要调整这里的格式
_format = '<green>{time:%Y-%m-%d %H:%M:%S}</> | ' + \
'<level>{level}</> | ' + \
'"{file.path}:{line}":<blue> {function}</> ' + \
'- <level>{message}</>' + "\n"
return _format
logger.remove()
logger.add(
sys.stdout,
level=_lvl,
format=format_record,
colorize=True,
)
logger.add(
_log_file,
level=_lvl,
format=format_record,
rotation="00:00",
retention="3 days",
backtrace=True,
diagnose=True,
enqueue=True,
)
__init_logger()

31
app/config/config.py Normal file
View File

@@ -0,0 +1,31 @@
import os
import tomli
from loguru import logger
root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
config_file = f"{root_dir}/config.toml"
logger.info(f"load config from file: {config_file}")
with open(config_file, mode="rb") as fp:
_cfg = tomli.load(fp)
app = _cfg.get("app", {})
whisper = _cfg.get("whisper", {})
hostname = os.uname().nodename
log_level = _cfg.get("log_level", "DEBUG")
listen_host = _cfg.get("listen_host", "0.0.0.0")
listen_port = _cfg.get("listen_port", 8080)
project_name = _cfg.get("project_name", "MoneyPrinterTurbo")
project_description = _cfg.get("project_description", "MoneyPrinterTurbo\n by 抖音-网旭哈瑞.AI")
project_version = _cfg.get("project_version", "1.0.0")
reload_debug = False
__cfg = {
"hostname": hostname,
"listen_host": listen_host,
"listen_port": listen_port,
}
logger.info(__cfg)

27
app/controllers/base.py Normal file
View File

@@ -0,0 +1,27 @@
from uuid import uuid4
from fastapi import Request
from app.config import config
from app.models.exception import HttpException
def get_task_id(request: Request):
task_id = request.headers.get('x-task-id')
if not task_id:
task_id = uuid4()
return str(task_id)
def get_api_key(request: Request):
api_key = request.headers.get('x-api-key')
return api_key
def verify_token(request: Request):
token = get_api_key(request)
if token != config.app.get("api_key", ""):
request_id = get_task_id(request)
request_url = request.url
user_agent = request.headers.get('user-agent')
raise HttpException(task_id=request_id, status_code=401, message=f"invalid token: {request_url}, {user_agent}")

9
app/controllers/ping.py Normal file
View File

@@ -0,0 +1,9 @@
from fastapi import APIRouter
from fastapi import Request
router = APIRouter()
@router.get("/ping", tags=["Health Check"], description="检查服务可用性", response_description="pong")
def ping(request: Request) -> str:
return "pong"

View File

@@ -0,0 +1,11 @@
from fastapi import APIRouter, Depends
def new_router(dependencies=None):
router = APIRouter()
router.tags = ['V1']
router.prefix = '/api/v1'
# 将认证依赖项应用于所有路由
if dependencies:
router.dependencies = dependencies
return router

View File

@@ -0,0 +1,44 @@
from os import path
from fastapi import Request, Depends, Path
from loguru import logger
from app.controllers import base
from app.controllers.v1.base import new_router
from app.models.exception import HttpException
from app.models.schema import TaskVideoRequest, TaskQueryResponse, TaskResponse, TaskQueryRequest
from app.services import task as tm
from app.utils import utils
# 认证依赖项
# router = new_router(dependencies=[Depends(base.verify_token)])
router = new_router()
@router.post("/videos", response_model=TaskResponse, summary="使用主题来生成短视频")
async def create_video(request: Request, body: TaskVideoRequest):
task_id = utils.get_uuid()
request_id = base.get_task_id(request)
try:
task = {
"task_id": task_id,
"request_id": request_id,
}
body_dict = body.dict()
task.update(body_dict)
result = tm.start(task_id=task_id, params=body)
task["result"] = result
logger.success(f"video created: {utils.to_json(task)}")
return utils.get_response(200, task)
except ValueError as e:
raise HttpException(task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}")
@router.get("/tasks/{task_id}", response_model=TaskQueryResponse, summary="查询任务状态")
async def get_task(request: Request, task_id: str = Path(..., description="任务ID"),
query: TaskQueryRequest = Depends()):
request_id = base.get_task_id(request)
data = query.dict()
data["task_id"] = task_id
raise HttpException(task_id=task_id, status_code=404,
message=f"{request_id}: task not found", data=data)

0
app/models/__init__.py Normal file
View File

4
app/models/const.py Normal file
View File

@@ -0,0 +1,4 @@
punctuations = [
"?", ",", ".", "", ";",
"", "", "", "", "",
]

26
app/models/exception.py Normal file
View File

@@ -0,0 +1,26 @@
import traceback
from typing import Any
from loguru import logger
class HttpException(Exception):
def __init__(self, task_id: str, status_code: int, message: str = '', data: Any = None):
self.message = message
self.status_code = status_code
self.data = data
# 获取异常堆栈信息
tb_str = traceback.format_exc().strip()
if not tb_str or tb_str == "NoneType: None":
msg = f'HttpException: {status_code}, {task_id}, {message}'
else:
msg = f'HttpException: {status_code}, {task_id}, {message}\n{tb_str}'
if status_code == 400:
logger.warning(msg)
else:
logger.error(msg)
class FileNotFoundException(Exception):
pass

119
app/models/schema.py Normal file
View File

@@ -0,0 +1,119 @@
from enum import Enum
from typing import Any, Optional
from pydantic import BaseModel
import warnings
# 忽略 Pydantic 的特定警告
warnings.filterwarnings("ignore", category=UserWarning, message="Field name.*shadows an attribute in parent.*")
class VideoAspect(str, Enum):
landscape = "16:9"
portrait = "9:16"
square = "1:1"
def to_resolution(self):
if self == VideoAspect.landscape.value:
return 1920, 1080
elif self == VideoAspect.portrait.value:
return 1080, 1920
elif self == VideoAspect.square.value:
return 1080, 1080
return 1080, 1920
VoiceNames = [
# zh-CN
"female-zh-CN-XiaoxiaoNeural",
"female-zh-CN-XiaoyiNeural",
"female-zh-CN-liaoning-XiaobeiNeural",
"female-zh-CN-shaanxi-XiaoniNeural",
"male-zh-CN-YunjianNeural",
"male-zh-CN-YunxiNeural",
"male-zh-CN-YunxiaNeural",
"male-zh-CN-YunyangNeural",
# "female-zh-HK-HiuGaaiNeural",
# "female-zh-HK-HiuMaanNeural",
# "male-zh-HK-WanLungNeural",
#
# "female-zh-TW-HsiaoChenNeural",
# "female-zh-TW-HsiaoYuNeural",
# "male-zh-TW-YunJheNeural",
# en-US
"female-en-US-AnaNeural",
"female-en-US-AriaNeural",
"female-en-US-AvaNeural",
"female-en-US-EmmaNeural",
"female-en-US-JennyNeural",
"female-en-US-MichelleNeural",
"male-en-US-AndrewNeural",
"male-en-US-BrianNeural",
"male-en-US-ChristopherNeural",
"male-en-US-EricNeural",
"male-en-US-GuyNeural",
"male-en-US-RogerNeural",
"male-en-US-SteffanNeural",
]
class VideoParams:
"""
{
"video_subject": "",
"video_aspect": "横屏 16:9西瓜视频",
"voice_name": "女生-晓晓",
"bgm_name": "random",
"font_name": "STHeitiMedium 黑体-中",
"text_color": "#FFFFFF",
"font_size": 60,
"stroke_color": "#000000",
"stroke_width": 1.5
}
"""
video_subject: str
video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
voice_name: Optional[str] = VoiceNames[0]
bgm_name: Optional[str] = "random"
font_name: Optional[str] = "STHeitiMedium.ttc"
text_fore_color: Optional[str] = "#FFFFFF"
font_size: int = 60
stroke_color: Optional[str] = "#000000"
stroke_width: float = 1.5
n_threads: Optional[int] = 2
paragraph_number: Optional[int] = 1
class BaseResponse(BaseModel):
status: int = 200
message: Optional[str] = 'success'
data: Any = None
class TaskVideoRequest(VideoParams, BaseModel):
pass
class TaskQueryRequest(BaseModel):
pass
######################################################################################################
######################################################################################################
######################################################################################################
######################################################################################################
class TaskResponse(BaseResponse):
class TaskResponseData(BaseModel):
task_id: str
task_type: str = ""
data: TaskResponseData
class TaskQueryResponse(BaseResponse):
pass

15
app/router.py Normal file
View File

@@ -0,0 +1,15 @@
"""Application configuration - root APIRouter.
Defines all FastAPI application endpoints.
Resources:
1. https://fastapi.tiangolo.com/tutorial/bigger-applications
"""
from fastapi import APIRouter
from app.controllers.v1 import video
root_api_router = APIRouter()
# v1
root_api_router.include_router(video.router)

0
app/services/__init__.py Normal file
View File

152
app/services/gpt.py Normal file
View File

@@ -0,0 +1,152 @@
import logging
import re
import json
import openai
from typing import List
from loguru import logger
from app.config import config
openai_api_key = config.app.get("openai_api_key")
if not openai_api_key:
raise ValueError("openai_api_key is not set, please set it in the config.toml file.")
openai_model_name = config.app.get("openai_model_name")
if not openai_model_name:
raise ValueError("openai_model_name is not set, please set it in the config.toml file.")
openai_base_url = config.app.get("openai_base_url")
openai.api_key = openai_api_key
openai_model_name = openai_model_name
if openai_base_url:
openai.base_url = openai_base_url
def _generate_response(prompt: str) -> str:
model_name = openai_model_name
response = openai.chat.completions.create(
model=model_name,
messages=[{"role": "user", "content": prompt}],
).choices[0].message.content
return response
def generate_script(video_subject: str, language: str = "zh-CN", paragraph_number: int = 1) -> str:
prompt = f"""
# Role: Video Script Generator
## Goals:
Generate a script for a video, depending on the subject of the video.
## Constrains:
1. the script is to be returned as a string with the specified number of paragraphs.
2. do not under any circumstance reference this prompt in your response.
3. get straight to the point, don't start with unnecessary things like, "welcome to this video".
4. you must not include any type of markdown or formatting in the script, never use a title.
5. only return the raw content of the script.
6. do not include "voiceover", "narrator" or similar indicators of what should be spoken at the beginning of each paragraph or line.
7. you must not mention the prompt, or anything about the script itself. also, never talk about the amount of paragraphs or lines. just write the script.
## Output Example:
What is the meaning of life. This question has puzzled philosophers.
# Initialization:
- video subject: {video_subject}
- output language: {language}
- number of paragraphs: {paragraph_number}
""".strip()
final_script = ""
logger.info(f"subject: {video_subject}")
logger.debug(f"prompt: \n{prompt}")
response = _generate_response(prompt=prompt)
# Return the generated script
if response:
# Clean the script
# Remove asterisks, hashes
response = response.replace("*", "")
response = response.replace("#", "")
# Remove markdown syntax
response = re.sub(r"\[.*\]", "", response)
response = re.sub(r"\(.*\)", "", response)
# Split the script into paragraphs
paragraphs = response.split("\n\n")
# Select the specified number of paragraphs
selected_paragraphs = paragraphs[:paragraph_number]
# Join the selected paragraphs into a single string
final_script = "\n\n".join(selected_paragraphs)
# Print to console the number of paragraphs used
# logger.info(f"number of paragraphs used: {len(selected_paragraphs)}")
else:
logging.error("gpt returned an empty response")
logger.success(f"completed: \n{final_script}")
return final_script
def generate_terms(video_subject: str, video_script: str, amount: int = 5) -> List[str]:
prompt = f"""
# Role: Video Search Terms Generator
## Goals:
Generate {amount} search terms for stock videos, depending on the subject of a video.
## Constrains:
1. the search terms are to be returned as a json-array of strings.
2. each search term should consist of 1-3 words, always add the main subject of the video.
3. you must only return the json-array of strings. you must not return anything else. you must not return the script.
4. the search terms must be related to the subject of the video.
5. reply with english search terms only.
## Output Example:
["search term 1", "search term 2", "search term 3","search term 4","search term 5"]
## Context:
### Video Subject
{video_subject}
### Video Script
{video_script}
""".strip()
logger.info(f"subject: {video_subject}")
logger.debug(f"prompt: \n{prompt}")
response = _generate_response(prompt)
search_terms = []
try:
search_terms = json.loads(response)
if not isinstance(search_terms, list) or not all(isinstance(term, str) for term in search_terms):
raise ValueError("response is not a list of strings.")
except (json.JSONDecodeError, ValueError):
# logger.warning(f"gpt returned an unformatted response. attempting to clean...")
# Attempt to extract list-like string and convert to list
match = re.search(r'\["(?:[^"\\]|\\.)*"(?:,\s*"[^"\\]*")*\]', response)
if match:
try:
search_terms = json.loads(match.group())
except json.JSONDecodeError:
logger.error(f"could not parse response: {response}")
return []
logger.success(f"completed: \n{search_terms}")
return search_terms
if __name__ == "__main__":
video_subject = "生命的意义是什么"
script = generate_script(video_subject=video_subject, language="zh-CN", paragraph_number=1)
# print("######################")
# print(script)
search_terms = generate_terms(video_subject=video_subject, video_script=script, amount=5)
# print("######################")
# print(search_terms)

112
app/services/material.py Normal file
View File

@@ -0,0 +1,112 @@
import time
import requests
from typing import List
from loguru import logger
from app.config import config
from app.models.schema import VideoAspect
from app.utils import utils
requested_count = 0
pexels_api_keys = config.app.get("pexels_api_keys")
if not pexels_api_keys:
raise ValueError("pexels_api_keys is not set, please set it in the config.toml file.")
def round_robin_api_key():
global requested_count
requested_count += 1
return pexels_api_keys[requested_count % len(pexels_api_keys)]
def search_videos(search_term: str,
wanted_count: int,
minimum_duration: int,
video_aspect: VideoAspect = VideoAspect.portrait,
locale: str = "zh-CN"
) -> List[str]:
aspect = VideoAspect(video_aspect)
video_orientation = aspect.name
video_width, video_height = aspect.to_resolution()
headers = {
"Authorization": round_robin_api_key()
}
# Build URL
query_url = f"https://api.pexels.com/videos/search?query={search_term}&per_page=15&orientation={video_orientation}&locale={locale}"
logger.info(f"searching videos: {query_url}")
# Send the request
r = requests.get(query_url, headers=headers)
# Parse the response
response = r.json()
video_urls = []
try:
videos_count = min(len(response["videos"]), wanted_count)
# loop through each video in the result
for i in range(videos_count):
# check if video has desired minimum duration
if response["videos"][i]["duration"] < minimum_duration:
continue
video_files = response["videos"][i]["video_files"]
# loop through each url to determine the best quality
for video in video_files:
# Check if video has a valid download link
# if ".com/external" in video["link"]:
w = int(video["width"])
h = int(video["height"])
if w == video_width and h == video_height:
video_urls.append(video["link"])
break
except Exception as e:
logger.error(f"search videos failed: {e}")
return video_urls
def save_video(video_url: str, save_dir: str) -> str:
video_id = f"vid-{str(int(time.time() * 1000))}"
video_path = f"{save_dir}/{video_id}.mp4"
with open(video_path, "wb") as f:
f.write(requests.get(video_url).content)
return video_path
def download_videos(task_id: str,
search_terms: List[str],
video_aspect: VideoAspect = VideoAspect.portrait,
wanted_count: int = 15,
minimum_duration: int = 5
) -> List[str]:
valid_video_urls = []
for search_term in search_terms:
# logger.info(f"searching videos for '{search_term}'")
video_urls = search_videos(search_term=search_term,
wanted_count=wanted_count,
minimum_duration=minimum_duration,
video_aspect=video_aspect)
logger.info(f"found {len(video_urls)} videos for '{search_term}'")
i = 0
for url in video_urls:
if url not in valid_video_urls:
valid_video_urls.append(url)
i += 1
if i >= 3:
break
logger.info(f"downloading videos: {len(valid_video_urls)}")
video_paths = []
save_dir = utils.task_dir(task_id)
for video_url in valid_video_urls:
try:
saved_video_path = save_video(video_url, save_dir)
video_paths.append(saved_video_path)
except Exception as e:
logger.error(f"failed to download video: {video_url}, {e}")
logger.success(f"downloaded {len(video_paths)} videos")
return video_paths

167
app/services/subtitle.py Normal file
View File

@@ -0,0 +1,167 @@
import json
import re
from faster_whisper import WhisperModel
from timeit import default_timer as timer
from loguru import logger
from app.config import config
from app.models import const
from app.utils import utils
model_size = config.whisper.get("model_size", "large-v3")
device = config.whisper.get("device", "cpu")
compute_type = config.whisper.get("compute_type", "int8")
model = WhisperModel(model_size_or_path=model_size, device=device, compute_type=compute_type)
def create(audio_file, subtitle_file: str = ""):
logger.info(f"start, output file: {subtitle_file}")
if not subtitle_file:
subtitle_file = f"{audio_file}.srt"
segments, info = model.transcribe(
audio_file,
beam_size=5,
word_timestamps=True,
vad_filter=True,
vad_parameters=dict(min_silence_duration_ms=500),
)
logger.info(f"detected language: '{info.language}', probability: {info.language_probability:.2f}")
start = timer()
subtitles = []
def recognized(seg_text, seg_start, seg_end):
seg_text = seg_text.strip()
if not seg_text:
return
msg = "[%.2fs -> %.2fs] %s" % (seg_start, seg_end, seg_text)
logger.debug(msg)
subtitles.append({
"msg": seg_text,
"start_time": seg_start,
"end_time": seg_end
})
for segment in segments:
words_idx = 0
words_len = len(segment.words)
seg_start = 0
seg_end = 0
seg_text = ""
if segment.words:
is_segmented = False
for word in segment.words:
if not is_segmented:
seg_start = word.start
is_segmented = True
seg_end = word.end
# 如果包含标点,则断句
seg_text += word.word
if utils.str_contains_punctuation(word.word):
# remove last char
seg_text = seg_text[:-1]
if not seg_text:
continue
recognized(seg_text, seg_start, seg_end)
is_segmented = False
seg_text = ""
if words_idx == 0 and segment.start < word.start:
seg_start = word.start
if words_idx == (words_len - 1) and segment.end > word.end:
seg_end = word.end
words_idx += 1
if not seg_text:
continue
recognized(seg_text, seg_start, seg_end)
end = timer()
diff = end - start
logger.info(f"complete, elapsed: {diff:.2f} s")
idx = 1
lines = []
for subtitle in subtitles:
text = subtitle.get("msg")
if text:
lines.append(utils.text_to_srt(idx, text, subtitle.get("start_time"), subtitle.get("end_time")))
idx += 1
sub = "\n".join(lines)
with open(subtitle_file, "w") as f:
f.write(sub)
logger.info(f"subtitle file created: {subtitle_file}")
def file_to_subtitles(filename):
times_texts = []
current_times = None
current_text = ""
index = 0
with open(filename, 'r') as f:
for line in f:
times = re.findall("([0-9]*:[0-9]*:[0-9]*,[0-9]*)", line)
if times:
current_times = line
elif line.strip() == '' and current_times:
index += 1
times_texts.append((index, current_times.strip(), current_text.strip()))
current_times, current_text = None, ""
elif current_times:
current_text += line
return times_texts
def correct(subtitle_file, video_script):
subtitle_items = file_to_subtitles(subtitle_file)
script_lines = utils.split_string_by_punctuations(video_script)
corrected = False
if len(subtitle_items) == len(script_lines):
for i in range(len(script_lines)):
script_line = script_lines[i].strip()
subtitle_line = subtitle_items[i][2]
if script_line != subtitle_line:
logger.warning(f"line {i + 1}, script: {script_line}, subtitle: {subtitle_line}")
subtitle_items[i] = (subtitle_items[i][0], subtitle_items[i][1], script_line)
corrected = True
if corrected:
with open(subtitle_file, "w") as fd:
for item in subtitle_items:
fd.write(f"{item[0]}\n{item[1]}\n{item[2]}\n\n")
logger.info(f"subtitle corrected")
else:
logger.success(f"subtitle is correct")
if __name__ == "__main__":
task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
task_dir = utils.task_dir(task_id)
subtitle_file = f"{task_dir}/subtitle.srt"
subtitles = file_to_subtitles(subtitle_file)
print(subtitles)
script_file = f"{task_dir}/script.json"
with open(script_file, "r") as f:
script_content = f.read()
s = json.loads(script_content)
script = s.get("script")
correct(subtitle_file, script)

113
app/services/task.py Normal file
View File

@@ -0,0 +1,113 @@
from os import path
from loguru import logger
from app.config import config
from app.models.schema import VideoParams, VoiceNames
from app.services import gpt, material, voice, video, subtitle
from app.utils import utils
def _parse_voice(name: str):
# "female-zh-CN-XiaoxiaoNeural",
# remove first part split by "-"
if name not in VoiceNames:
name = VoiceNames[0]
parts = name.split("-")
_lang = f"{parts[1]}-{parts[2]}"
_voice = f"{_lang}-{parts[3]}"
return _voice, _lang
def start(task_id, params: VideoParams):
"""
{
"video_subject": "",
"video_aspect": "横屏 16:9西瓜视频",
"voice_name": "女生-晓晓",
"enable_bgm": false,
"font_name": "STHeitiMedium 黑体-中",
"text_color": "#FFFFFF",
"font_size": 60,
"stroke_color": "#000000",
"stroke_width": 1.5
}
"""
logger.info(f"start task: {task_id}")
video_subject = params.video_subject
voice_name, language = _parse_voice(params.voice_name)
paragraph_number = params.paragraph_number
n_threads = params.n_threads
logger.info("\n\n## generating video script")
script = gpt.generate_script(video_subject=video_subject, language=language, paragraph_number=paragraph_number)
logger.info("\n\n## generating video terms")
search_terms = gpt.generate_terms(video_subject=video_subject, video_script=script, amount=5)
script_file = path.join(utils.task_dir(task_id), f"script.json")
script_data = {
"script": script,
"search_terms": search_terms
}
with open(script_file, "w") as f:
f.write(utils.to_json(script_data))
audio_file = path.join(utils.task_dir(task_id), f"audio.mp3")
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
logger.info("\n\n## generating audio")
sub_maker = voice.tts(text=script, voice_name=voice_name, voice_file=audio_file)
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
if subtitle_provider == "edge":
voice.create_subtitle(text=script, sub_maker=sub_maker, subtitle_file=subtitle_path)
if subtitle_provider == "whisper":
subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
logger.info("\n\n## correcting subtitle")
subtitle.correct(subtitle_file=subtitle_path, video_script=script)
logger.info("\n\n## downloading videos")
video_paths = material.download_videos(task_id=task_id, search_terms=search_terms, video_aspect=params.video_aspect,
wanted_count=20,
minimum_duration=5)
logger.info("\n\n## combining videos")
combined_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
video.combine_videos(combined_video_path=combined_video_path,
video_paths=video_paths,
audio_file=audio_file,
video_aspect=params.video_aspect,
max_clip_duration=5,
threads=n_threads)
final_video_path = path.join(utils.task_dir(task_id), f"final.mp4")
bgm_file = video.get_bgm_file(bgm_name=params.bgm_name)
logger.info("\n\n## generating video")
# Put everything together
video.generate_video(video_path=combined_video_path,
audio_path=audio_file,
subtitle_path=subtitle_path,
output_file=final_video_path,
video_aspect=params.video_aspect,
threads=n_threads,
font_name=params.font_name,
fontsize=params.font_size,
text_fore_color=params.text_fore_color,
stroke_color=params.stroke_color,
stroke_width=params.stroke_width,
bgm_file=bgm_file
)
logger.start(f"task {task_id} finished")
return {
"video_file": final_video_path,
}

246
app/services/video.py Normal file
View File

@@ -0,0 +1,246 @@
import glob
import random
from typing import List
from PIL import ImageFont
from loguru import logger
from moviepy.editor import *
from moviepy.video.fx.crop import crop
from moviepy.video.tools.subtitles import SubtitlesClip
from app.models.schema import VideoAspect
from app.utils import utils
def get_bgm_file(bgm_name: str = "random"):
if not bgm_name:
return ""
if bgm_name == "random":
suffix = "*.mp3"
song_dir = utils.song_dir()
# 使用glob.glob获取指定扩展名的文件列表
files = glob.glob(os.path.join(song_dir, suffix))
# 使用random.choice从列表中随机选择一个文件
return random.choice(files)
file = os.path.join(utils.song_dir(), bgm_name)
if os.path.exists(file):
return file
return ""
def combine_videos(combined_video_path: str,
video_paths: List[str],
audio_file: str,
video_aspect: VideoAspect = VideoAspect.portrait,
max_clip_duration: int = 5,
threads: int = 2,
) -> str:
logger.info(f"combining {len(video_paths)} videos into one file: {combined_video_path}")
audio_clip = AudioFileClip(audio_file)
max_duration = audio_clip.duration
logger.info(f"max duration of audio: {max_duration} seconds")
# Required duration of each clip
req_dur = max_duration / len(video_paths)
logger.info(f"each clip will be maximum {req_dur} seconds long")
aspect = VideoAspect(video_aspect)
video_width, video_height = aspect.to_resolution()
clips = []
tot_dur = 0
# Add downloaded clips over and over until the duration of the audio (max_duration) has been reached
while tot_dur < max_duration:
for video_path in video_paths:
clip = VideoFileClip(video_path)
clip = clip.without_audio()
# Check if clip is longer than the remaining audio
if (max_duration - tot_dur) < clip.duration:
clip = clip.subclip(0, (max_duration - tot_dur))
# Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
elif req_dur < clip.duration:
clip = clip.subclip(0, req_dur)
clip = clip.set_fps(30)
# Not all videos are same size, so we need to resize them
# logger.info(f"{video_path}: size is {clip.w} x {clip.h}, expected {video_width} x {video_height}")
if clip.w != video_width or clip.h != video_height:
if round((clip.w / clip.h), 4) < 0.5625:
clip = crop(clip,
width=clip.w,
height=round(clip.w / 0.5625),
x_center=clip.w / 2,
y_center=clip.h / 2
)
else:
clip = crop(clip,
width=round(0.5625 * clip.h),
height=clip.h,
x_center=clip.w / 2,
y_center=clip.h / 2
)
logger.info(f"resizing video to {video_width} x {video_height}")
clip = clip.resize((video_width, video_height))
if clip.duration > max_clip_duration:
clip = clip.subclip(0, max_clip_duration)
clips.append(clip)
tot_dur += clip.duration
final_clip = concatenate_videoclips(clips)
final_clip = final_clip.set_fps(30)
logger.info(f"writing")
final_clip.write_videofile(combined_video_path, threads=threads)
logger.success(f"completed")
return combined_video_path
def wrap_text(text, max_width, font='Arial', fontsize=60):
# 创建字体对象
font = ImageFont.truetype(font, fontsize)
def get_text_size(inner_text):
left, top, right, bottom = font.getbbox(inner_text)
return right - left, bottom - top
width, height = get_text_size(text)
if width <= max_width:
return text
logger.warning(f"wrapping text, max_width: {max_width}, text_width: {width}, text: {text}")
_wrapped_lines_ = []
# 使用textwrap尝试分行然后检查每行是否符合宽度限制
chars = list(text)
_txt_ = ''
for char in chars:
_txt_ += char
_width, _height = get_text_size(_txt_)
if _width <= max_width:
continue
else:
_wrapped_lines_.append(_txt_)
_txt_ = ''
_wrapped_lines_.append(_txt_)
return '\n'.join(_wrapped_lines_)
def generate_video(video_path: str,
audio_path: str,
subtitle_path: str,
output_file: str,
video_aspect: VideoAspect = VideoAspect.portrait,
threads: int = 2,
font_name: str = "",
fontsize: int = 60,
stroke_color: str = "#000000",
stroke_width: float = 1.5,
text_fore_color: str = "white",
text_background_color: str = "transparent",
bgm_file: str = "",
):
aspect = VideoAspect(video_aspect)
video_width, video_height = aspect.to_resolution()
logger.info(f"start, video size: {video_width} x {video_height}")
logger.info(f" ① video: {video_path}")
logger.info(f" ② audio: {audio_path}")
logger.info(f" ③ subtitle: {subtitle_path}")
logger.info(f" ④ output: {output_file}")
if not font_name:
font_name = "STHeitiMedium.ttc"
font_path = os.path.join(utils.font_dir(), font_name)
logger.info(f"using font: {font_path}")
# 自定义的生成器函数,包含换行逻辑
def generator(txt):
# 应用自动换行
wrapped_txt = wrap_text(txt, max_width=video_width - 100,
font=font_path,
fontsize=fontsize) # 调整max_width以适应你的视频
return TextClip(
wrapped_txt,
font=font_path,
fontsize=fontsize,
color=text_fore_color,
bg_color=text_background_color,
stroke_color=stroke_color,
stroke_width=stroke_width,
print_cmd=False,
)
position_height = video_height - 200
if video_aspect == VideoAspect.landscape:
position_height = video_height - 100
clips = [
VideoFileClip(video_path),
# subtitles.set_position(lambda _t: ('center', position_height))
]
# Burn the subtitles into the video
if subtitle_path and os.path.exists(subtitle_path):
subtitles = SubtitlesClip(subtitle_path, generator)
clips.append(subtitles.set_position(lambda _t: ('center', position_height)))
result = CompositeVideoClip(clips)
# Add the audio
audio = AudioFileClip(audio_path)
result = result.set_audio(audio)
temp_output_file = f"{output_file}.temp.mp4"
logger.info(f"writing to temp file: {temp_output_file}")
result.write_videofile(temp_output_file, threads=threads or 2)
video_clip = VideoFileClip(temp_output_file)
if bgm_file:
logger.info(f"adding background music: {bgm_file}")
# Add song to video at 30% volume using moviepy
original_duration = video_clip.duration
original_audio = video_clip.audio
song_clip = AudioFileClip(bgm_file).set_fps(44100)
# Set the volume of the song to 10% of the original volume
song_clip = song_clip.volumex(0.2).set_fps(44100)
# Add the song to the video
comp_audio = CompositeAudioClip([original_audio, song_clip])
video_clip = video_clip.set_audio(comp_audio)
video_clip = video_clip.set_fps(30)
video_clip = video_clip.set_duration(original_duration)
# 编码为aac否则iPhone里面无法播放
logger.info(f"encoding audio codec to aac")
video_clip.write_videofile(output_file, audio_codec="aac", threads=threads)
# delete the temp file
os.remove(temp_output_file)
logger.success(f"completed")
if __name__ == "__main__":
txt = "hello 幸福经常被描述为最终人生目标和人类追求的核心 但它通常涉及对个人生活中意义和目的的深刻感悟"
font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
t = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
print(t)
task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
task_dir = utils.task_dir(task_id)
video_file = f"{task_dir}/combined.mp4"
audio_file = f"{task_dir}/audio.mp3"
subtitle_file = f"{task_dir}/subtitle.srt"
output_file = f"{task_dir}/final.mp4"
generate_video(video_path=video_file,
audio_path=audio_file,
subtitle_path=subtitle_file,
output_file=output_file,
video_aspect=VideoAspect.portrait,
threads=2,
font_name="STHeitiMedium.ttc",
fontsize=60,
stroke_color="#000000",
stroke_width=1.5,
text_fore_color="white",
text_background_color="transparent",
bgm_file=""
)

101
app/services/voice.py Normal file
View File

@@ -0,0 +1,101 @@
import asyncio
from xml.sax.saxutils import unescape
from edge_tts.submaker import mktimestamp
from loguru import logger
from edge_tts import submaker, SubMaker
import edge_tts
from app.utils import utils
def tts(text: str, voice_name: str, voice_file: str) -> SubMaker:
logger.info(f"start, voice name: {voice_name}")
async def _do() -> SubMaker:
communicate = edge_tts.Communicate(text, voice_name)
sub_maker = edge_tts.SubMaker()
with open(voice_file, "wb") as file:
async for chunk in communicate.stream():
if chunk["type"] == "audio":
file.write(chunk["data"])
elif chunk["type"] == "WordBoundary":
sub_maker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
return sub_maker
sub_maker = asyncio.run(_do())
logger.info(f"completed, output file: {voice_file}")
return sub_maker
def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
"""
优化字幕文件
1. 将字幕文件按照标点符号分割成多行
2. 逐行匹配字幕文件中的文本
3. 生成新的字幕文件
"""
def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
"""
1
00:00:00,000 --> 00:00:02,360
跑步是一项简单易行的运动
"""
start_t = mktimestamp(start_time).replace(".", ",")
end_t = mktimestamp(end_time).replace(".", ",")
return (
f"{idx}\n"
f"{start_t} --> {end_t}\n"
f"{sub_text}\n"
)
start_time = -1.0
sub_items = []
sub_index = 0
script_lines = utils.split_string_by_punctuations(text)
sub_line = ""
for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
_start_time, end_time = offset
if start_time < 0:
start_time = _start_time
sub = unescape(sub)
sub_line += sub
if sub_line == script_lines[sub_index]:
sub_index += 1
sub_items.append(formatter(
idx=sub_index,
start_time=start_time,
end_time=end_time,
sub_text=sub_line,
))
start_time = -1.0
sub_line = ""
with open(subtitle_file, "w", encoding="utf-8") as file:
file.write("\n".join(sub_items))
if __name__ == "__main__":
temp_dir = utils.storage_dir("temp")
voice_names = [
# 女性
"zh-CN-XiaoxiaoNeural",
"zh-CN-XiaoyiNeural",
# 男性
"zh-CN-YunyangNeural",
"zh-CN-YunxiNeural",
]
text = """
预计未来3天深圳冷空气活动频繁未来两天持续阴天有小雨出门带好雨具
10-11日持续阴天有小雨日温差小气温在13-17℃之间体感阴凉
12日天气短暂好转早晚清凉
"""
for voice_name in voice_names:
voice_file = f"{temp_dir}/tts-{voice_name}.mp3"
subtitle_file = f"{temp_dir}/tts.mp3.srt"
sub_maker = tts(text=text, voice_name=voice_name, voice_file=voice_file)
create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)

167
app/utils/utils.py Normal file
View File

@@ -0,0 +1,167 @@
import os
import threading
from typing import Any
from loguru import logger
import json
from uuid import uuid4
import urllib3
from app.models import const
urllib3.disable_warnings()
def get_response(status: int, data: Any = None, message: str = ""):
obj = {
'status': status,
}
if data:
obj['data'] = data
if message:
obj['message'] = message
return obj
def to_json(obj):
# 定义一个辅助函数来处理不同类型的对象
def serialize(o):
# 如果对象是可序列化类型,直接返回
if isinstance(o, (int, float, bool, str)) or o is None:
return o
# 如果对象是二进制数据转换为base64编码的字符串
elif isinstance(o, bytes):
return "*** binary data ***"
# 如果对象是字典,递归处理每个键值对
elif isinstance(o, dict):
return {k: serialize(v) for k, v in o.items()}
# 如果对象是列表或元组,递归处理每个元素
elif isinstance(o, (list, tuple)):
return [serialize(item) for item in o]
# 如果对象是自定义类型尝试返回其__dict__属性
elif hasattr(o, '__dict__'):
return serialize(o.__dict__)
# 其他情况返回None或者可以选择抛出异常
else:
return None
# 使用serialize函数处理输入对象
serialized_obj = serialize(obj)
# 序列化处理后的对象为JSON字符串
return json.dumps(serialized_obj, ensure_ascii=False, indent=4)
def get_uuid(remove_hyphen: bool = False):
u = str(uuid4())
if remove_hyphen:
u = u.replace("-", "")
return u
def root_dir():
return os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
def storage_dir(sub_dir: str = ""):
d = os.path.join(root_dir(), "storage")
if sub_dir:
d = os.path.join(d, sub_dir)
return d
def resource_dir(sub_dir: str = ""):
d = os.path.join(root_dir(), "resource")
if sub_dir:
d = os.path.join(d, sub_dir)
return d
def task_dir(sub_dir: str = ""):
d = os.path.join(storage_dir(), "tasks")
if sub_dir:
d = os.path.join(d, sub_dir)
if not os.path.exists(d):
os.makedirs(d)
return d
def font_dir(sub_dir: str = ""):
d = resource_dir(f"fonts")
if sub_dir:
d = os.path.join(d, sub_dir)
if not os.path.exists(d):
os.makedirs(d)
return d
def song_dir(sub_dir: str = ""):
d = resource_dir(f"songs")
if sub_dir:
d = os.path.join(d, sub_dir)
if not os.path.exists(d):
os.makedirs(d)
return d
def public_dir(sub_dir: str = ""):
d = resource_dir(f"public")
if sub_dir:
d = os.path.join(d, sub_dir)
if not os.path.exists(d):
os.makedirs(d)
return d
def run_in_background(func, *args, **kwargs):
def run():
try:
func(*args, **kwargs)
except Exception as e:
logger.error(f"run_in_background error: {e}")
thread = threading.Thread(target=run)
thread.start()
return thread
def time_convert_seconds_to_hmsm(seconds) -> str:
hours = int(seconds // 3600)
seconds = seconds % 3600
minutes = int(seconds // 60)
milliseconds = int(seconds * 1000) % 1000
seconds = int(seconds % 60)
return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, seconds, milliseconds)
def text_to_srt(idx: int, msg: str, start_time: float, end_time: float) -> str:
start_time = time_convert_seconds_to_hmsm(start_time)
end_time = time_convert_seconds_to_hmsm(end_time)
srt = """%d
%s --> %s
%s
""" % (
idx,
start_time,
end_time,
msg,
)
return srt
def str_contains_punctuation(word):
for p in const.punctuations:
if p in word:
return True
return False
def split_string_by_punctuations(s):
result = []
txt = ""
for char in s:
if char not in const.punctuations:
txt += char
else:
result.append(txt.strip())
txt = ""
return result