65 Commits

Author SHA1 Message Date
Harry
63c3402c94 Update version to 1.2.2 2024-12-06 13:45:43 +08:00
Harry
5a6dd6c7a5 Merge pull request #541 from yyhhyyyyyy/update-requirements
⬆️ deps: Upgrade dependencies to latest versions and address minor issues
2024-12-05 11:02:14 +08:00
yyhhyy
8c226322a0 Merge branch 'main' into update-requirements 2024-12-05 10:59:41 +08:00
Harry
3a7888937f Merge pull request #536 from Felix3322/main
better requirements.txt
2024-12-05 10:47:26 +08:00
yyhhyyyyyy
6760a0ad00 📝 docs: Update documentation 2024-12-05 10:34:09 +08:00
yyhhyyyyyy
6288b70ae2 ⬆️ deps: Upgrade dependencies to latest versions and address minor issues 2024-12-05 10:16:38 +08:00
Jiaying Liu
4adc010388 Update requirements.txt 2024-11-27 15:04:46 -05:00
Harry
162b5e17c3 Merge pull request #508 from flingjie/main
allow api key empty when using ollama
2024-11-20 15:45:40 +08:00
Harry
0d43ba2124 Merge pull request #505 from LucasHenriqueDiniz/main
feat: add PT-BR translation
2024-11-20 15:45:18 +08:00
Harry
080d8d82b4 Merge pull request #504 from Dreyfi/fix-403-error-pexels-request
Fix the response 403 from pexels - search_videos_pexels - failed to download videos, maybe the network is not available. if you are in China, please use a VPN.
2024-11-20 15:44:46 +08:00
Harry
fc50e16bc5 Merge pull request #486 from FLY-Open-AI/main
[Readme] Docker deployment: start command optimized.
2024-11-20 15:44:08 +08:00
Jie.F
345b6d59a1 allow api key empty when using ollama
the ollama API key is not required
2024-10-08 09:44:39 +08:00
Dreyfi
4ec19fd56a Add headers with user_agent to save_video request 2024-09-30 15:48:54 +10:00
Lucas Diniz
136630ec60 feat: add PT-BR translation 2024-09-29 19:30:12 -03:00
Dreyfi
9d3d99a595 Fix the response 403 from pexels
search_videos_pexels - failed to download videos, maybe the network is not available. if you are in China, please use a VPN.
2024-09-28 16:25:53 +10:00
wangyanfei
747c745ec0 [Readme] Docker deployment: start command optimized. The latest Docker installs Docker Compose as a plugin, so the start command changes to docker compose up 2024-08-31 07:22:05 +08:00
Harry
a53ca843e8 Merge pull request #467 from harry0703/dev
update readme
2024-07-26 18:23:52 +08:00
harry
8b18d84d8a update readme 2024-07-26 18:23:04 +08:00
Harry
edc4df6eb5 Merge pull request #466 from harry0703/dev
fixed: subtitle generation failure
2024-07-26 17:56:32 +08:00
harry
5ed98d317c fixed: subtitle generation failure 2024-07-26 17:55:26 +08:00
Harry
c22ef5f1d2 Merge pull request #462 from harry0703/dev
update readme
2024-07-25 15:00:07 +08:00
harry
bcc9621976 update readme 2024-07-25 14:59:45 +08:00
Harry
6512e3f140 Merge pull request #461 from harry0703/dev
Optimize memory usage in moviepy
2024-07-25 13:58:46 +08:00
harry
931e1a0caa Optimize memory usage in moviepy
Upgrade version number to 1.2.0
2024-07-25 13:57:39 +08:00
yyhhyy
84ae8e5248 Merge pull request #460 from yyhhyyyyyy/code-formatting
Code Formatting
2024-07-25 13:39:05 +08:00
yyhhyyyyyy
5c2db3aa92 resolve issue with video concatenation
order always being random
2024-07-25 13:36:21 +08:00
yyhhyyyyyy
905841965a Format project code 2024-07-24 14:59:06 +08:00
Harry
bbd4e94941 Merge pull request #459 from yyhhyyyyyy/customize-subtitle-position
feat: support custom subtitle positioning
2024-07-24 14:35:50 +08:00
yyhhyyyyyy
b89250874b Change default value to 70.0 2024-07-24 14:31:56 +08:00
yyhhyyyyyy
e8b20c697d feat: support custom subtitle positioning 2024-07-24 14:25:20 +08:00
Harry
e64041c93d Merge pull request #458 from yyhhyyyyyy/refactor-task-add-subtitle-api
Refactor task.py and add subtitle API
2024-07-24 11:47:27 +08:00
yyhhyyyyyy
17b4a61e64 1.Refactor task.py to encapsulate separable functions.
2.Add a new subtitle API.
2024-07-23 17:00:23 +08:00
Harry
6d520a4266 Merge pull request #453 from yyhhyyyyyy/fit-oneapi
fit(oneapi):Fix the issue where model_name is always empty when using OneAPI as the LLM source.
2024-07-22 10:38:10 +08:00
yyhhyyyyyy
7ff8467f9d Fix the issue where model_name is always empty
when using OneAPI as the LLM source.
2024-07-20 09:36:19 +08:00
Harry
4cf9cefb5c Merge pull request #450 from yyhhyyyyyy/fit-subtitle-correct
fit(subtitle):Fix subtitle correction logic
2024-07-20 08:25:25 +08:00
yyhhyyyyyy
33534db8bb 1. .gitignore ignores the models folder
2. Fix subtitle correction logic
2024-07-19 15:00:17 +08:00
Harry
ec16f1c41b Merge pull request #449 from harry0703/dev
update readme
2024-07-19 14:21:56 +08:00
harry
9653d7d18a update readme 2024-07-19 14:21:35 +08:00
Harry
36a367d713 Merge pull request #448 from yyhhyyyyyy/add-rate
feat(azure_tts_v1): Allows to control the speed of speech generation.
2024-07-19 14:17:15 +08:00
yyhhyyyyyy
77b304537a Speech Rate 2024-07-19 11:15:36 +08:00
yyhhyyyyyy
63fb848a17 1. Add azure_tts_v1 to control the speed of speech 2024-07-19 11:06:34 +08:00
Harry
6853163905 Merge pull request #447 from harry0703/dev
update readme
2024-07-15 14:09:55 +08:00
harry
052c29b579 update readme 2024-07-15 14:09:33 +08:00
Harry
df62529f2a Merge pull request #443 from harry0703/dev
update readme
2024-07-09 13:41:04 +08:00
harry
934eff13ae update readme 2024-07-09 13:40:43 +08:00
Harry
0472338184 Merge pull request #437 from harry0703/dev
support baidu ERNIE llm
2024-07-03 21:13:51 +08:00
harry
66c81a04bf support baidu ERNIE llm 2024-07-03 21:12:21 +08:00
Harry
8dd66cf624 Merge pull request #435 from harry0703/dev
update readme
2024-07-02 10:00:53 +08:00
harry
dca23d99e4 update readme 2024-07-02 09:57:53 +08:00
Harry
42560cc7f5 Merge pull request #421 from harry0703/dev
update readme
2024-06-21 11:01:41 +08:00
harry
11478063e7 update readme 2024-06-21 11:01:15 +08:00
Harry
bf0dbcc045 Merge pull request #414 from harry0703/dev
update readme
2024-06-15 17:37:36 +08:00
harry
43df593ac3 update readme 2024-06-15 17:36:37 +08:00
Harry
7cf21c6541 Merge pull request #408 from harry0703/dev
update readme
2024-06-11 11:50:48 +08:00
harry
f76f905833 update readme 2024-06-11 11:48:04 +08:00
Harry
0f27c26042 Merge pull request #399 from harry0703/dev
update readme
2024-06-04 10:36:18 +08:00
harry
e1d7318cee update readme 2024-06-04 10:34:32 +08:00
Harry
6408c31b7f Merge pull request #391 from harry0703/dev
update readme
2024-05-28 18:41:24 +08:00
harry
b0d694db08 update readme 2024-05-28 14:51:03 +08:00
Harry
730c2a461a Merge pull request #381 from harry0703/dev
update readme
2024-05-23 18:21:05 +08:00
harry
bdb49a4c82 update readme 2024-05-23 18:20:45 +08:00
Harry
a4692060a0 Merge pull request #372 from harry0703/dev
enhanced exception handling for generating terms
2024-05-17 17:12:13 +08:00
harry
fc6844dd19 enhanced exception handling for generating terms 2024-05-17 17:11:35 +08:00
Harry
d740a6babd Merge pull request #370 from harry0703/dev
update readme
2024-05-17 08:44:01 +08:00
harry
9c58991830 update readme 2024-05-17 08:43:35 +08:00
39 changed files with 1501 additions and 668 deletions

.gitignore
View File

@@ -20,3 +20,6 @@ node_modules
 /sites/docs/.vuepress/.cache
 # VuePress 默认构建生成的静态文件目录 
 /sites/docs/.vuepress/dist
+# 模型目录
+/models/
+./models/*

View File

@@ -1,5 +1,5 @@
 # Use an official Python runtime as a parent image
-FROM python:3.10-slim-bullseye
+FROM python:3.11-slim-bullseye
 # Set the working directory in the container
 WORKDIR /MoneyPrinterTurbo

View File

@@ -152,6 +152,8 @@ cd MoneyPrinterTurbo
 docker-compose up
 ```
+
+> Note: The latest version of docker will automatically install docker compose in the form of a plug-in, and the start command is adjusted to `docker compose up`
 #### ② Access the Web Interface
 Open your browser and visit http://0.0.0.0:8501
@@ -170,7 +172,7 @@ using [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index
 ```shell
 git clone https://github.com/harry0703/MoneyPrinterTurbo.git
 cd MoneyPrinterTurbo
-conda create -n MoneyPrinterTurbo python=3.10
+conda create -n MoneyPrinterTurbo python=3.11
 conda activate MoneyPrinterTurbo
 pip install -r requirements.txt
 ```

View File

@@ -59,7 +59,7 @@
 - [x] 支持 **背景音乐**,随机或者指定音乐文件,可设置`背景音乐音量`
 - [x] 视频素材来源 **高清**,而且 **无版权**,也可以使用自己的 **本地素材**
-- [x] 支持 **OpenAI**、**Moonshot**、**Azure**、**gpt4free**、**one-api**、**通义千问**、**Google Gemini**、**Ollama**、**DeepSeek** 等多种模型接入
+- [x] 支持 **OpenAI**、**Moonshot**、**Azure**、**gpt4free**、**one-api**、**通义千问**、**Google Gemini**、**Ollama**、**DeepSeek**、**文心一言** 等多种模型接入
   - 中国用户建议使用 **DeepSeek** 或 **Moonshot** 作为大模型提供商(国内可直接访问,不需要VPN。注册就送额度,基本够用)
 ### 后期计划 📅
@@ -121,11 +121,10 @@
 ## 快速开始 🚀
-下载一键启动包,解压直接使用(路径不要有 **中文**、**空格**)
+下载一键启动包,解压直接使用(路径不要有 **中文**、**特殊字符**、**空格**)
 ### Windows
-- 百度网盘(1.2.1 最新版本): https://pan.baidu.com/s/1pSNjxTYiVENulTLm6zieMQ?pwd=g36q 提取码: g36q
+- 百度网盘: https://pan.baidu.com/s/1jKF1mgsjfN8fBk6uTEHArQ?pwd=jrp7 提取码: jrp7
 下载后,建议先**双击执行** `update.bat` 更新到**最新代码**,然后双击 `start.bat` 启动
@@ -170,6 +169,8 @@ cd MoneyPrinterTurbo
 docker-compose up
 ```
+
+> 注意:最新版的docker安装时会自动以插件的形式安装docker compose,启动命令调整为 docker compose up
 #### ② 访问Web界面
 打开浏览器,访问 http://0.0.0.0:8501
@@ -192,7 +193,7 @@ docker-compose up
 ```shell
 git clone https://github.com/harry0703/MoneyPrinterTurbo.git
 cd MoneyPrinterTurbo
-conda create -n MoneyPrinterTurbo python=3.10
+conda create -n MoneyPrinterTurbo python=3.11
 conda activate MoneyPrinterTurbo
 pip install -r requirements.txt
 ```

View File

@@ -1,4 +1,5 @@
 """Application implementation - ASGI."""
+
 import os
 from fastapi import FastAPI, Request
@@ -24,7 +25,9 @@ def exception_handler(request: Request, e: HttpException):
 def validation_exception_handler(request: Request, e: RequestValidationError):
     return JSONResponse(
         status_code=400,
-        content=utils.get_response(status=400, data=e.errors(), message='field required'),
+        content=utils.get_response(
+            status=400, data=e.errors(), message="field required"
+        ),
     )
@@ -61,7 +64,9 @@ app.add_middleware(
 )
 task_dir = utils.task_dir()
-app.mount("/tasks", StaticFiles(directory=task_dir, html=True, follow_symlink=True), name="")
+app.mount(
+    "/tasks", StaticFiles(directory=task_dir, html=True, follow_symlink=True), name=""
+)
 public_dir = utils.public_dir()
 app.mount("/", StaticFiles(directory=public_dir, html=True), name="")

View File

@@ -10,7 +10,9 @@ from app.utils import utils
 def __init_logger():
     # _log_file = utils.storage_dir("logs/server.log")
     _lvl = config.log_level
-    root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
+    root_dir = os.path.dirname(
+        os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+    )
     def format_record(record):
         # 获取日志记录中的文件全路径
@@ -21,10 +23,13 @@ def __init_logger():
         record["file"].path = f"./{relative_path}"
         # 返回修改后的格式字符串
         # 您可以根据需要调整这里的格式
-        _format = '<green>{time:%Y-%m-%d %H:%M:%S}</> | ' + \
-                  '<level>{level}</> | ' + \
-                  '"{file.path}:{line}":<blue> {function}</> ' + \
-                  '- <level>{message}</>' + "\n"
+        _format = (
+            "<green>{time:%Y-%m-%d %H:%M:%S}</> | "
+            + "<level>{level}</> | "
+            + '"{file.path}:{line}":<blue> {function}</> '
+            + "- <level>{message}</>"
+            + "\n"
+        )
         return _format
     logger.remove()

View File

@@ -25,7 +25,7 @@ def load_config():
         _config_ = toml.load(config_file)
     except Exception as e:
         logger.warning(f"load config failed: {str(e)}, try to load as utf-8-sig")
-        with open(config_file, mode="r", encoding='utf-8-sig') as fp:
+        with open(config_file, mode="r", encoding="utf-8-sig") as fp:
             _cfg_content = fp.read()
             _config_ = toml.loads(_cfg_content)
     return _config_
@@ -52,9 +52,11 @@ log_level = _cfg.get("log_level", "DEBUG")
 listen_host = _cfg.get("listen_host", "0.0.0.0")
 listen_port = _cfg.get("listen_port", 8080)
 project_name = _cfg.get("project_name", "MoneyPrinterTurbo")
-project_description = _cfg.get("project_description",
-                               "<a href='https://github.com/harry0703/MoneyPrinterTurbo'>https://github.com/harry0703/MoneyPrinterTurbo</a>")
-project_version = _cfg.get("project_version", "1.1.9")
+project_description = _cfg.get(
+    "project_description",
+    "<a href='https://github.com/harry0703/MoneyPrinterTurbo'>https://github.com/harry0703/MoneyPrinterTurbo</a>",
+)
+project_version = _cfg.get("project_version", "1.2.2")
 reload_debug = False
 imagemagick_path = app.get("imagemagick_path", "")

View File

@@ -7,14 +7,14 @@ from app.models.exception import HttpException
 def get_task_id(request: Request):
-    task_id = request.headers.get('x-task-id')
+    task_id = request.headers.get("x-task-id")
     if not task_id:
         task_id = uuid4()
     return str(task_id)
 def get_api_key(request: Request):
-    api_key = request.headers.get('x-api-key')
+    api_key = request.headers.get("x-api-key")
     return api_key
@@ -23,5 +23,9 @@ def verify_token(request: Request):
     if token != config.app.get("api_key", ""):
         request_id = get_task_id(request)
         request_url = request.url
-        user_agent = request.headers.get('user-agent')
-        raise HttpException(task_id=request_id, status_code=401, message=f"invalid token: {request_url}, {user_agent}")
+        user_agent = request.headers.get("user-agent")
+        raise HttpException(
+            task_id=request_id,
+            status_code=401,
+            message=f"invalid token: {request_url}, {user_agent}",
+        )
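The reformatted `verify_token` above rejects requests whose `x-api-key` header does not match the configured key, raising a 401 with the request URL and user agent in the message. A framework-free sketch of the same check; the dict-based `headers` argument and the standalone `HttpException` class here are simplifications, not the project's actual FastAPI types:

```python
from uuid import uuid4


class HttpException(Exception):
    """Simplified stand-in for app.models.exception.HttpException."""

    def __init__(self, task_id: str, status_code: int, message: str):
        super().__init__(message)
        self.task_id = task_id
        self.status_code = status_code
        self.message = message


def verify_token(headers: dict, configured_key: str) -> None:
    # Compare the client-supplied x-api-key header with the configured key;
    # reject with a 401 when they differ.
    token = headers.get("x-api-key")
    if token != configured_key:
        request_id = headers.get("x-task-id") or str(uuid4())
        user_agent = headers.get("user-agent")
        raise HttpException(
            task_id=request_id,
            status_code=401,
            message=f"invalid token: {user_agent}",
        )
```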

View File

@@ -18,11 +18,15 @@ class TaskManager:
             print(f"add task: {func.__name__}, current_tasks: {self.current_tasks}")
             self.execute_task(func, *args, **kwargs)
         else:
-            print(f"enqueue task: {func.__name__}, current_tasks: {self.current_tasks}")
+            print(
+                f"enqueue task: {func.__name__}, current_tasks: {self.current_tasks}"
+            )
             self.enqueue({"func": func, "args": args, "kwargs": kwargs})
     def execute_task(self, func: Callable, *args: Any, **kwargs: Any):
-        thread = threading.Thread(target=self.run_task, args=(func, *args), kwargs=kwargs)
+        thread = threading.Thread(
+            target=self.run_task, args=(func, *args), kwargs=kwargs
+        )
         thread.start()
     def run_task(self, func: Callable, *args: Any, **kwargs: Any):
@@ -35,11 +39,14 @@ class TaskManager:
     def check_queue(self):
         with self.lock:
-            if self.current_tasks < self.max_concurrent_tasks and not self.is_queue_empty():
+            if (
+                self.current_tasks < self.max_concurrent_tasks
+                and not self.is_queue_empty()
+            ):
                 task_info = self.dequeue()
-                func = task_info['func']
-                args = task_info.get('args', ())
-                kwargs = task_info.get('kwargs', {})
+                func = task_info["func"]
+                args = task_info.get("args", ())
+                kwargs = task_info.get("kwargs", {})
                 self.execute_task(func, *args, **kwargs)
     def task_done(self):

View File

@@ -8,7 +8,7 @@ from app.models.schema import VideoParams
 from app.services import task as tm
 FUNC_MAP = {
-    'start': tm.start,
+    "start": tm.start,
     # 'start_test': tm.start_test
 }
@@ -24,11 +24,15 @@ class RedisTaskManager(TaskManager):
     def enqueue(self, task: Dict):
         task_with_serializable_params = task.copy()
-        if 'params' in task['kwargs'] and isinstance(task['kwargs']['params'], VideoParams):
-            task_with_serializable_params['kwargs']['params'] = task['kwargs']['params'].dict()
+        if "params" in task["kwargs"] and isinstance(
+            task["kwargs"]["params"], VideoParams
+        ):
+            task_with_serializable_params["kwargs"]["params"] = task["kwargs"][
+                "params"
+            ].dict()
         # 将函数对象转换为其名称
-        task_with_serializable_params['func'] = task['func'].__name__
+        task_with_serializable_params["func"] = task["func"].__name__
         self.redis_client.rpush(self.queue, json.dumps(task_with_serializable_params))
     def dequeue(self):
@@ -36,10 +40,14 @@ class RedisTaskManager(TaskManager):
         if task_json:
             task_info = json.loads(task_json)
             # 将函数名称转换回函数对象
-            task_info['func'] = FUNC_MAP[task_info['func']]
-            if 'params' in task_info['kwargs'] and isinstance(task_info['kwargs']['params'], dict):
-                task_info['kwargs']['params'] = VideoParams(**task_info['kwargs']['params'])
+            task_info["func"] = FUNC_MAP[task_info["func"]]
+            if "params" in task_info["kwargs"] and isinstance(
+                task_info["kwargs"]["params"], dict
+            ):
+                task_info["kwargs"]["params"] = VideoParams(
+                    **task_info["kwargs"]["params"]
+                )
             return task_info
         return None
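`RedisTaskManager` has to push queued tasks through Redis as JSON, and function objects are not JSON-serializable, so the code above stores the function's `__name__` on enqueue and maps it back through `FUNC_MAP` on dequeue. That round-trip can be sketched without Redis; the `start` function below is a hypothetical stand-in for `app.services.task.start`:

```python
import json
from typing import Dict


def start(task_id: str, params: Dict):
    # Hypothetical stand-in for app.services.task.start
    return f"started {task_id}"


# Functions are not JSON-serializable, so store the name and map it back.
FUNC_MAP = {"start": start}


def serialize_task(task: Dict) -> str:
    payload = task.copy()
    payload["func"] = task["func"].__name__
    return json.dumps(payload)


def deserialize_task(task_json: str) -> Dict:
    task_info = json.loads(task_json)
    task_info["func"] = FUNC_MAP[task_info["func"]]
    return task_info
```

The same idea covers the Pydantic `params` object: it is dumped to a plain dict on enqueue and rebuilt as `VideoParams(**...)` on dequeue.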

View File

@@ -4,6 +4,11 @@ from fastapi import Request
 router = APIRouter()
-@router.get("/ping", tags=["Health Check"], description="检查服务可用性", response_description="pong")
+@router.get(
+    "/ping",
+    tags=["Health Check"],
+    description="检查服务可用性",
+    response_description="pong",
+)
 def ping(request: Request) -> str:
     return "pong"

View File

@@ -3,8 +3,8 @@ from fastapi import APIRouter, Depends
 def new_router(dependencies=None):
     router = APIRouter()
-    router.tags = ['V1']
-    router.prefix = '/api/v1'
+    router.tags = ["V1"]
+    router.prefix = "/api/v1"
     # 将认证依赖项应用于所有路由
     if dependencies:
         router.dependencies = dependencies

View File

@@ -1,6 +1,11 @@
 from fastapi import Request
 from app.controllers.v1.base import new_router
-from app.models.schema import VideoScriptResponse, VideoScriptRequest, VideoTermsResponse, VideoTermsRequest
+from app.models.schema import (
+    VideoScriptResponse,
+    VideoScriptRequest,
+    VideoTermsResponse,
+    VideoTermsRequest,
+)
 from app.services import llm
 from app.utils import utils
@@ -9,23 +14,31 @@ from app.utils import utils
 router = new_router()
-@router.post("/scripts", response_model=VideoScriptResponse, summary="Create a script for the video")
+@router.post(
+    "/scripts",
+    response_model=VideoScriptResponse,
+    summary="Create a script for the video",
+)
 def generate_video_script(request: Request, body: VideoScriptRequest):
-    video_script = llm.generate_script(video_subject=body.video_subject,
-                                       language=body.video_language,
-                                       paragraph_number=body.paragraph_number)
-    response = {
-        "video_script": video_script
-    }
+    video_script = llm.generate_script(
+        video_subject=body.video_subject,
+        language=body.video_language,
+        paragraph_number=body.paragraph_number,
+    )
+    response = {"video_script": video_script}
     return utils.get_response(200, response)
-@router.post("/terms", response_model=VideoTermsResponse, summary="Generate video terms based on the video script")
+@router.post(
+    "/terms",
+    response_model=VideoTermsResponse,
+    summary="Generate video terms based on the video script",
+)
 def generate_video_terms(request: Request, body: VideoTermsRequest):
-    video_terms = llm.generate_terms(video_subject=body.video_subject,
-                                     video_script=body.video_script,
-                                     amount=body.amount)
-    response = {
-        "video_terms": video_terms
-    }
+    video_terms = llm.generate_terms(
+        video_subject=body.video_subject,
+        video_script=body.video_script,
+        amount=body.amount,
+    )
+    response = {"video_terms": video_terms}
     return utils.get_response(200, response)

View File

@@ -1,11 +1,12 @@
import os
import glob import glob
import os
import pathlib import pathlib
import shutil import shutil
from typing import Union
from fastapi import Request, Depends, Path, BackgroundTasks, UploadFile from fastapi import BackgroundTasks, Depends, Path, Request, UploadFile
from fastapi.responses import FileResponse, StreamingResponse
from fastapi.params import File from fastapi.params import File
from fastapi.responses import FileResponse, StreamingResponse
from loguru import logger from loguru import logger
from app.config import config from app.config import config
@@ -14,10 +15,19 @@ from app.controllers.manager.memory_manager import InMemoryTaskManager
from app.controllers.manager.redis_manager import RedisTaskManager from app.controllers.manager.redis_manager import RedisTaskManager
from app.controllers.v1.base import new_router from app.controllers.v1.base import new_router
from app.models.exception import HttpException from app.models.exception import HttpException
from app.models.schema import TaskVideoRequest, TaskQueryResponse, TaskResponse, TaskQueryRequest, \ from app.models.schema import (
BgmUploadResponse, BgmRetrieveResponse, TaskDeletionResponse AudioRequest,
from app.services import task as tm BgmRetrieveResponse,
BgmUploadResponse,
SubtitleRequest,
TaskDeletionResponse,
TaskQueryRequest,
TaskQueryResponse,
TaskResponse,
TaskVideoRequest,
)
from app.services import state as sm from app.services import state as sm
from app.services import task as tm
from app.utils import utils from app.utils import utils
# 认证依赖项 # 认证依赖项
@@ -34,48 +44,65 @@ _max_concurrent_tasks = config.app.get("max_concurrent_tasks", 5)
redis_url = f"redis://:{_redis_password}@{_redis_host}:{_redis_port}/{_redis_db}" redis_url = f"redis://:{_redis_password}@{_redis_host}:{_redis_port}/{_redis_db}"
# 根据配置选择合适的任务管理器 # 根据配置选择合适的任务管理器
if _enable_redis: if _enable_redis:
task_manager = RedisTaskManager(max_concurrent_tasks=_max_concurrent_tasks, redis_url=redis_url) task_manager = RedisTaskManager(
max_concurrent_tasks=_max_concurrent_tasks, redis_url=redis_url
)
else: else:
task_manager = InMemoryTaskManager(max_concurrent_tasks=_max_concurrent_tasks) task_manager = InMemoryTaskManager(max_concurrent_tasks=_max_concurrent_tasks)
# @router.post("/videos-test", response_model=TaskResponse, summary="Generate a short video")
# async def create_video_test(request: Request, body: TaskVideoRequest):
# task_id = utils.get_uuid()
# request_id = base.get_task_id(request)
# try:
# task = {
# "task_id": task_id,
# "request_id": request_id,
# "params": body.dict(),
# }
# task_manager.add_task(tm.start_test, task_id=task_id, params=body)
# return utils.get_response(200, task)
# except ValueError as e:
# raise HttpException(task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}")
@router.post("/videos", response_model=TaskResponse, summary="Generate a short video") @router.post("/videos", response_model=TaskResponse, summary="Generate a short video")
def create_video(background_tasks: BackgroundTasks, request: Request, body: TaskVideoRequest): def create_video(
background_tasks: BackgroundTasks, request: Request, body: TaskVideoRequest
):
return create_task(request, body, stop_at="video")
@router.post("/subtitle", response_model=TaskResponse, summary="Generate subtitle only")
def create_subtitle(
background_tasks: BackgroundTasks, request: Request, body: SubtitleRequest
):
return create_task(request, body, stop_at="subtitle")
@router.post("/audio", response_model=TaskResponse, summary="Generate audio only")
def create_audio(
background_tasks: BackgroundTasks, request: Request, body: AudioRequest
):
return create_task(request, body, stop_at="audio")
def create_task(
request: Request,
body: Union[TaskVideoRequest, SubtitleRequest, AudioRequest],
stop_at: str,
):
task_id = utils.get_uuid() task_id = utils.get_uuid()
request_id = base.get_task_id(request) request_id = base.get_task_id(request)
try: try:
task = { task = {
"task_id": task_id, "task_id": task_id,
"request_id": request_id, "request_id": request_id,
"params": body.dict(), "params": body.model_dump(),
} }
sm.state.update_task(task_id) sm.state.update_task(task_id)
# background_tasks.add_task(tm.start, task_id=task_id, params=body) task_manager.add_task(tm.start, task_id=task_id, params=body, stop_at=stop_at)
task_manager.add_task(tm.start, task_id=task_id, params=body) logger.success(f"Task created: {utils.to_json(task)}")
logger.success(f"video created: {utils.to_json(task)}")
return utils.get_response(200, task) return utils.get_response(200, task)
except ValueError as e: except ValueError as e:
raise HttpException(task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}") raise HttpException(
task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}"
)
@router.get("/tasks/{task_id}", response_model=TaskQueryResponse, summary="Query task status") @router.get(
def get_task(request: Request, task_id: str = Path(..., description="Task ID"), "/tasks/{task_id}", response_model=TaskQueryResponse, summary="Query task status"
query: TaskQueryRequest = Depends()): )
def get_task(
request: Request,
task_id: str = Path(..., description="Task ID"),
query: TaskQueryRequest = Depends(),
):
endpoint = config.app.get("endpoint", "") endpoint = config.app.get("endpoint", "")
if not endpoint: if not endpoint:
endpoint = str(request.base_url) endpoint = str(request.base_url)
@@ -108,10 +135,16 @@ def get_task(request: Request, task_id: str = Path(..., description="Task ID"),
task["combined_videos"] = urls task["combined_videos"] = urls
return utils.get_response(200, task) return utils.get_response(200, task)
raise HttpException(task_id=task_id, status_code=404, message=f"{request_id}: task not found") raise HttpException(
task_id=task_id, status_code=404, message=f"{request_id}: task not found"
)
@router.delete("/tasks/{task_id}", response_model=TaskDeletionResponse, summary="Delete a generated short video task") @router.delete(
"/tasks/{task_id}",
response_model=TaskDeletionResponse,
summary="Delete a generated short video task",
)
def delete_video(request: Request, task_id: str = Path(..., description="Task ID")): def delete_video(request: Request, task_id: str = Path(..., description="Task ID")):
request_id = base.get_task_id(request) request_id = base.get_task_id(request)
task = sm.state.get_task(task_id) task = sm.state.get_task(task_id)
@@ -125,32 +158,40 @@ def delete_video(request: Request, task_id: str = Path(..., description="Task ID
logger.success(f"video deleted: {utils.to_json(task)}") logger.success(f"video deleted: {utils.to_json(task)}")
return utils.get_response(200) return utils.get_response(200)
raise HttpException(task_id=task_id, status_code=404, message=f"{request_id}: task not found") raise HttpException(
task_id=task_id, status_code=404, message=f"{request_id}: task not found"
)
@router.get("/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files") @router.get(
"/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files"
)
def get_bgm_list(request: Request): def get_bgm_list(request: Request):
suffix = "*.mp3" suffix = "*.mp3"
song_dir = utils.song_dir() song_dir = utils.song_dir()
files = glob.glob(os.path.join(song_dir, suffix)) files = glob.glob(os.path.join(song_dir, suffix))
bgm_list = [] bgm_list = []
for file in files: for file in files:
bgm_list.append({ bgm_list.append(
{
"name": os.path.basename(file), "name": os.path.basename(file),
"size": os.path.getsize(file), "size": os.path.getsize(file),
"file": file, "file": file,
})
response = {
"files": bgm_list
} }
)
response = {"files": bgm_list}
return utils.get_response(200, response) return utils.get_response(200, response)
@router.post("/musics", response_model=BgmUploadResponse, summary="Upload the BGM file to the songs directory") @router.post(
"/musics",
response_model=BgmUploadResponse,
summary="Upload the BGM file to the songs directory",
)
def upload_bgm_file(request: Request, file: UploadFile = File(...)): def upload_bgm_file(request: Request, file: UploadFile = File(...)):
request_id = base.get_task_id(request) request_id = base.get_task_id(request)
# check file ext # check file ext
if file.filename.endswith('mp3'): if file.filename.endswith("mp3"):
song_dir = utils.song_dir() song_dir = utils.song_dir()
save_path = os.path.join(song_dir, file.filename) save_path = os.path.join(song_dir, file.filename)
# save file # save file
@@ -158,26 +199,26 @@ def upload_bgm_file(request: Request, file: UploadFile = File(...)):
             # If the file already exists, it will be overwritten
             file.file.seek(0)
             buffer.write(file.file.read())
-        response = {
-            "file": save_path
-        }
+        response = {"file": save_path}
         return utils.get_response(200, response)
-    raise HttpException('', status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded")
+    raise HttpException(
+        "", status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded"
+    )


 @router.get("/stream/{file_path:path}")
 async def stream_video(request: Request, file_path: str):
     tasks_dir = utils.task_dir()
     video_path = os.path.join(tasks_dir, file_path)
-    range_header = request.headers.get('Range')
+    range_header = request.headers.get("Range")
     video_size = os.path.getsize(video_path)
     start, end = 0, video_size - 1

     length = video_size
     if range_header:
-        range_ = range_header.split('bytes=')[1]
-        start, end = [int(part) if part else None for part in range_.split('-')]
+        range_ = range_header.split("bytes=")[1]
+        start, end = [int(part) if part else None for part in range_.split("-")]
         if start is None:
             start = video_size - end
             end = video_size - 1
@@ -186,7 +227,7 @@ async def stream_video(request: Request, file_path: str):
         length = end - start + 1

     def file_iterator(file_path, offset=0, bytes_to_read=None):
-        with open(file_path, 'rb') as f:
+        with open(file_path, "rb") as f:
             f.seek(offset, os.SEEK_SET)
             remaining = bytes_to_read or video_size
             while remaining > 0:
@@ -197,10 +238,12 @@ async def stream_video(request: Request, file_path: str):
             remaining -= len(data)
             yield data

-    response = StreamingResponse(file_iterator(video_path, start, length), media_type='video/mp4')
-    response.headers['Content-Range'] = f'bytes {start}-{end}/{video_size}'
-    response.headers['Accept-Ranges'] = 'bytes'
-    response.headers['Content-Length'] = str(length)
+    response = StreamingResponse(
+        file_iterator(video_path, start, length), media_type="video/mp4"
+    )
+    response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}"
+    response.headers["Accept-Ranges"] = "bytes"
+    response.headers["Content-Length"] = str(length)
     response.status_code = 206  # Partial Content

     return response
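Aside from the quote normalization, the `stream_video` hunk above carries the HTTP `Range` parsing that backs the 206 Partial Content response. A minimal standalone sketch of that parsing (function name hypothetical, extracted here for illustration):

```python
def parse_range(range_header: str, size: int):
    """Parse an HTTP Range header such as 'bytes=0-99', 'bytes=200-',
    or the suffix form 'bytes=-500', mirroring the stream_video logic."""
    range_ = range_header.split("bytes=")[1]
    start, end = [int(part) if part else None for part in range_.split("-")]
    if start is None:  # suffix form: serve the last `end` bytes
        start = size - end
        end = size - 1
    if end is None:  # open-ended form: serve from `start` to EOF
        end = size - 1
    return start, end, end - start + 1  # offsets plus Content-Length
```

For a 1000-byte file, `parse_range("bytes=-500", 1000)` yields `(500, 999, 500)`, which is exactly the `Content-Range: bytes 500-999/1000` case the endpoint emits.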
@@ -219,8 +262,10 @@ async def download_video(_: Request, file_path: str):
     file_path = pathlib.Path(video_path)
     filename = file_path.stem
     extension = file_path.suffix
-    headers = {
-        "Content-Disposition": f"attachment; filename=(unknown){extension}"
-    }
-    return FileResponse(path=video_path, headers=headers, filename=f"(unknown){extension}",
-                        media_type=f'video/{extension[1:]}')
+    headers = {"Content-Disposition": f"attachment; filename=(unknown){extension}"}
+    return FileResponse(
+        path=video_path,
+        headers=headers,
+        filename=f"(unknown){extension}",
+        media_type=f"video/{extension[1:]}",
+    )


@@ -1,11 +1,25 @@
 PUNCTUATIONS = [
-    "?", ",", ".", "、", ";", ":", "!", "…",
-    "?", ",", "。", "、", ";", ":", "!", "...",
+    "?",
+    ",",
+    ".",
+    "、",
+    ";",
+    ":",
+    "!",
+    "…",
+    "?",
+    ",",
+    "。",
+    "、",
+    ";",
+    ":",
+    "!",
+    "...",
 ]

 TASK_STATE_FAILED = -1
 TASK_STATE_COMPLETE = 1
 TASK_STATE_PROCESSING = 4

-FILE_TYPE_VIDEOS = ['mp4', 'mov', 'mkv', 'webm']
-FILE_TYPE_IMAGES = ['jpg', 'jpeg', 'png', 'bmp']
+FILE_TYPE_VIDEOS = ["mp4", "mov", "mkv", "webm"]
+FILE_TYPE_IMAGES = ["jpg", "jpeg", "png", "bmp"]


@@ -5,16 +5,18 @@ from loguru import logger

 class HttpException(Exception):
-    def __init__(self, task_id: str, status_code: int, message: str = '', data: Any = None):
+    def __init__(
+        self, task_id: str, status_code: int, message: str = "", data: Any = None
+    ):
         self.message = message
         self.status_code = status_code
         self.data = data

         # Get the exception stack trace
         tb_str = traceback.format_exc().strip()
         if not tb_str or tb_str == "NoneType: None":
-            msg = f'HttpException: {status_code}, {task_id}, {message}'
+            msg = f"HttpException: {status_code}, {task_id}, {message}"
         else:
-            msg = f'HttpException: {status_code}, {task_id}, {message}\n{tb_str}'
+            msg = f"HttpException: {status_code}, {task_id}, {message}\n{tb_str}"

         if status_code == 400:
             logger.warning(msg)
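The `"NoneType: None"` comparison in the constructor above exists because `traceback.format_exc()` returns that literal placeholder string when no exception is currently being handled. A quick illustration:

```python
import traceback

# With no active exception, format_exc() returns the placeholder,
# which HttpException uses to decide whether to log a stack trace.
idle = traceback.format_exc().strip()
print(idle)  # NoneType: None

# Inside an except block, the real stack trace is available.
try:
    raise RuntimeError("boom")
except RuntimeError:
    active = traceback.format_exc().strip()
print("boom" in active)  # True
```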


@@ -1,12 +1,16 @@
import warnings
from enum import Enum from enum import Enum
from typing import Any, Optional, List from typing import Any, List, Optional, Union
import pydantic import pydantic
from pydantic import BaseModel from pydantic import BaseModel
import warnings
# 忽略 Pydantic 的特定警告 # 忽略 Pydantic 的特定警告
warnings.filterwarnings("ignore", category=UserWarning, message="Field name.*shadows an attribute in parent.*") warnings.filterwarnings(
"ignore",
category=UserWarning,
message="Field name.*shadows an attribute in parent.*",
)
class VideoConcatMode(str, Enum): class VideoConcatMode(str, Enum):
@@ -61,7 +65,6 @@ class MaterialInfo:
# # "male-zh-TW-YunJheNeural", # # "male-zh-TW-YunJheNeural",
# #
# # en-US # # en-US
#
# "female-en-US-AnaNeural", # "female-en-US-AnaNeural",
# "female-en-US-AriaNeural", # "female-en-US-AriaNeural",
# "female-en-US-AvaNeural", # "female-en-US-AvaNeural",
@@ -93,6 +96,7 @@ class VideoParams(BaseModel):
"stroke_width": 1.5 "stroke_width": 1.5
} }
""" """
video_subject: str video_subject: str
video_script: str = "" # 用于生成视频的脚本 video_script: str = "" # 用于生成视频的脚本
video_terms: Optional[str | list] = None # 用于生成视频的关键词 video_terms: Optional[str | list] = None # 用于生成视频的关键词
@@ -108,15 +112,17 @@ class VideoParams(BaseModel):
voice_name: Optional[str] = "" voice_name: Optional[str] = ""
voice_volume: Optional[float] = 1.0 voice_volume: Optional[float] = 1.0
voice_rate: Optional[float] = 1.0
bgm_type: Optional[str] = "random" bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = "" bgm_file: Optional[str] = ""
bgm_volume: Optional[float] = 0.2 bgm_volume: Optional[float] = 0.2
subtitle_enabled: Optional[bool] = True subtitle_enabled: Optional[bool] = True
subtitle_position: Optional[str] = "bottom" # top, bottom, center subtitle_position: Optional[str] = "bottom" # top, bottom, center
custom_position: float = 70.0
font_name: Optional[str] = "STHeitiMedium.ttc" font_name: Optional[str] = "STHeitiMedium.ttc"
text_fore_color: Optional[str] = "#FFFFFF" text_fore_color: Optional[str] = "#FFFFFF"
text_background_color: Optional[str] = "transparent" text_background_color: Union[bool, str] = True
font_size: int = 60 font_size: int = 60
stroke_color: Optional[str] = "#000000" stroke_color: Optional[str] = "#000000"
@@ -125,6 +131,38 @@ class VideoParams(BaseModel):
     paragraph_number: Optional[int] = 1


+class SubtitleRequest(BaseModel):
+    video_script: str
+    video_language: Optional[str] = ""
+    voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
+    voice_volume: Optional[float] = 1.0
+    voice_rate: Optional[float] = 1.2
+    bgm_type: Optional[str] = "random"
+    bgm_file: Optional[str] = ""
+    bgm_volume: Optional[float] = 0.2
+    subtitle_position: Optional[str] = "bottom"
+    font_name: Optional[str] = "STHeitiMedium.ttc"
+    text_fore_color: Optional[str] = "#FFFFFF"
+    text_background_color: Union[bool, str] = True
+    font_size: int = 60
+    stroke_color: Optional[str] = "#000000"
+    stroke_width: float = 1.5
+    video_source: Optional[str] = "local"
+    subtitle_enabled: Optional[str] = "true"
+
+
+class AudioRequest(BaseModel):
+    video_script: str
+    video_language: Optional[str] = ""
+    voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
+    voice_volume: Optional[float] = 1.0
+    voice_rate: Optional[float] = 1.2
+    bgm_type: Optional[str] = "random"
+    bgm_file: Optional[str] = ""
+    bgm_volume: Optional[float] = 0.2
+    video_source: Optional[str] = "local"
 class VideoScriptParams:
     """
     {
@@ -133,6 +171,7 @@ class VideoScriptParams:
         "paragraph_number": 1
     }
     """
+
     video_subject: Optional[str] = "春天的花海"
     video_language: Optional[str] = ""
     paragraph_number: Optional[int] = 1
@@ -146,14 +185,17 @@ class VideoTermsParams:
         "amount": 5
     }
     """
+
     video_subject: Optional[str] = "春天的花海"
-    video_script: Optional[str] = "春天的花海,如诗如画般展现在眼前。万物复苏的季节里,大地披上了一袭绚丽多彩的盛装。金黄的迎春、粉嫩的樱花、洁白的梨花、艳丽的郁金香……"
+    video_script: Optional[str] = (
+        "春天的花海,如诗如画般展现在眼前。万物复苏的季节里,大地披上了一袭绚丽多彩的盛装。金黄的迎春、粉嫩的樱花、洁白的梨花、艳丽的郁金香……"
+    )
     amount: Optional[int] = 5


 class BaseResponse(BaseModel):
     status: int = 200
-    message: Optional[str] = 'success'
+    message: Optional[str] = "success"
     data: Any = None
@@ -188,9 +230,7 @@ class TaskResponse(BaseResponse):
             "example": {
                 "status": 200,
                 "message": "success",
-                "data": {
-                    "task_id": "6c85c8cc-a77a-42b9-bc30-947815aa0558"
-                }
+                "data": {"task_id": "6c85c8cc-a77a-42b9-bc30-947815aa0558"},
             },
         }
@@ -209,8 +249,8 @@ class TaskQueryResponse(BaseResponse):
                     ],
                     "combined_videos": [
                         "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
-                    ]
-                }
+                    ],
+                },
             },
         }
@@ -229,8 +269,8 @@ class TaskDeletionResponse(BaseResponse):
                     ],
                     "combined_videos": [
                         "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
-                    ]
-                }
+                    ],
+                },
             },
         }
@@ -243,7 +283,7 @@ class VideoScriptResponse(BaseResponse):
                 "message": "success",
                 "data": {
                     "video_script": "春天的花海,是大自然的一幅美丽画卷。在这个季节里,大地复苏,万物生长,花朵争相绽放,形成了一片五彩斑斓的花海..."
-                }
+                },
             },
         }
@@ -254,9 +294,7 @@ class VideoTermsResponse(BaseResponse):
             "example": {
                 "status": 200,
                 "message": "success",
-                "data": {
-                    "video_terms": ["sky", "tree"]
-                }
+                "data": {"video_terms": ["sky", "tree"]},
             },
         }
@@ -272,10 +310,10 @@ class BgmRetrieveResponse(BaseResponse):
                     {
                         "name": "output013.mp3",
                         "size": 1891269,
-                        "file": "/MoneyPrinterTurbo/resource/songs/output013.mp3"
+                        "file": "/MoneyPrinterTurbo/resource/songs/output013.mp3",
                     }
                 ]
-            }
+            },
         },
     }
@@ -286,8 +324,6 @@ class BgmUploadResponse(BaseResponse):
             "example": {
                 "status": 200,
                 "message": "success",
-                "data": {
-                    "file": "/MoneyPrinterTurbo/resource/songs/example.mp3"
-                }
+                "data": {"file": "/MoneyPrinterTurbo/resource/songs/example.mp3"},
             },
         }


@@ -6,9 +6,10 @@ Resources:
 1. https://fastapi.tiangolo.com/tutorial/bigger-applications

 """
+
 from fastapi import APIRouter
-from app.controllers.v1 import video, llm
+from app.controllers.v1 import llm, video

 root_api_router = APIRouter()
 # v1


@@ -21,6 +21,7 @@ def _generate_response(prompt: str) -> str:
         if not model_name:
             model_name = "gpt-3.5-turbo-16k-0613"
         import g4f
+
         content = g4f.ChatCompletion.create(
             model=model_name,
             messages=[{"role": "user", "content": prompt}],
@@ -72,43 +73,62 @@ def _generate_response(prompt: str) -> str:
             base_url = config.app.get("deepseek_base_url")
             if not base_url:
                 base_url = "https://api.deepseek.com"
+        elif llm_provider == "ernie":
+            api_key = config.app.get("ernie_api_key")
+            secret_key = config.app.get("ernie_secret_key")
+            base_url = config.app.get("ernie_base_url")
+            model_name = "***"
+            if not secret_key:
+                raise ValueError(
+                    f"{llm_provider}: secret_key is not set, please set it in the config.toml file."
+                )
         else:
-            raise ValueError("llm_provider is not set, please set it in the config.toml file.")
+            raise ValueError(
+                "llm_provider is not set, please set it in the config.toml file."
+            )

         if not api_key:
-            raise ValueError(f"{llm_provider}: api_key is not set, please set it in the config.toml file.")
+            raise ValueError(
+                f"{llm_provider}: api_key is not set, please set it in the config.toml file."
+            )
         if not model_name:
-            raise ValueError(f"{llm_provider}: model_name is not set, please set it in the config.toml file.")
+            raise ValueError(
+                f"{llm_provider}: model_name is not set, please set it in the config.toml file."
+            )
         if not base_url:
-            raise ValueError(f"{llm_provider}: base_url is not set, please set it in the config.toml file.")
+            raise ValueError(
+                f"{llm_provider}: base_url is not set, please set it in the config.toml file."
+            )

     if llm_provider == "qwen":
         import dashscope
         from dashscope.api_entities.dashscope_response import GenerationResponse
+
         dashscope.api_key = api_key
         response = dashscope.Generation.call(
-            model=model_name,
-            messages=[{"role": "user", "content": prompt}]
+            model=model_name, messages=[{"role": "user", "content": prompt}]
         )
         if response:
             if isinstance(response, GenerationResponse):
                 status_code = response.status_code
                 if status_code != 200:
                     raise Exception(
-                        f"[{llm_provider}] returned an error response: \"{response}\"")
+                        f'[{llm_provider}] returned an error response: "{response}"'
+                    )

                 content = response["output"]["text"]
                 return content.replace("\n", "")
             else:
                 raise Exception(
-                    f"[{llm_provider}] returned an invalid response: \"{response}\"")
+                    f'[{llm_provider}] returned an invalid response: "{response}"'
+                )
         else:
-            raise Exception(
-                f"[{llm_provider}] returned an empty response")
+            raise Exception(f"[{llm_provider}] returned an empty response")

     if llm_provider == "gemini":
         import google.generativeai as genai
-        genai.configure(api_key=api_key, transport='rest')
+
+        genai.configure(api_key=api_key, transport="rest")

         generation_config = {
             "temperature": 0.5,
@@ -120,25 +140,27 @@ def _generate_response(prompt: str) -> str:
         safety_settings = [
             {
                 "category": "HARM_CATEGORY_HARASSMENT",
-                "threshold": "BLOCK_ONLY_HIGH"
+                "threshold": "BLOCK_ONLY_HIGH",
             },
             {
                 "category": "HARM_CATEGORY_HATE_SPEECH",
-                "threshold": "BLOCK_ONLY_HIGH"
+                "threshold": "BLOCK_ONLY_HIGH",
             },
             {
                 "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
-                "threshold": "BLOCK_ONLY_HIGH"
+                "threshold": "BLOCK_ONLY_HIGH",
             },
             {
                 "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
-                "threshold": "BLOCK_ONLY_HIGH"
+                "threshold": "BLOCK_ONLY_HIGH",
             },
         ]

-        model = genai.GenerativeModel(model_name=model_name,
-                                      generation_config=generation_config,
-                                      safety_settings=safety_settings)
+        model = genai.GenerativeModel(
+            model_name=model_name,
+            generation_config=generation_config,
+            safety_settings=safety_settings,
+        )

         try:
             response = model.generate_content(prompt)
@@ -151,20 +173,54 @@ def _generate_response(prompt: str) -> str:
     if llm_provider == "cloudflare":
         import requests
+
         response = requests.post(
             f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/{model_name}",
             headers={"Authorization": f"Bearer {api_key}"},
             json={
                 "messages": [
                     {"role": "system", "content": "You are a friendly assistant"},
-                    {"role": "user", "content": prompt}
+                    {"role": "user", "content": prompt},
                 ]
-            }
+            },
         )
         result = response.json()
         logger.info(result)
         return result["result"]["response"]

+    if llm_provider == "ernie":
+        import requests
+
+        params = {
+            "grant_type": "client_credentials",
+            "client_id": api_key,
+            "client_secret": secret_key,
+        }
+        access_token = (
+            requests.post("https://aip.baidubce.com/oauth/2.0/token", params=params)
+            .json()
+            .get("access_token")
+        )
+        url = f"{base_url}?access_token={access_token}"
+
+        payload = json.dumps(
+            {
+                "messages": [{"role": "user", "content": prompt}],
+                "temperature": 0.5,
+                "top_p": 0.8,
+                "penalty_score": 1,
+                "disable_search": False,
+                "enable_citation": False,
+                "response_format": "text",
+            }
+        )
+        headers = {"Content-Type": "application/json"}
+
+        response = requests.request(
+            "POST", url, headers=headers, data=payload
+        ).json()
+        return response.get("result")
+
     if llm_provider == "azure":
         client = AzureOpenAI(
             api_key=api_key,
@@ -178,24 +234,27 @@ def _generate_response(prompt: str) -> str:
         )

     response = client.chat.completions.create(
-        model=model_name,
-        messages=[{"role": "user", "content": prompt}]
+        model=model_name, messages=[{"role": "user", "content": prompt}]
     )
     if response:
         if isinstance(response, ChatCompletion):
             content = response.choices[0].message.content
         else:
             raise Exception(
-                f"[{llm_provider}] returned an invalid response: \"{response}\", please check your network "
-                f"connection and try again.")
+                f'[{llm_provider}] returned an invalid response: "{response}", please check your network '
+                f"connection and try again."
+            )
     else:
         raise Exception(
-            f"[{llm_provider}] returned an empty response, please check your network connection and try again.")
+            f"[{llm_provider}] returned an empty response, please check your network connection and try again."
+        )

     return content.replace("\n", "")


-def generate_script(video_subject: str, language: str = "", paragraph_number: int = 1) -> str:
+def generate_script(
+    video_subject: str, language: str = "", paragraph_number: int = 1
+) -> str:
     prompt = f"""
 # Role: Video Script Generator
@@ -239,7 +298,7 @@ Generate a script for a video, depending on the subject of the video.
         selected_paragraphs = paragraphs[:paragraph_number]

         # Join the selected paragraphs into a single string
-        return "\n\n".join(selected_paragraphs)
+        return "\n\n".join(paragraphs)

     for i in range(_max_retries):
         try:
@@ -295,20 +354,26 @@ Please note that you must use English for generating video search terms; Chinese
     logger.info(f"subject: {video_subject}")

     search_terms = []
+    response = ""
     for i in range(_max_retries):
         try:
             response = _generate_response(prompt)
             search_terms = json.loads(response)
-            if not isinstance(search_terms, list) or not all(isinstance(term, str) for term in search_terms):
+            if not isinstance(search_terms, list) or not all(
+                isinstance(term, str) for term in search_terms
+            ):
                 logger.error("response is not a list of strings.")
                 continue

         except Exception as e:
-            match = re.search(r'\[.*]', response)
-            if match:
-                try:
-                    search_terms = json.loads(match.group())
-                except json.JSONDecodeError:
-                    pass
+            logger.warning(f"failed to generate video terms: {str(e)}")
+            if response:
+                match = re.search(r"\[.*]", response)
+                if match:
+                    try:
+                        search_terms = json.loads(match.group())
+                    except Exception as e:
+                        logger.warning(f"failed to generate video terms: {str(e)}")
+                        pass

         if search_terms and len(search_terms) > 0:
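The retry branch above salvages a JSON array from a chatty LLM reply by grabbing the first `[...]` span with `re.search(r"\[.*]", ...)`. A self-contained sketch of that fallback (function name hypothetical, logging omitted):

```python
import json
import re


def extract_terms(response: str) -> list:
    """Best-effort extraction of a JSON string array from an LLM reply,
    mirroring the fallback in generate_terms: try the whole reply first,
    then parse the first bracketed span."""
    try:
        terms = json.loads(response)
        if isinstance(terms, list) and all(isinstance(t, str) for t in terms):
            return terms
    except Exception:
        pass
    match = re.search(r"\[.*]", response)
    if match:
        try:
            return json.loads(match.group())
        except Exception:
            pass
    return []
```

So `extract_terms('Here you go: ["sky", "tree"] enjoy!')` recovers `["sky", "tree"]` even though the full reply is not valid JSON.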
@@ -322,9 +387,13 @@ Please note that you must use English for generating video search terms; Chinese

 if __name__ == "__main__":
     video_subject = "生命的意义是什么"
-    script = generate_script(video_subject=video_subject, language="zh-CN", paragraph_number=1)
+    script = generate_script(
+        video_subject=video_subject, language="zh-CN", paragraph_number=1
+    )
     print("######################")
     print(script)
-    search_terms = generate_terms(video_subject=video_subject, video_script=script, amount=5)
+    search_terms = generate_terms(
+        video_subject=video_subject, video_script=script, amount=5
+    )
     print("######################")
     print(search_terms)


@@ -19,7 +19,8 @@ def get_api_key(cfg_key: str):
     if not api_keys:
         raise ValueError(
             f"\n\n##### {cfg_key} is not set #####\n\nPlease set it in the config.toml file: {config.config_file}\n\n"
-            f"{utils.to_json(config.app)}")
+            f"{utils.to_json(config.app)}"
+        )

     # if only one key is provided, return it
     if isinstance(api_keys, str):
@@ -30,28 +31,32 @@ def get_api_key(cfg_key: str):
     return api_keys[requested_count % len(api_keys)]
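`get_api_key` rotates through a single key or a list of keys with a modulo counter, so multiple Pexels/Pixabay keys share the request load. A minimal sketch of that rotation (the counter is module-level state in the real service; the counter handling here is simplified):

```python
_requested_count = 0  # module-level call counter, as in the real service


def rotate_key(api_keys) -> str:
    """Round-robin over one key (str) or several (list of str),
    in the spirit of get_api_key."""
    global _requested_count
    if isinstance(api_keys, str):
        return api_keys  # single key: always return it
    _requested_count += 1
    return api_keys[_requested_count % len(api_keys)]
```

Successive calls with `["a", "b", "c"]` cycle through the keys, so no single key absorbs every request.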

-def search_videos_pexels(search_term: str,
-                         minimum_duration: int,
-                         video_aspect: VideoAspect = VideoAspect.portrait,
-                         ) -> List[MaterialInfo]:
+def search_videos_pexels(
+    search_term: str,
+    minimum_duration: int,
+    video_aspect: VideoAspect = VideoAspect.portrait,
+) -> List[MaterialInfo]:
     aspect = VideoAspect(video_aspect)
     video_orientation = aspect.name
     video_width, video_height = aspect.to_resolution()
     api_key = get_api_key("pexels_api_keys")
     headers = {
-        "Authorization": api_key
+        "Authorization": api_key,
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
     }
     # Build URL
-    params = {
-        "query": search_term,
-        "per_page": 20,
-        "orientation": video_orientation
-    }
+    params = {"query": search_term, "per_page": 20, "orientation": video_orientation}
     query_url = f"https://api.pexels.com/videos/search?{urlencode(params)}"
     logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}")

     try:
-        r = requests.get(query_url, headers=headers, proxies=config.proxy, verify=False, timeout=(30, 60))
+        r = requests.get(
+            query_url,
+            headers=headers,
+            proxies=config.proxy,
+            verify=False,
+            timeout=(30, 60),
+        )
         response = r.json()
         video_items = []
         if "videos" not in response:
@@ -83,10 +88,11 @@ def search_videos_pexels(search_term: str,
     return []


-def search_videos_pixabay(search_term: str,
-                          minimum_duration: int,
-                          video_aspect: VideoAspect = VideoAspect.portrait,
-                          ) -> List[MaterialInfo]:
+def search_videos_pixabay(
+    search_term: str,
+    minimum_duration: int,
+    video_aspect: VideoAspect = VideoAspect.portrait,
+) -> List[MaterialInfo]:
     aspect = VideoAspect(video_aspect)

     video_width, video_height = aspect.to_resolution()
@@ -97,13 +103,15 @@ def search_videos_pixabay(search_term: str,
         "q": search_term,
         "video_type": "all",  # Accepted values: "all", "film", "animation"
         "per_page": 50,
-        "key": api_key
+        "key": api_key,
     }
     query_url = f"https://pixabay.com/api/videos/?{urlencode(params)}"
     logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}")

     try:
-        r = requests.get(query_url, proxies=config.proxy, verify=False, timeout=(30, 60))
+        r = requests.get(
+            query_url, proxies=config.proxy, verify=False, timeout=(30, 60)
+        )
         response = r.json()
         video_items = []
         if "hits" not in response:
@@ -153,9 +161,17 @@ def save_video(video_url: str, save_dir: str = "") -> str:
         logger.info(f"video already exists: {video_path}")
         return video_path

+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
+    }
+
     # if video does not exist, download it
     with open(video_path, "wb") as f:
-        f.write(requests.get(video_url, proxies=config.proxy, verify=False, timeout=(60, 240)).content)
+        f.write(
+            requests.get(
+                video_url, headers=headers, proxies=config.proxy, verify=False, timeout=(60, 240)
+            ).content
+        )

     if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
         try:
@@ -174,14 +190,15 @@ def save_video(video_url: str, save_dir: str = "") -> str:
     return ""


-def download_videos(task_id: str,
-                    search_terms: List[str],
-                    source: str = "pexels",
-                    video_aspect: VideoAspect = VideoAspect.portrait,
-                    video_contact_mode: VideoConcatMode = VideoConcatMode.random,
-                    audio_duration: float = 0.0,
-                    max_clip_duration: int = 5,
-                    ) -> List[str]:
+def download_videos(
+    task_id: str,
+    search_terms: List[str],
+    source: str = "pexels",
+    video_aspect: VideoAspect = VideoAspect.portrait,
+    video_contact_mode: VideoConcatMode = VideoConcatMode.random,
+    audio_duration: float = 0.0,
+    max_clip_duration: int = 5,
+) -> List[str]:
     valid_video_items = []
     valid_video_urls = []
     found_duration = 0.0
@@ -190,9 +207,11 @@ def download_videos(task_id: str,
         search_videos = search_videos_pixabay

     for search_term in search_terms:
-        video_items = search_videos(search_term=search_term,
-                                    minimum_duration=max_clip_duration,
-                                    video_aspect=video_aspect)
+        video_items = search_videos(
+            search_term=search_term,
+            minimum_duration=max_clip_duration,
+            video_aspect=video_aspect,
+        )
         logger.info(f"found {len(video_items)} videos for '{search_term}'")

         for item in video_items:
@@ -202,7 +221,8 @@ def download_videos(task_id: str,
                 found_duration += item.duration

     logger.info(
-        f"found total videos: {len(valid_video_items)}, required duration: {audio_duration} seconds, found duration: {found_duration} seconds")
+        f"found total videos: {len(valid_video_items)}, required duration: {audio_duration} seconds, found duration: {found_duration} seconds"
+    )
     video_paths = []
     material_directory = config.app.get("material_directory", "").strip()
@@ -218,14 +238,18 @@ def download_videos(task_id: str,
     for item in valid_video_items:
         try:
             logger.info(f"downloading video: {item.url}")
-            saved_video_path = save_video(video_url=item.url, save_dir=material_directory)
+            saved_video_path = save_video(
+                video_url=item.url, save_dir=material_directory
+            )
             if saved_video_path:
                 logger.info(f"video saved: {saved_video_path}")
                 video_paths.append(saved_video_path)
                 seconds = min(max_clip_duration, item.duration)
                 total_duration += seconds
                 if total_duration > audio_duration:
-                    logger.info(f"total duration of downloaded videos: {total_duration} seconds, skip downloading more")
+                    logger.info(
+                        f"total duration of downloaded videos: {total_duration} seconds, skip downloading more"
+                    )
                     break
         except Exception as e:
             logger.error(f"failed to download video: {utils.to_json(item)} => {str(e)}")
@@ -234,4 +258,6 @@ def download_videos(task_id: str,

 if __name__ == "__main__":
-    download_videos("test123", ["Money Exchange Medium"], audio_duration=100, source="pixabay")
+    download_videos(
+        "test123", ["Money Exchange Medium"], audio_duration=100, source="pixabay"
+    )


@@ -6,7 +6,6 @@ from app.models import const

 # Base class for state management
 class BaseState(ABC):
-
     @abstractmethod
     def update_task(self, task_id: str, state: int, progress: int = 0, **kwargs):
         pass
@@ -18,11 +17,16 @@ class BaseState(ABC):

 # Memory state management
 class MemoryState(BaseState):
-
     def __init__(self):
         self._tasks = {}

-    def update_task(self, task_id: str, state: int = const.TASK_STATE_PROCESSING, progress: int = 0, **kwargs):
+    def update_task(
+        self,
+        task_id: str,
+        state: int = const.TASK_STATE_PROCESSING,
+        progress: int = 0,
+        **kwargs,
+    ):
         progress = int(progress)
         if progress > 100:
             progress = 100
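Both state backends coerce `progress` to an integer and cap it at 100 before storing. A one-function sketch of the same guard (the floor at 0 is an extra safety check not shown in the diff):

```python
def clamp_progress(progress) -> int:
    """Normalize a task progress value as update_task does:
    coerce to int and cap at 100; the lower bound of 0 is an
    added assumption, not part of the original code."""
    progress = int(progress)
    return max(0, min(progress, 100))
```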
@@ -43,12 +47,18 @@ class MemoryState(BaseState):

 # Redis state management
 class RedisState(BaseState):
-    def __init__(self, host='localhost', port=6379, db=0, password=None):
+    def __init__(self, host="localhost", port=6379, db=0, password=None):
         import redis
+
         self._redis = redis.StrictRedis(host=host, port=port, db=db, password=password)

-    def update_task(self, task_id: str, state: int = const.TASK_STATE_PROCESSING, progress: int = 0, **kwargs):
+    def update_task(
+        self,
+        task_id: str,
+        state: int = const.TASK_STATE_PROCESSING,
+        progress: int = 0,
+        **kwargs,
+    ):
         progress = int(progress)
         if progress > 100:
             progress = 100
@@ -67,7 +77,10 @@ class RedisState(BaseState):
         if not task_data:
             return None

-        task = {key.decode('utf-8'): self._convert_to_original_type(value) for key, value in task_data.items()}
+        task = {
+            key.decode("utf-8"): self._convert_to_original_type(value)
+            for key, value in task_data.items()
+        }
         return task

     def delete_task(self, task_id: str):
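`get_task` above decodes every Redis hash key from bytes and funnels the values through `_convert_to_original_type`, since Redis returns everything as byte strings. A simplified sketch of such a converter (handling only ints, floats, and plain strings; the real method also restores lists):

```python
def convert_value(value: bytes):
    """Decode a Redis byte string back toward its original type,
    a simplified take on RedisState._convert_to_original_type."""
    value_str = value.decode("utf-8")
    for cast in (int, float):  # try the numeric types in order
        try:
            return cast(value_str)
        except ValueError:
            continue
    return value_str  # fall back to the plain string
```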
@@ -79,7 +92,7 @@ class RedisState(BaseState):
         Convert the value from byte string to its original data type.
         You can extend this method to handle other data types as needed.
         """
-        value_str = value.decode('utf-8')
+        value_str = value.decode("utf-8")

         try:
             # try to convert byte string array to list
@@ -100,4 +113,10 @@ _redis_port = config.app.get("redis_port", 6379)
 _redis_db = config.app.get("redis_db", 0)
 _redis_password = config.app.get("redis_password", None)

-state = RedisState(host=_redis_host, port=_redis_port, db=_redis_db, password=_redis_password) if _enable_redis else MemoryState()
+state = (
+    RedisState(
+        host=_redis_host, port=_redis_port, db=_redis_db, password=_redis_password
+    )
+    if _enable_redis
+    else MemoryState()
+)


@@ -23,18 +23,22 @@ def create(audio_file, subtitle_file: str = ""):
    if not os.path.isdir(model_path) or not os.path.isfile(model_bin_file):
        model_path = model_size

    logger.info(
        f"loading model: {model_path}, device: {device}, compute_type: {compute_type}"
    )
    try:
        model = WhisperModel(
            model_size_or_path=model_path, device=device, compute_type=compute_type
        )
    except Exception as e:
        logger.error(
            f"failed to load model: {e} \n\n"
            f"********************************************\n"
            f"this may be caused by network issue. \n"
            f"please download the model manually and put it in the 'models' folder. \n"
            f"see [README.md FAQ](https://github.com/harry0703/MoneyPrinterTurbo) for more details.\n"
            f"********************************************\n\n"
        )
        return None

    logger.info(f"start, output file: {subtitle_file}")
@@ -49,7 +53,9 @@ def create(audio_file, subtitle_file: str = ""):
        vad_parameters=dict(min_silence_duration_ms=500),
    )

    logger.info(
        f"detected language: '{info.language}', probability: {info.language_probability:.2f}"
    )

    start = timer()
    subtitles = []
@@ -62,11 +68,9 @@ def create(audio_file, subtitle_file: str = ""):
        msg = "[%.2fs -> %.2fs] %s" % (seg_start, seg_end, seg_text)
        logger.debug(msg)

        subtitles.append(
            {"msg": seg_text, "start_time": seg_start, "end_time": seg_end}
        )

    for segment in segments:
        words_idx = 0
@@ -119,7 +123,11 @@ def create(audio_file, subtitle_file: str = ""):
    for subtitle in subtitles:
        text = subtitle.get("msg")
        if text:
            lines.append(
                utils.text_to_srt(
                    idx, text, subtitle.get("start_time"), subtitle.get("end_time")
                )
            )
            idx += 1

    sub = "\n".join(lines) + "\n"
@@ -136,12 +144,12 @@ def file_to_subtitles(filename):
    current_times = None
    current_text = ""
    index = 0

    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            times = re.findall("([0-9]*:[0-9]*:[0-9]*,[0-9]*)", line)
            if times:
                current_times = line
            elif line.strip() == "" and current_times:
                index += 1
                times_texts.append((index, current_times.strip(), current_text.strip()))
                current_times, current_text = None, ""
@@ -150,27 +158,124 @@ def file_to_subtitles(filename):
    return times_texts
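`file_to_subtitles` walks an SRT file line by line, using the timestamp regex to detect cue boundaries. The tail of its loop falls outside this hunk, so the final `elif` below is an assumption; this is a self-contained sketch of the same parsing pass over an in-memory string rather than a file:

```python
import re


def parse_srt(text: str):
    """Parse SRT content into (index, times, text) tuples, mirroring file_to_subtitles."""
    times_texts, current_times, current_text, index = [], None, "", 0
    for line in text.splitlines(keepends=True):
        times = re.findall(r"([0-9]*:[0-9]*:[0-9]*,[0-9]*)", line)
        if times:
            current_times = line
        elif line.strip() == "" and current_times:
            index += 1
            times_texts.append((index, current_times.strip(), current_text.strip()))
            current_times, current_text = None, ""
        elif current_times:
            # assumed: lines between the timestamp and the blank line are cue text
            current_text += line
    return times_texts


srt = "1\n00:00:00,000 --> 00:00:02,500\nHello world\n\n"
print(parse_srt(srt))
```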
def levenshtein_distance(s1, s2):
    if len(s1) < len(s2):
        return levenshtein_distance(s2, s1)

    if len(s2) == 0:
        return len(s1)

    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    return previous_row[-1]


def similarity(a, b):
    distance = levenshtein_distance(a.lower(), b.lower())
    max_length = max(len(a), len(b))
    return 1 - (distance / max_length)
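The two helpers above implement a case-insensitive edit-distance score normalized into [0, 1], which `correct` later compares against a 0.8 threshold when deciding whether merged subtitle fragments match a script line. They are repeated here unchanged so the snippet runs standalone:

```python
def levenshtein_distance(s1, s2):
    # classic two-row dynamic-programming edit distance
    if len(s1) < len(s2):
        return levenshtein_distance(s2, s1)
    if len(s2) == 0:
        return len(s1)
    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    return previous_row[-1]


def similarity(a, b):
    distance = levenshtein_distance(a.lower(), b.lower())
    max_length = max(len(a), len(b))
    return 1 - (distance / max_length)


print(levenshtein_distance("kitten", "sitting"))  # → 3
print(similarity("Hello World", "hello world"))   # → 1.0
```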
def correct(subtitle_file, video_script):
    subtitle_items = file_to_subtitles(subtitle_file)
    script_lines = utils.split_string_by_punctuations(video_script)

    corrected = False
    new_subtitle_items = []
    script_index = 0
    subtitle_index = 0

    while script_index < len(script_lines) and subtitle_index < len(subtitle_items):
        script_line = script_lines[script_index].strip()
        subtitle_line = subtitle_items[subtitle_index][2].strip()

        if script_line == subtitle_line:
            new_subtitle_items.append(subtitle_items[subtitle_index])
            script_index += 1
            subtitle_index += 1
        else:
            combined_subtitle = subtitle_line
            start_time = subtitle_items[subtitle_index][1].split(" --> ")[0]
            end_time = subtitle_items[subtitle_index][1].split(" --> ")[1]
            next_subtitle_index = subtitle_index + 1

            while next_subtitle_index < len(subtitle_items):
                next_subtitle = subtitle_items[next_subtitle_index][2].strip()
                if similarity(
                    script_line, combined_subtitle + " " + next_subtitle
                ) > similarity(script_line, combined_subtitle):
                    combined_subtitle += " " + next_subtitle
                    end_time = subtitle_items[next_subtitle_index][1].split(" --> ")[1]
                    next_subtitle_index += 1
                else:
                    break

            if similarity(script_line, combined_subtitle) > 0.8:
                logger.warning(
                    f"Merged/Corrected - Script: {script_line}, Subtitle: {combined_subtitle}"
                )
                new_subtitle_items.append(
                    (
                        len(new_subtitle_items) + 1,
                        f"{start_time} --> {end_time}",
                        script_line,
                    )
                )
                corrected = True
            else:
                logger.warning(
                    f"Mismatch - Script: {script_line}, Subtitle: {combined_subtitle}"
                )
                new_subtitle_items.append(
                    (
                        len(new_subtitle_items) + 1,
                        f"{start_time} --> {end_time}",
                        script_line,
                    )
                )
                corrected = True

            script_index += 1
            subtitle_index = next_subtitle_index

    # handle any remaining script lines
    while script_index < len(script_lines):
        logger.warning(f"Extra script line: {script_lines[script_index]}")
        if subtitle_index < len(subtitle_items):
            new_subtitle_items.append(
                (
                    len(new_subtitle_items) + 1,
                    subtitle_items[subtitle_index][1],
                    script_lines[script_index],
                )
            )
            subtitle_index += 1
        else:
            new_subtitle_items.append(
                (
                    len(new_subtitle_items) + 1,
                    "00:00:00,000 --> 00:00:00,000",
                    script_lines[script_index],
                )
            )
        script_index += 1
        corrected = True

    if corrected:
        with open(subtitle_file, "w", encoding="utf-8") as fd:
            for i, item in enumerate(new_subtitle_items):
                fd.write(f"{i + 1}\n{item[1]}\n{item[2]}\n\n")
        logger.info("Subtitle corrected")
    else:
        logger.success("Subtitle is correct")


if __name__ == "__main__":


@@ -3,61 +3,47 @@ import os.path
import re
from os import path

from loguru import logger

from app.config import config
from app.models import const
from app.models.schema import VideoConcatMode, VideoParams
from app.services import llm, material, subtitle, video, voice
from app.services import state as sm
from app.utils import utils
def generate_script(task_id, params):
    logger.info("\n\n## generating video script")
    video_script = params.video_script.strip()
    if not video_script:
        video_script = llm.generate_script(
            video_subject=params.video_subject,
            language=params.video_language,
            paragraph_number=params.paragraph_number,
        )
    else:
        logger.debug(f"video script: \n{video_script}")

    if not video_script:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        logger.error("failed to generate video script.")
        return None

    return video_script
def generate_terms(task_id, params, video_script):
    logger.info("\n\n## generating video terms")
    video_terms = params.video_terms
    if not video_terms:
        video_terms = llm.generate_terms(
            video_subject=params.video_subject, video_script=video_script, amount=5
        )
    else:
        if isinstance(video_terms, str):
            video_terms = [term.strip() for term in re.split(r"[,]", video_terms)]
        elif isinstance(video_terms, list):
            video_terms = [term.strip() for term in video_terms]
        else:
@@ -68,9 +54,13 @@ def start(task_id, params: VideoParams):
    if not video_terms:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        logger.error("failed to generate video terms.")
        return None

    return video_terms


def save_script_data(task_id, video_script, video_terms, params):
    script_file = path.join(utils.task_dir(task_id), "script.json")
    script_data = {
        "script": video_script,
        "search_terms": video_terms,
@@ -80,11 +70,16 @@ def start(task_id, params: VideoParams):
    with open(script_file, "w", encoding="utf-8") as f:
        f.write(utils.to_json(script_data))


def generate_audio(task_id, params, video_script):
    logger.info("\n\n## generating audio")
    audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
    sub_maker = voice.tts(
        text=video_script,
        voice_name=voice.parse_voice_name(params.voice_name),
        voice_rate=params.voice_rate,
        voice_file=audio_file,
    )
    if sub_maker is None:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        logger.error(
@@ -93,21 +88,25 @@ def start(task_id, params: VideoParams):
            2. check if the network is available. If you are in China, it is recommended to use a VPN and enable the global traffic mode.
            """.strip()
        )
        return None, None, None

    audio_duration = math.ceil(voice.get_audio_duration(sub_maker))
    return audio_file, audio_duration, sub_maker


def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):
    if not params.subtitle_enabled:
        return ""

    subtitle_path = path.join(utils.task_dir(task_id), "subtitle.srt")
    subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
    logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")

    subtitle_fallback = False
    if subtitle_provider == "edge":
        voice.create_subtitle(
            text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path
        )
        if not os.path.exists(subtitle_path):
            subtitle_fallback = True
            logger.warning("subtitle file not found, fallback to whisper")
@@ -120,59 +119,69 @@ def start(task_id, params: VideoParams):
    subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
    if not subtitle_lines:
        logger.warning(f"subtitle file is invalid: {subtitle_path}")
        return ""

    return subtitle_path


def get_video_materials(task_id, params, video_terms, audio_duration):
    if params.video_source == "local":
        logger.info("\n\n## preprocess local materials")
        materials = video.preprocess_video(
            materials=params.video_materials, clip_duration=params.video_clip_duration
        )
        if not materials:
            sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
            logger.error(
                "no valid materials found, please check the materials and try again."
            )
            return None
        return [material_info.url for material_info in materials]
    else:
        logger.info(f"\n\n## downloading videos from {params.video_source}")
        downloaded_videos = material.download_videos(
            task_id=task_id,
            search_terms=video_terms,
            source=params.video_source,
            video_aspect=params.video_aspect,
            video_contact_mode=params.video_concat_mode,
            audio_duration=audio_duration * params.video_count,
            max_clip_duration=params.video_clip_duration,
        )
        if not downloaded_videos:
            sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
            logger.error(
                "failed to download videos, maybe the network is not available. if you are in China, please use a VPN."
            )
            return None
        return downloaded_videos


def generate_final_videos(
    task_id, params, downloaded_videos, audio_file, subtitle_path
):
    final_video_paths = []
    combined_video_paths = []
    video_concat_mode = (
        params.video_concat_mode if params.video_count == 1 else VideoConcatMode.random
    )

    _progress = 50
    for i in range(params.video_count):
        index = i + 1
        combined_video_path = path.join(
            utils.task_dir(task_id), f"combined-{index}.mp4"
        )
        logger.info(f"\n\n## combining video: {index} => {combined_video_path}")
        video.combine_videos(
            combined_video_path=combined_video_path,
            video_paths=downloaded_videos,
            audio_file=audio_file,
            video_aspect=params.video_aspect,
            video_concat_mode=video_concat_mode,
            max_clip_duration=params.video_clip_duration,
            threads=params.n_threads,
        )

        _progress += 50 / params.video_count / 2
        sm.state.update_task(task_id, progress=_progress)
@@ -180,8 +189,8 @@ def start(task_id, params: VideoParams):
        final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")

        logger.info(f"\n\n## generating video: {index} => {final_video_path}")
        video.generate_video(
            video_path=combined_video_path,
            audio_path=audio_file,
            subtitle_path=subtitle_path,
            output_file=final_video_path,
@@ -194,16 +203,133 @@ def start(task_id, params: VideoParams):
        final_video_paths.append(final_video_path)
        combined_video_paths.append(combined_video_path)

    return final_video_paths, combined_video_paths


def start(task_id, params: VideoParams, stop_at: str = "video"):
    logger.info(f"start task: {task_id}, stop_at: {stop_at}")
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)

    if type(params.video_concat_mode) is str:
        params.video_concat_mode = VideoConcatMode(params.video_concat_mode)

    # 1. Generate script
    video_script = generate_script(task_id, params)
    if not video_script:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        return

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=10)

    if stop_at == "script":
        sm.state.update_task(
            task_id, state=const.TASK_STATE_COMPLETE, progress=100, script=video_script
        )
        return {"script": video_script}

    # 2. Generate terms
    video_terms = ""
    if params.video_source != "local":
        video_terms = generate_terms(task_id, params, video_script)
        if not video_terms:
            sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
            return

    save_script_data(task_id, video_script, video_terms, params)

    if stop_at == "terms":
        sm.state.update_task(
            task_id, state=const.TASK_STATE_COMPLETE, progress=100, terms=video_terms
        )
        return {"script": video_script, "terms": video_terms}

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)

    # 3. Generate audio
    audio_file, audio_duration, sub_maker = generate_audio(task_id, params, video_script)
    if not audio_file:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        return

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)

    if stop_at == "audio":
        sm.state.update_task(
            task_id,
            state=const.TASK_STATE_COMPLETE,
            progress=100,
            audio_file=audio_file,
        )
        return {"audio_file": audio_file, "audio_duration": audio_duration}

    # 4. Generate subtitle
    subtitle_path = generate_subtitle(task_id, params, video_script, sub_maker, audio_file)

    if stop_at == "subtitle":
        sm.state.update_task(
            task_id,
            state=const.TASK_STATE_COMPLETE,
            progress=100,
            subtitle_path=subtitle_path,
        )
        return {"subtitle_path": subtitle_path}

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)

    # 5. Get video materials
    downloaded_videos = get_video_materials(
        task_id, params, video_terms, audio_duration
    )
    if not downloaded_videos:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        return

    if stop_at == "materials":
        sm.state.update_task(
            task_id,
            state=const.TASK_STATE_COMPLETE,
            progress=100,
            materials=downloaded_videos,
        )
        return {"materials": downloaded_videos}

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=50)

    # 6. Generate final videos
    final_video_paths, combined_video_paths = generate_final_videos(
        task_id, params, downloaded_videos, audio_file, subtitle_path
    )
    if not final_video_paths:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        return

    logger.success(
        f"task {task_id} finished, generated {len(final_video_paths)} videos."
    )

    kwargs = {
        "videos": final_video_paths,
        "combined_videos": combined_video_paths,
        "script": video_script,
        "terms": video_terms,
        "audio_file": audio_file,
        "audio_duration": audio_duration,
        "subtitle_path": subtitle_path,
        "materials": downloaded_videos,
    }
    sm.state.update_task(
        task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs
    )
    return kwargs


if __name__ == "__main__":
    task_id = "task_id"
    params = VideoParams(
        video_subject="金钱的作用",
        voice_name="zh-CN-XiaoyiNeural-Female",
        voice_rate=1.0,
    )
    start(task_id, params, stop_at="video")
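The refactored `start` threads a `stop_at` stage name through the pipeline, marking the task complete and returning early once the requested stage finishes. The same early-exit pattern in a minimal generic form (stage names mirror the task's stages; the per-stage work is stubbed out for illustration):

```python
def run_pipeline(stop_at: str = "video") -> dict:
    """Run stages in order, stopping after the stage named by stop_at."""
    results = {}
    stages = ["script", "terms", "audio", "subtitle", "materials", "video"]
    for stage in stages:
        results[stage] = f"{stage}-output"  # stand-in for the real work
        if stage == stop_at:
            break  # early exit: later stages never run
    return results


print(run_pipeline(stop_at="audio"))
```

This keeps the orchestration in one place while letting callers (e.g. an API that only wants a script or the audio) pay for just the stages they need.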


@@ -1,13 +1,15 @@
import glob
import os
import random
from typing import List

from loguru import logger
from moviepy import *
from moviepy.video.tools.subtitles import SubtitlesClip
from PIL import ImageFont

from app.models import const
from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode, VideoParams
from app.utils import utils
@@ -27,14 +29,15 @@ def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
        return ""


def combine_videos(
    combined_video_path: str,
    video_paths: List[str],
    audio_file: str,
    video_aspect: VideoAspect = VideoAspect.portrait,
    video_concat_mode: VideoConcatMode = VideoConcatMode.random,
    max_clip_duration: int = 5,
    threads: int = 2,
) -> str:
    audio_clip = AudioFileClip(audio_file)
    audio_duration = audio_clip.duration
    logger.info(f"max duration of audio: {audio_duration} seconds")
@@ -58,7 +61,7 @@ def combine_videos(combined_video_path: str,
        while start_time < clip_duration:
            end_time = min(start_time + max_clip_duration, clip_duration)
            split_clip = clip.subclipped(start_time, end_time)
            raw_clips.append(split_clip)
            # logger.info(f"splitting from {start_time:.2f} to {end_time:.2f}, clip duration {clip_duration:.2f}, split_clip duration {split_clip.duration:.2f}")
            start_time = end_time
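The loop above slices each source clip into consecutive windows of at most `max_clip_duration` seconds, with the last window absorbing the remainder. The window arithmetic on its own, without moviepy:

```python
def split_windows(clip_duration: float, max_clip_duration: float):
    """Return (start, end) pairs covering [0, clip_duration] in fixed-size steps."""
    windows, start = [], 0.0
    while start < clip_duration:
        end = min(start + max_clip_duration, clip_duration)  # clamp the final window
        windows.append((start, end))
        start = end
    return windows


print(split_windows(12.0, 5.0))  # → [(0.0, 5.0), (5.0, 10.0), (10.0, 12.0)]
```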
@@ -74,11 +77,11 @@ def combine_videos(combined_video_path: str,
    for clip in raw_clips:
        # Check if clip is longer than the remaining audio
        if (audio_duration - video_duration) < clip.duration:
            clip = clip.subclipped(0, (audio_duration - video_duration))
        # Only shorten clips if the calculated clip length (req_dur) is shorter than the actual clip to prevent still image
        elif req_dur < clip.duration:
            clip = clip.subclipped(0, req_dur)
        clip = clip.with_fps(30)

        # Not all videos are same size, so we need to resize them
        clip_w, clip_h = clip.size
@@ -88,7 +91,7 @@ def combine_videos(combined_video_path: str,
        if clip_ratio == video_ratio:
            # scale to the target size directly (aspect ratios already match)
            clip = clip.resized((video_width, video_height))
        else:
            # scale the video proportionally
            if clip_ratio > video_ratio:
@@ -100,27 +103,34 @@ def combine_videos(combined_video_path: str,
            new_width = int(clip_w * scale_factor)
            new_height = int(clip_h * scale_factor)
            clip_resized = clip.resized(new_size=(new_width, new_height))

            background = ColorClip(
                size=(video_width, video_height), color=(0, 0, 0)
            )
            clip = CompositeVideoClip(
                [
                    background.with_duration(clip.duration),
                    clip_resized.with_position("center"),
                ]
            )

            logger.info(
                f"resizing video to {video_width} x {video_height}, clip size: {clip_w} x {clip_h}"
            )

        if clip.duration > max_clip_duration:
            clip = clip.subclipped(0, max_clip_duration)
        clips.append(clip)
        video_duration += clip.duration

    video_clip = concatenate_videoclips(clips)
    video_clip = video_clip.with_fps(30)
    logger.info("writing")
    # https://github.com/harry0703/MoneyPrinterTurbo/issues/111#issuecomment-2032354030
    video_clip.write_videofile(
        filename=combined_video_path,
        threads=threads,
        logger=None,
        temp_audiofile_path=output_dir,
@@ -128,11 +138,11 @@ def combine_videos(combined_video_path: str,
        fps=30,
    )
    video_clip.close()
    logger.success("completed")
    return combined_video_path


def wrap_text(text, max_width, font="Arial", fontsize=60):
    # create a font object
    font = ImageFont.truetype(font, fontsize)
@@ -151,7 +161,7 @@ def wrap_text(text, max_width, font='Arial', fontsize=60):
    _wrapped_lines_ = []
    words = text.split(" ")
    _txt_ = ""
    for word in words:
        _before = _txt_
        _txt_ += f"{word} "
@@ -167,14 +177,14 @@ def wrap_text(text, max_width, font='Arial', fontsize=60):
            _wrapped_lines_.append(_txt_)
    if processed:
        _wrapped_lines_ = [line.strip() for line in _wrapped_lines_]
        result = "\n".join(_wrapped_lines_).strip()
        height = len(_wrapped_lines_) * height
        # logger.warning(f"wrapped text: {result}")
        return result, height

    _wrapped_lines_ = []
    chars = list(text)
    _txt_ = ""
    for word in chars:
        _txt_ += word
        _width, _height = get_text_size(_txt_)
@@ -182,20 +192,21 @@ def wrap_text(text, max_width, font='Arial', fontsize=60):
            continue
        else:
            _wrapped_lines_.append(_txt_)
            _txt_ = ""
    _wrapped_lines_.append(_txt_)
    result = "\n".join(_wrapped_lines_).strip()
    height = len(_wrapped_lines_) * height
    # logger.warning(f"wrapped text: {result}")
    return result, height


def generate_video(
    video_path: str,
    audio_path: str,
    subtitle_path: str,
    output_file: str,
    params: VideoParams,
):
    aspect = VideoAspect(params.video_aspect)
    video_width, video_height = aspect.to_resolution()
@@ -215,46 +226,62 @@ def generate_video(video_path: str,
    if not params.font_name:
        params.font_name = "STHeitiMedium.ttc"
    font_path = os.path.join(utils.font_dir(), params.font_name)
    if os.name == "nt":
        font_path = font_path.replace("\\", "/")

    logger.info(f"using font: {font_path}")

    def create_text_clip(subtitle_item):
        phrase = subtitle_item[1]
        max_width = video_width * 0.9
        wrapped_txt, txt_height = wrap_text(
            phrase, max_width=max_width, font=font_path, fontsize=params.font_size
        )
        _clip = TextClip(
            text=wrapped_txt,
            font=font_path,
            font_size=params.font_size,
            color=params.text_fore_color,
            bg_color=params.text_background_color,
            stroke_color=params.stroke_color,
            stroke_width=params.stroke_width,
        )
        duration = subtitle_item[0][1] - subtitle_item[0][0]
        _clip = _clip.with_start(subtitle_item[0][0])
        _clip = _clip.with_end(subtitle_item[0][1])
        _clip = _clip.with_duration(duration)
        if params.subtitle_position == "bottom":
            _clip = _clip.with_position(("center", video_height * 0.95 - _clip.h))
        elif params.subtitle_position == "top":
            _clip = _clip.with_position(("center", video_height * 0.05))
        elif params.subtitle_position == "custom":
            # Keep the subtitle fully on screen
            margin = 10  # extra margin, in pixels
            max_y = video_height - _clip.h - margin
            min_y = margin
            custom_y = (video_height - _clip.h) * (params.custom_position / 100)
            custom_y = max(min_y, min(custom_y, max_y))  # clamp y to the valid range
            _clip = _clip.with_position(("center", custom_y))
        else:  # center
            _clip = _clip.with_position(("center", "center"))
        return _clip

    video_clip = VideoFileClip(video_path)
    audio_clip = AudioFileClip(audio_path).with_effects(
        [afx.MultiplyVolume(params.voice_volume)]
    )

    if subtitle_path and os.path.exists(subtitle_path):
        generator = lambda text: TextClip(
            text=text,
            font=font_path,
            font_size=params.font_size,
        )
        sub = SubtitlesClip(
            subtitles=subtitle_path, encoding="utf-8", make_textclip=generator
        )
        text_clips = []
        for item in sub.subtitles:
            clip = create_text_clip(subtitle_item=item)
@@ -264,16 +291,20 @@ def generate_video(video_path: str,
    bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
    if bgm_file:
        try:
            bgm_clip = AudioFileClip(bgm_file).with_effects(
                [
                    afx.MultiplyVolume(params.bgm_volume),
                    afx.AudioFadeOut(3),
                    afx.AudioLoop(duration=video_clip.duration),
                ]
            )
            audio_clip = CompositeAudioClip([audio_clip, bgm_clip])
        except Exception as e:
            logger.error(f"failed to add bgm: {str(e)}")

    video_clip = video_clip.with_audio(audio_clip)
    video_clip.write_videofile(
        output_file,
        audio_codec="aac",
        temp_audiofile_path=output_dir,
        threads=params.n_threads or 2,
@@ -281,7 +312,8 @@ def generate_video(video_path: str,
        fps=30,
    )
    video_clip.close()
    del video_clip

    logger.success("completed")


def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
@@ -292,7 +324,7 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
        ext = utils.parse_extension(material.url)
        try:
            clip = VideoFileClip(material.url)
        except Exception:
            clip = ImageClip(material.url)

        width = clip.size[0]
@@ -304,12 +336,18 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
        if ext in const.FILE_TYPE_IMAGES:
            logger.info(f"processing image: {material.url}")
            # Create an image clip, set its duration, and center it
            clip = (
                ImageClip(material.url)
                .with_duration(clip_duration)
                .with_position("center")
            )
            # Use resized() with a lambda so the scale factor changes over time,
            # zooming gradually up from the original size (1.0 = 100%).
            # t is the current time; clip.duration is the total clip length.
            zoom_clip = clip.resized(
                lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
            )

            # If needed, a composite clip can wrap the zoomed clip
            # (useful when adding other elements to the video)
@@ -319,6 +357,7 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
            video_file = f"{material.url}.mp4"
            final_clip.write_videofile(video_file, fps=30, logger=None)
            final_clip.close()
            del final_clip

            material.url = video_file
            logger.success(f"completed: {video_file}")
    return materials
@@ -302,21 +302,33 @@ Gender: Female
Name: en-US-AnaNeural
Gender: Female

Name: en-US-AndrewMultilingualNeural
Gender: Male

Name: en-US-AndrewNeural
Gender: Male

Name: en-US-AriaNeural
Gender: Female

Name: en-US-AvaMultilingualNeural
Gender: Female

Name: en-US-AvaNeural
Gender: Female

Name: en-US-BrianMultilingualNeural
Gender: Male

Name: en-US-BrianNeural
Gender: Male

Name: en-US-ChristopherNeural
Gender: Male

Name: en-US-EmmaMultilingualNeural
Gender: Female

Name: en-US-EmmaNeural
Gender: Female
@@ -602,12 +614,24 @@ Gender: Male
Name: it-IT-ElsaNeural
Gender: Female

Name: it-IT-GiuseppeMultilingualNeural
Gender: Male

Name: it-IT-IsabellaNeural
Gender: Female

Name: iu-Cans-CA-SiqiniqNeural
Gender: Female

Name: iu-Cans-CA-TaqqiqNeural
Gender: Male

Name: iu-Latn-CA-SiqiniqNeural
Gender: Female

Name: iu-Latn-CA-TaqqiqNeural
Gender: Male

Name: ja-JP-KeitaNeural
Gender: Male
@@ -644,7 +668,7 @@ Gender: Male
Name: kn-IN-SapnaNeural
Gender: Female

Name: ko-KR-HyunsuMultilingualNeural
Gender: Male

Name: ko-KR-InJoonNeural
@@ -758,7 +782,7 @@ Gender: Male
Name: pt-BR-FranciscaNeural
Gender: Female

Name: pt-BR-ThalitaMultilingualNeural
Gender: Female

Name: pt-PT-DuarteNeural
@@ -988,7 +1012,7 @@ Name: zh-CN-XiaoxiaoMultilingualNeural-V2
Gender: Female
""".strip()
    voices = []
    name = ""
    for line in voices_str.split("\n"):
        line = line.strip()
        if not line:
@@ -1008,7 +1032,7 @@ Gender: Female
                voices.append(f"{name}-{gender}")
            else:
                voices.append(f"{name}-{gender}")
            name = ""
    voices.sort()
    return voices
@@ -1028,28 +1052,45 @@ def is_azure_v2_voice(voice_name: str):
    return ""


def tts(
    text: str, voice_name: str, voice_rate: float, voice_file: str
) -> [SubMaker, None]:
    if is_azure_v2_voice(voice_name):
        return azure_tts_v2(text, voice_name, voice_file)
    return azure_tts_v1(text, voice_name, voice_rate, voice_file)


def convert_rate_to_percent(rate: float) -> str:
    if rate == 1.0:
        return "+0%"
    percent = round((rate - 1.0) * 100)
    if percent > 0:
        return f"+{percent}%"
    else:
        return f"{percent}%"


def azure_tts_v1(
    text: str, voice_name: str, voice_rate: float, voice_file: str
) -> [SubMaker, None]:
    voice_name = parse_voice_name(voice_name)
    text = text.strip()
    rate_str = convert_rate_to_percent(voice_rate)
    for i in range(3):
        try:
            logger.info(f"start, voice name: {voice_name}, try: {i + 1}")

            async def _do() -> SubMaker:
                communicate = edge_tts.Communicate(text, voice_name, rate=rate_str)
                sub_maker = edge_tts.SubMaker()
                with open(voice_file, "wb") as file:
                    async for chunk in communicate.stream():
                        if chunk["type"] == "audio":
                            file.write(chunk["data"])
                        elif chunk["type"] == "WordBoundary":
                            sub_maker.create_sub(
                                (chunk["offset"], chunk["duration"]), chunk["text"]
                            )
                return sub_maker

            sub_maker = asyncio.run(_do())
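`convert_rate_to_percent` is a pure function, so its mapping from a speed multiplier to edge-tts's signed percentage string is easy to check in isolation. Restated here standalone for illustration:

```python
def convert_rate_to_percent(rate: float) -> str:
    # 1.0 means normal speed; 1.25 means 25% faster; 0.8 means 20% slower.
    if rate == 1.0:
        return "+0%"
    percent = round((rate - 1.0) * 100)
    # edge-tts expects an explicit sign, e.g. "+25%" or "-20%".
    return f"+{percent}%" if percent > 0 else f"{percent}%"


print(convert_rate_to_percent(1.0))   # +0%
print(convert_rate_to_percent(1.25))  # +25%
print(convert_rate_to_percent(0.8))   # -20%
```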
@@ -1074,8 +1115,12 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
def _format_duration_to_offset(duration) -> int:
    if isinstance(duration, str):
        time_obj = datetime.strptime(duration, "%H:%M:%S.%f")
        milliseconds = (
            (time_obj.hour * 3600000)
            + (time_obj.minute * 60000)
            + (time_obj.second * 1000)
            + (time_obj.microsecond // 1000)
        )
        return milliseconds * 10000

    if isinstance(duration, int):
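`_format_duration_to_offset` converts Azure's `H:M:S.fff` strings into 100-nanosecond ticks (milliseconds × 10,000), the unit the Speech SDK uses for word-boundary offsets. A standalone copy (renamed without the leading underscore, purely for illustration) shows the arithmetic:

```python
from datetime import datetime


def format_duration_to_offset(duration) -> int:
    # Parse "H:M:S.fff", sum to milliseconds, then scale to 100-ns ticks.
    if isinstance(duration, str):
        time_obj = datetime.strptime(duration, "%H:%M:%S.%f")
        milliseconds = (
            (time_obj.hour * 3600000)
            + (time_obj.minute * 60000)
            + (time_obj.second * 1000)
            + (time_obj.microsecond // 1000)
        )
        return milliseconds * 10000
    if isinstance(duration, int):
        return duration  # already in ticks
    return 0


# 1.5 s = 1500 ms = 15,000,000 ticks of 100 ns
print(format_duration_to_offset("00:00:01.500"))  # 15000000
```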
@@ -1108,20 +1153,29 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
    # Creates an instance of a speech config with specified subscription key and service region.
    speech_key = config.azure.get("speech_key", "")
    service_region = config.azure.get("speech_region", "")
    audio_config = speechsdk.audio.AudioOutputConfig(
        filename=voice_file, use_default_speaker=True
    )
    speech_config = speechsdk.SpeechConfig(
        subscription=speech_key, region=service_region
    )
    speech_config.speech_synthesis_voice_name = voice_name
    # speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary,
    #                            value='true')
    speech_config.set_property(
        property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestWordBoundary,
        value="true",
    )
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Audio48Khz192KBitRateMonoMp3
    )
    speech_synthesizer = speechsdk.SpeechSynthesizer(
        audio_config=audio_config, speech_config=speech_config
    )
    speech_synthesizer.synthesis_word_boundary.connect(
        speech_synthesizer_word_boundary_cb
    )
    result = speech_synthesizer.speak_text_async(text).get()
    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
@@ -1129,9 +1183,13 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
            return sub_maker
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            logger.error(
                f"azure v2 speech synthesis canceled: {cancellation_details.reason}"
            )
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                logger.error(
                    f"azure v2 speech synthesis error: {cancellation_details.error_details}"
                )
        logger.info(f"completed, output file: {voice_file}")
    except Exception as e:
        logger.error(f"failed, error: {str(e)}")
@@ -1168,11 +1226,7 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
        """
        start_t = mktimestamp(start_time).replace(".", ",")
        end_t = mktimestamp(end_time).replace(".", ",")
        return f"{idx}\n" f"{start_t} --> {end_t}\n" f"{sub_text}\n"

    start_time = -1.0
    sub_items = []
@@ -1229,12 +1283,16 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
        try:
            sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
            duration = max([tb for ((ta, tb), txt) in sbs])
            logger.info(
                f"completed, subtitle file created: {subtitle_file}, duration: {duration}"
            )
        except Exception as e:
            logger.error(f"failed, error: {str(e)}")
            os.remove(subtitle_file)
        else:
            logger.warning(
                f"failed, sub_items len: {len(sub_items)}, script_lines len: {len(script_lines)}"
            )
    except Exception as e:
        logger.error(f"failed, error: {str(e)}")
@@ -1258,7 +1316,6 @@ if __name__ == "__main__":
    voices = get_all_azure_voices()
    print(len(voices))

    async def _do():
        temp_dir = utils.storage_dir("temp")
@@ -1307,12 +1364,13 @@ if __name__ == "__main__":
        for voice_name in voice_names:
            voice_file = f"{temp_dir}/tts-{voice_name}.mp3"
            subtitle_file = f"{temp_dir}/tts.mp3.srt"
            sub_maker = azure_tts_v2(
                text=text, voice_name=voice_name, voice_file=voice_file
            )
            create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
            audio_duration = get_audio_duration(sub_maker)
            print(f"voice: {voice_name}, audio duration: {audio_duration}s")

    loop = asyncio.get_event_loop_policy().get_event_loop()
    try:
        loop.run_until_complete(_do())
@@ -15,12 +15,12 @@ urllib3.disable_warnings()
def get_response(status: int, data: Any = None, message: str = ""):
    obj = {
        "status": status,
    }
    if data:
        obj["data"] = data
    if message:
        obj["message"] = message
    return obj
@@ -41,7 +41,7 @@ def to_json(obj):
        elif isinstance(o, (list, tuple)):
            return [serialize(item) for item in o]
        # If the object is a custom type, try to return its __dict__ attribute
        elif hasattr(o, "__dict__"):
            return serialize(o.__dict__)
        # Otherwise return None (or optionally raise an exception)
        else:
@@ -199,7 +199,8 @@ def split_string_by_punctuations(s):
def md5(text):
    import hashlib

    return hashlib.md5(text.encode("utf-8")).hexdigest()


def get_system_locale():
main.py
@@ -1,8 +1,16 @@
import uvicorn
from loguru import logger

from app.config import config

if __name__ == "__main__":
    logger.info(
        "start server, docs: http://127.0.0.1:" + str(config.listen_port) + "/docs"
    )
    uvicorn.run(
        app="app.asgi:app",
        host=config.listen_host,
        port=config.listen_port,
        reload=config.reload_debug,
        log_level="warning",
    )
@@ -1,26 +1,14 @@
moviepy==2.1.1
streamlit==1.40.2
edge_tts==6.1.19
fastapi==0.115.6
uvicorn==0.32.1
openai==1.56.1
faster-whisper==1.1.0
loguru==0.7.2
google.generativeai==0.8.3
dashscope==1.20.14
g4f==0.3.8.1
azure-cognitiveservices-speech==1.41.1
redis==5.2.0
python-multipart==0.0.19
@@ -1,6 +1,5 @@
import os
import sys

# Add the root directory of the project to the system path to allow importing modules from the project
root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
@@ -10,31 +9,33 @@ if root_dir not in sys.path:
print(sys.path)
print("")

import os
import platform
from uuid import uuid4

import streamlit as st
from loguru import logger

st.set_page_config(
    page_title="MoneyPrinterTurbo",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        "Report a bug": "https://github.com/harry0703/MoneyPrinterTurbo/issues",
        "About": "# MoneyPrinterTurbo\nSimply provide a topic or keyword for a video, and it will "
        "automatically generate the video copy, video materials, video subtitles, "
        "and video background music before synthesizing a high-definition short "
        "video.\n\nhttps://github.com/harry0703/MoneyPrinterTurbo",
    },
)

from app.config import config
from app.models.const import FILE_TYPE_IMAGES, FILE_TYPE_VIDEOS
from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode, VideoParams
from app.services import llm, voice
from app.services import task as tm
from app.utils import utils

hide_streamlit_style = """
<style>#root > div:nth-child(1) > div > div > div > div > section > div {padding-top: 0rem;}</style>
@@ -42,7 +43,16 @@ hide_streamlit_style = """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
st.title(f"MoneyPrinterTurbo v{config.project_version}")

support_locales = [
    "zh-CN",
    "zh-HK",
    "zh-TW",
    "de-DE",
    "en-US",
    "fr-FR",
    "vi-VN",
    "th-TH",
]

font_dir = os.path.join(root_dir, "resource", "fonts")
song_dir = os.path.join(root_dir, "resource", "songs")
@@ -51,14 +61,14 @@ config_file = os.path.join(root_dir, "webui", ".streamlit", "webui.toml")
system_locale = utils.get_system_locale()
# print(f"******** system locale: {system_locale} ********")

if "video_subject" not in st.session_state:
    st.session_state["video_subject"] = ""
if "video_script" not in st.session_state:
    st.session_state["video_script"] = ""
if "video_terms" not in st.session_state:
    st.session_state["video_terms"] = ""
if "ui_language" not in st.session_state:
    st.session_state["ui_language"] = config.ui.get("language", system_locale)


def get_all_fonts():
@@ -85,25 +95,25 @@ def open_task_folder(task_id):
        sys = platform.system()
        path = os.path.join(root_dir, "storage", "tasks", task_id)
        if os.path.exists(path):
            if sys == "Windows":
                os.system(f"start {path}")
            if sys == "Darwin":
                os.system(f"open {path}")
    except Exception as e:
        logger.error(e)
def scroll_to_bottom():
    js = """
    <script>
        console.log("scroll_to_bottom");
        function scroll(dummy_var_to_force_repeat_execution){
            var sections = parent.document.querySelectorAll('section.main');
            console.log(sections);
            for(let index = 0; index<sections.length; index++) {
                sections[index].scrollTop = sections[index].scrollHeight;
            }
        }
        scroll(1);
    </script>
    """
@@ -123,12 +133,15 @@ def init_log():
        record["file"].path = f"./{relative_path}"
        # Return the modified format string; adjust the format here as needed
        record["message"] = record["message"].replace(root_dir, ".")

        _format = (
            "<green>{time:%Y-%m-%d %H:%M:%S}</> | "
            + "<level>{level}</> | "
            + '"{file.path}:{line}":<blue> {function}</> '
            + "- <level>{message}</>"
            + "\n"
        )
        return _format

    logger.add(
@@ -145,7 +158,7 @@ locales = utils.load_locales(i18n_dir)
def tr(key):
    loc = locales.get(st.session_state["ui_language"], {})
    return loc.get("Translation", {}).get(key, key)
@@ -164,15 +177,22 @@ if not config.app.get("hide_config", False):
        selected_index = 0
        for i, code in enumerate(locales.keys()):
            display_languages.append(f"{code} - {locales[code].get('Language')}")
            if code == st.session_state["ui_language"]:
                selected_index = i

        selected_language = st.selectbox(
            tr("Language"), options=display_languages, index=selected_index
        )
        if selected_language:
            code = selected_language.split(" - ")[0].strip()
            st.session_state["ui_language"] = code
            config.ui["language"] = code

        # Whether to hide the log display
        hide_log = st.checkbox(
            tr("Hide Log"), value=config.app.get("hide_log", False)
        )
        config.ui["hide_log"] = hide_log

    with middle_config_panel:
        # openai
@@ -183,8 +203,19 @@ if not config.app.get("hide_config", False):
        # qwen (通义千问)
        # gemini
        # ollama
        llm_providers = [
            "OpenAI",
            "Moonshot",
            "Azure",
            "Qwen",
            "DeepSeek",
            "Gemini",
            "Ollama",
            "G4f",
            "OneAPI",
            "Cloudflare",
            "ERNIE",
        ]
        saved_llm_provider = config.app.get("llm_provider", "OpenAI").lower()
        saved_llm_provider_index = 0
        for i, provider in enumerate(llm_providers):
@@ -192,18 +223,25 @@ if not config.app.get("hide_config", False):
            if provider.lower() == saved_llm_provider:
                saved_llm_provider_index = i
                break

        llm_provider = st.selectbox(
            tr("LLM Provider"),
            options=llm_providers,
            index=saved_llm_provider_index,
        )
        llm_helper = st.container()
        llm_provider = llm_provider.lower()
        config.app["llm_provider"] = llm_provider

        llm_api_key = config.app.get(f"{llm_provider}_api_key", "")
        llm_secret_key = config.app.get(
            f"{llm_provider}_secret_key", ""
        )  # only for baidu ernie
        llm_base_url = config.app.get(f"{llm_provider}_base_url", "")
        llm_model_name = config.app.get(f"{llm_provider}_model_name", "")
        llm_account_id = config.app.get(f"{llm_provider}_account_id", "")

        tips = ""

        if llm_provider == "ollama":
            if not llm_model_name:
                llm_model_name = "qwen:7b"
            if not llm_base_url:
@@ -219,7 +257,7 @@ if not config.app.get("hide_config", False):
            - **Model Name**: 使用 `ollama list` 查看,比如 `qwen:7b`
            """

        if llm_provider == "openai":
            if not llm_model_name:
                llm_model_name = "gpt-3.5-turbo"
            with llm_helper:
@@ -231,7 +269,7 @@ if not config.app.get("hide_config", False):
            - **Model Name**: 填写**有权限**的模型,[点击查看模型列表](https://platform.openai.com/settings/organization/limits)
            """

        if llm_provider == "moonshot":
            if not llm_model_name:
                llm_model_name = "moonshot-v1-8k"
            with llm_helper:
@@ -241,8 +279,20 @@ if not config.app.get("hide_config", False):
            - **Base Url**: 固定为 https://api.moonshot.cn/v1
            - **Model Name**: 比如 moonshot-v1-8k,[点击查看模型列表](https://platform.moonshot.cn/docs/intro#%E6%A8%A1%E5%9E%8B%E5%88%97%E8%A1%A8)
            """

        if llm_provider == "oneapi":
            if not llm_model_name:
                llm_model_name = (
                    "claude-3-5-sonnet-20240620"  # default model; adjust as needed
                )
            with llm_helper:
                tips = """
                ##### OneAPI 配置说明
                - **API Key**: 填写您的 OneAPI 密钥
                - **Base Url**: 填写 OneAPI 的基础 URL
                - **Model Name**: 填写您要使用的模型名称,例如 claude-3-5-sonnet-20240620
                """

        if llm_provider == "qwen":
            if not llm_model_name:
                llm_model_name = "qwen-max"
            with llm_helper:
@@ -253,7 +303,7 @@ if not config.app.get("hide_config", False):
- **Model Name**: 比如 qwen-max[点击查看模型列表](https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction#3ef6d0bcf91wy)
"""
if llm_provider == "g4f":
if not llm_model_name:
llm_model_name = "gpt-3.5-turbo"
with llm_helper:
@@ -264,7 +314,7 @@ if not config.app.get("hide_config", False):
- **Base Url**: 留空
- **Model Name**: 比如 gpt-3.5-turbo[点击查看模型列表](https://github.com/xtekky/gpt4free/blob/main/g4f/models.py#L308)
"""
if llm_provider == "azure":
with llm_helper:
tips = """
##### Azure 配置说明
@@ -274,7 +324,7 @@ if not config.app.get("hide_config", False):
- **Model Name**: 填写你实际的部署名
"""
if llm_provider == "gemini":
if not llm_model_name:
llm_model_name = "gemini-1.0-pro"
@@ -287,7 +337,7 @@ if not config.app.get("hide_config", False):
- **Model Name**: 比如 gemini-1.0-pro
"""
if llm_provider == "deepseek":
if not llm_model_name:
llm_model_name = "deepseek-chat"
if not llm_base_url:
@@ -300,14 +350,36 @@ if not config.app.get("hide_config", False):
- **Model Name**: 固定为 deepseek-chat
"""
if llm_provider == "ernie":
with llm_helper:
tips = """
##### 百度文心一言 配置说明
- **API Key**: [点击到官网申请](https://console.bce.baidu.com/qianfan/ais/console/applicationConsole/application)
- **Secret Key**: [点击到官网申请](https://console.bce.baidu.com/qianfan/ais/console/applicationConsole/application)
- **Base Url**: 填写 **请求地址** [点击查看文档](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/jlil56u11#%E8%AF%B7%E6%B1%82%E8%AF%B4%E6%98%8E)
"""
if tips and config.ui["language"] == "zh":
st.warning(
"中国用户建议使用 **DeepSeek** 或 **Moonshot** 作为大模型提供商\n- 国内可直接访问不需要VPN \n- 注册就送额度,基本够用"
)
st.info(tips)
st_llm_api_key = st.text_input(
tr("API Key"), value=llm_api_key, type="password"
)
st_llm_base_url = st.text_input(tr("Base Url"), value=llm_base_url)
st_llm_model_name = ""
if llm_provider != "ernie":
st_llm_model_name = st.text_input(
tr("Model Name"),
value=llm_model_name,
key=f"{llm_provider}_model_name_input",
)
if st_llm_model_name:
config.app[f"{llm_provider}_model_name"] = st_llm_model_name
else:
st_llm_model_name = None
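The `f"{llm_provider}_model_name"` keys above store one setting per provider in a single flat mapping. A minimal standalone sketch of the same pattern, with a plain dict standing in for `config.app` (provider and setting names below are illustrative):

```python
# Sketch: per-provider settings in one flat mapping, keyed by a provider
# prefix. `app` stands in for config.app.
app = {}

def save_setting(provider: str, name: str, value: str) -> None:
    # Mirror the UI logic: only persist non-empty values.
    if value:
        app[f"{provider}_{name}"] = value

save_setting("openai", "model_name", "gpt-3.5-turbo")
save_setting("ernie", "secret_key", "sk-demo")
save_setting("ernie", "model_name", "")  # empty input is not persisted
```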
if st_llm_api_key:
config.app[f"{llm_provider}_api_key"] = st_llm_api_key
@@ -315,13 +387,21 @@ if not config.app.get("hide_config", False):
config.app[f"{llm_provider}_base_url"] = st_llm_base_url
if st_llm_model_name:
config.app[f"{llm_provider}_model_name"] = st_llm_model_name
if llm_provider == "ernie":
st_llm_secret_key = st.text_input(
tr("Secret Key"), value=llm_secret_key, type="password"
)
config.app[f"{llm_provider}_secret_key"] = st_llm_secret_key
if llm_provider == "cloudflare":
st_llm_account_id = st.text_input(
tr("Account ID"), value=llm_account_id
)
if st_llm_account_id:
config.app[f"{llm_provider}_account_id"] = st_llm_account_id
with right_config_panel:
def get_keys_from_config(cfg_key):
api_keys = config.app.get(cfg_key, [])
if isinstance(api_keys, str):
@@ -329,19 +409,21 @@ if not config.app.get("hide_config", False):
api_key = ", ".join(api_keys)
return api_key
def save_keys_to_config(cfg_key, value):
value = value.replace(" ", "")
if value:
config.app[cfg_key] = value.split(",")
pexels_api_key = get_keys_from_config("pexels_api_keys")
pexels_api_key = st.text_input(
tr("Pexels API Key"), value=pexels_api_key, type="password"
)
save_keys_to_config("pexels_api_keys", pexels_api_key)
pixabay_api_key = get_keys_from_config("pixabay_api_keys")
pixabay_api_key = st.text_input(
tr("Pixabay API Key"), value=pixabay_api_key, type="password"
)
save_keys_to_config("pixabay_api_keys", pixabay_api_key)
panel = st.columns(3)
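The key helpers above round-trip a list of API keys through a single text field: config stores a list, the input shows it comma-joined, and saving splits the edited string back. A standalone sketch of that round trip, with a plain dict standing in for `config.app`:

```python
# Sketch of the API-key round trip: list in config, comma-joined string
# in the text input, split back into a list on save.
cfg = {"pexels_api_keys": ["key1", "key2"]}

def get_keys_from_config(cfg_key):
    api_keys = cfg.get(cfg_key, [])
    if isinstance(api_keys, str):
        api_keys = [api_keys]  # tolerate a single key stored as a string
    return ", ".join(api_keys)

def save_keys_to_config(cfg_key, value):
    value = value.replace(" ", "")  # strip spaces before splitting
    if value:
        cfg[cfg_key] = value.split(",")

shown = get_keys_from_config("pexels_api_keys")
save_keys_to_config("pexels_api_keys", shown + ", key3")
```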
@@ -355,8 +437,9 @@ uploaded_files = []
with left_panel:
with st.container(border=True):
st.write(tr("Video Script Settings"))
params.video_subject = st.text_input(
tr("Video Subject"), value=st.session_state["video_subject"]
).strip()
video_languages = [
(tr("Auto Detect"), ""),
@@ -364,24 +447,27 @@ with left_panel:
for code in support_locales:
video_languages.append((code, code))
selected_index = st.selectbox(
tr("Script Language"),
index=0,
options=range(len(video_languages)),  # use the index as the internal option value
format_func=lambda x: video_languages[x][0],  # show the label to the user
)
params.video_language = video_languages[selected_index][1]
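The selectboxes above keep `(label, value)` pairs and select by index: the UI renders labels through `format_func` while the code reads values. A plain-Python sketch of that lookup, with illustrative labels and `tr()` omitted:

```python
# Sketch of the (label, value) selectbox pattern used throughout this UI.
video_languages = [("Auto Detect", ""), ("zh-CN", "zh-CN"), ("en-US", "en-US")]

def format_func(x):
    return video_languages[x][0]  # label shown to the user

selected_index = 2  # a Streamlit selectbox would return this index
video_language = video_languages[selected_index][1]  # internal value
```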
if st.button(
tr("Generate Video Script and Keywords"), key="auto_generate_script"
):
with st.spinner(tr("Generating Video Script and Keywords")):
script = llm.generate_script(
video_subject=params.video_subject, language=params.video_language
)
terms = llm.generate_terms(params.video_subject, script)
st.session_state["video_script"] = script
st.session_state["video_terms"] = ", ".join(terms)
params.video_script = st.text_area(
tr("Video Script"), value=st.session_state["video_script"], height=280
)
if st.button(tr("Generate Video Keywords"), key="auto_generate_terms"):
if not params.video_script:
@@ -390,12 +476,11 @@ with left_panel:
with st.spinner(tr("Generating Video Keywords")):
terms = llm.generate_terms(params.video_subject, params.video_script)
st.session_state["video_terms"] = ", ".join(terms)
params.video_terms = st.text_area(
tr("Video Keywords"), value=st.session_state["video_terms"]
)
with middle_panel:
with st.container(border=True):
@@ -414,73 +499,93 @@ with middle_panel:
]
saved_video_source_name = config.app.get("video_source", "pexels")
saved_video_source_index = [v[1] for v in video_sources].index(
saved_video_source_name
)
selected_index = st.selectbox(
tr("Video Source"),
options=range(len(video_sources)),
format_func=lambda x: video_sources[x][0],
index=saved_video_source_index,
)
params.video_source = video_sources[selected_index][1]
config.app["video_source"] = params.video_source
if params.video_source == "local":
_supported_types = FILE_TYPE_VIDEOS + FILE_TYPE_IMAGES
uploaded_files = st.file_uploader(
"Upload Local Files",
type=["mp4", "mov", "avi", "flv", "mkv", "jpg", "jpeg", "png"],
accept_multiple_files=True,
)
selected_index = st.selectbox(
tr("Video Concat Mode"),
index=1,
options=range(len(video_concat_modes)),  # use the index as the internal option value
format_func=lambda x: video_concat_modes[x][0],  # show the label to the user
)
params.video_concat_mode = VideoConcatMode(
video_concat_modes[selected_index][1]
)
video_aspect_ratios = [ video_aspect_ratios = [
(tr("Portrait"), VideoAspect.portrait.value), (tr("Portrait"), VideoAspect.portrait.value),
(tr("Landscape"), VideoAspect.landscape.value), (tr("Landscape"), VideoAspect.landscape.value),
] ]
selected_index = st.selectbox(
tr("Video Ratio"),
options=range(len(video_aspect_ratios)),  # use the index as the internal option value
format_func=lambda x: video_aspect_ratios[x][0],  # show the label to the user
)
params.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])
params.video_clip_duration = st.selectbox(
tr("Clip Duration"), options=[2, 3, 4, 5, 6, 7, 8, 9, 10], index=1
)
params.video_count = st.selectbox(
tr("Number of Videos Generated Simultaneously"),
options=[1, 2, 3, 4, 5],
index=0,
)
with st.container(border=True):
st.write(tr("Audio Settings"))
# tts_providers = ['edge', 'azure']
# tts_provider = st.selectbox(tr("TTS Provider"), tts_providers)
voices = voice.get_all_azure_voices(filter_locals=support_locales)
friendly_names = {
v: v.replace("Female", tr("Female"))
.replace("Male", tr("Male"))
.replace("Neural", "")
for v in voices
}
saved_voice_name = config.ui.get("voice_name", "")
saved_voice_name_index = 0
if saved_voice_name in friendly_names:
saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
else:
for i, v in enumerate(voices):
if (
v.lower().startswith(st.session_state["ui_language"].lower())
and "V2" not in v
):
saved_voice_name_index = i
break
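The fallback above picks the first voice whose locale prefix matches the UI language, skipping V2 voices. A standalone sketch of that selection (the voice names below are illustrative, not the real Azure list):

```python
# Sketch: default-voice pick by UI-language prefix, skipping V2 voices.
voices = [
    "en-US-DemoNeural-V2-Female",
    "en-US-OtherNeural-Female",
    "zh-CN-DemoNeural-Female",
]
ui_language = "en-US"
saved_voice_name_index = 0
for i, v in enumerate(voices):
    if v.lower().startswith(ui_language.lower()) and "V2" not in v:
        saved_voice_name_index = i
        break
```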
selected_friendly_name = st.selectbox(
tr("Speech Synthesis"),
options=list(friendly_names.values()),
index=saved_voice_name_index,
)
voice_name = list(friendly_names.keys())[
list(friendly_names.values()).index(selected_friendly_name)
]
params.voice_name = voice_name
config.ui["voice_name"] = voice_name
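The mapping above keeps a forward dict for display and recovers the raw voice name from the selected label by list position. A standalone sketch with illustrative voice names:

```python
# Sketch: friendly-name forward map plus position-based reverse lookup.
voices = ["zh-CN-XiaoxiaoNeural-Female", "en-US-GuyNeural-Male"]
friendly_names = {
    v: v.replace("Female", "F").replace("Male", "M").replace("Neural", "")
    for v in voices
}
selected_friendly_name = list(friendly_names.values())[1]  # user's pick
voice_name = list(friendly_names.keys())[
    list(friendly_names.values()).index(selected_friendly_name)
]
```

This works because `dict` preserves insertion order, so `keys()` and `values()` stay aligned.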
if st.button(tr("Play Voice")):
play_content = params.video_subject
@@ -491,11 +596,21 @@ with middle_panel:
with st.spinner(tr("Synthesizing Voice")):
temp_dir = utils.storage_dir("temp", create=True)
audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")
sub_maker = voice.tts(
text=play_content,
voice_name=voice_name,
voice_rate=params.voice_rate,
voice_file=audio_file,
)
# if the voice file generation failed, try again with a default content.
if not sub_maker:
play_content = "This is an example voice. If you hear this, the voice synthesis failed with the original content."
sub_maker = voice.tts(
text=play_content,
voice_name=voice_name,
voice_rate=params.voice_rate,
voice_file=audio_file,
)
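The retry above falls back to a known-good default text when synthesizing the user's content fails. A standalone sketch of that pattern, where `fake_tts` is a stand-in for `voice.tts` that fails on empty text:

```python
# Sketch: retry TTS once with a default text if the first attempt fails.
def fake_tts(text, voice_name, voice_rate, voice_file):
    # Stand-in for voice.tts; returns None (failure) when text is empty.
    return {"ok": True} if text else None

play_content = ""  # e.g. the subject field was left empty
sub_maker = fake_tts(play_content, "demo-voice", 1.0, "tmp.mp3")
if not sub_maker:
    play_content = "This is an example voice."
    sub_maker = fake_tts(play_content, "demo-voice", 1.0, "tmp.mp3")
```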
if sub_maker and os.path.exists(audio_file):
st.audio(audio_file, format="audio/mp3")
@@ -503,24 +618,39 @@ with middle_panel:
os.remove(audio_file)
if voice.is_azure_v2_voice(voice_name):
saved_azure_speech_region = config.azure.get("speech_region", "")
saved_azure_speech_key = config.azure.get("speech_key", "")
azure_speech_region = st.text_input(
tr("Speech Region"), value=saved_azure_speech_region
)
azure_speech_key = st.text_input(
tr("Speech Key"), value=saved_azure_speech_key, type="password"
)
config.azure["speech_region"] = azure_speech_region
config.azure["speech_key"] = azure_speech_key
params.voice_volume = st.selectbox(
tr("Speech Volume"),
options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0],
index=2,
)
params.voice_rate = st.selectbox(
tr("Speech Rate"),
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
index=2,
)
bgm_options = [
(tr("No Background Music"), ""),
(tr("Random Background Music"), "random"),
(tr("Custom Background Music"), "custom"),
]
selected_index = st.selectbox(
tr("Background Music"),
index=1,
options=range(len(bgm_options)),  # use the index as the internal option value
format_func=lambda x: bgm_options[x][0],  # show the label to the user
)
# get the selected background music type
params.bgm_type = bgm_options[selected_index][1]
@@ -531,8 +661,11 @@ with middle_panel:
if custom_bgm_file and os.path.exists(custom_bgm_file):
params.bgm_file = custom_bgm_file
# st.write(f":red[已选择自定义背景音乐]**{custom_bgm_file}**")
params.bgm_volume = st.selectbox(
tr("Background Music Volume"),
options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
index=2,
)
with right_panel:
with st.container(border=True):
@@ -543,31 +676,48 @@ with right_panel:
saved_font_name_index = 0
if saved_font_name in font_names:
saved_font_name_index = font_names.index(saved_font_name)
params.font_name = st.selectbox(
tr("Font"), font_names, index=saved_font_name_index
)
config.ui["font_name"] = params.font_name
subtitle_positions = [
(tr("Top"), "top"),
(tr("Center"), "center"),
(tr("Bottom"), "bottom"),
(tr("Custom"), "custom"),
]
selected_index = st.selectbox(
tr("Position"),
index=2,
options=range(len(subtitle_positions)),
format_func=lambda x: subtitle_positions[x][0],
)
params.subtitle_position = subtitle_positions[selected_index][1]
if params.subtitle_position == "custom":
custom_position = st.text_input(
tr("Custom Position (% from top)"), value="70.0"
)
try:
params.custom_position = float(custom_position)
if params.custom_position < 0 or params.custom_position > 100:
st.error(tr("Please enter a value between 0 and 100"))
except ValueError:
st.error(tr("Please enter a valid number"))
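The custom-position validation above parses the text input as a float and requires a percentage in [0, 100]. The same checks, pulled into a standalone helper (a sketch; the function name is illustrative):

```python
# Sketch: validate a custom subtitle position given as "% from top".
def parse_custom_position(raw: str):
    """Return (value, error); error is None when raw is a valid 0-100 float."""
    try:
        value = float(raw)
    except ValueError:
        return None, "Please enter a valid number"
    if value < 0 or value > 100:
        return value, "Please enter a value between 0 and 100"
    return value, None
```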
font_cols = st.columns([0.3, 0.7])
with font_cols[0]:
saved_text_fore_color = config.ui.get("text_fore_color", "#FFFFFF")
params.text_fore_color = st.color_picker(
tr("Font Color"), saved_text_fore_color
)
config.ui["text_fore_color"] = params.text_fore_color
with font_cols[1]:
saved_font_size = config.ui.get("font_size", 60)
params.font_size = st.slider(tr("Font Size"), 30, 100, saved_font_size)
config.ui["font_size"] = params.font_size
stroke_cols = st.columns([0.3, 0.7])
with stroke_cols[0]:
@@ -584,7 +734,7 @@ if start_button:
scroll_to_bottom()
st.stop()
if llm_provider != "g4f" and llm_provider != "ollama" and not config.app.get(f"{llm_provider}_api_key", ""):
st.error(tr("Please Enter the LLM API Key"))
scroll_to_bottom()
st.stop()
@@ -620,13 +770,13 @@ if start_button:
log_container = st.empty()
log_records = []
def log_received(msg):
if config.ui["hide_log"]:
return
with log_container:
log_records.append(msg)
st.code("\n".join(log_records))
logger.add(log_received)
st.toast(tr("Generating Video"))
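The log sink above is a plain callable registered with `logger.add()` that collects messages unless hiding is enabled. A standalone sketch of that behavior, with a plain dict standing in for `config.ui`:

```python
# Sketch: a log sink callable that honors a hide_log flag.
ui = {"hide_log": False}
log_records = []

def log_received(msg):
    if ui["hide_log"]:
        return
    log_records.append(msg)

log_received("task started")
ui["hide_log"] = True
log_received("suppressed while hidden")
```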
@@ -648,7 +798,7 @@ if start_button:
player_cols = st.columns(len(video_files) * 2 + 1)
for i, url in enumerate(video_files):
player_cols[i * 2 + 1].video(url)
except Exception:
pass
open_task_folder(task_id)

View File

@@ -26,6 +26,7 @@
"Speech Region": "Region(:red[Required[Get Region](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Key": "API Key(:red[Required[Get API Key](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Volume": "Lautstärke der Sprachausgabe",
"Speech Rate": "Lesegeschwindigkeit (1,0 bedeutet 1x)",
"Male": "Männlich",
"Female": "Weiblich",
"Background Music": "Hintergrundmusik",
@@ -41,6 +42,7 @@
"Top": "Oben",
"Center": "Mittig",
"Bottom": "Unten (empfohlen)",
"Custom": "Benutzerdefinierte Position (70, was 70% von oben bedeutet)",
"Font Size": "Schriftgröße für Untertitel",
"Font Color": "Schriftfarbe",
"Stroke Color": "Kontur",

View File

@@ -26,6 +26,7 @@
"Speech Region": "Region(:red[Required[Get Region](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Key": "API Key(:red[Required[Get API Key](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Volume": "Speech Volume (1.0 represents 100%)",
"Speech Rate": "Speech Rate (1.0 means 1x speed)",
"Male": "Male",
"Female": "Female",
"Background Music": "Background Music",
@@ -41,6 +42,7 @@
"Top": "Top",
"Center": "Center",
"Bottom": "Bottom (Recommended)",
"Custom": "Custom position (70, indicating 70% down from the top)",
"Font Size": "Subtitle Font Size",
"Font Color": "Subtitle Font Color",
"Stroke Color": "Subtitle Outline Color",
@@ -73,6 +75,7 @@
"Play Voice": "Play Voice",
"Voice Example": "This is an example text for testing speech synthesis",
"Synthesizing Voice": "Synthesizing voice, please wait...",
"TTS Provider": "Select the voice synthesis provider",
"Hide Log": "Hide Log"
}
}
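The locale files in this change all share the shape `{"Language": ..., "Translation": {...}}`. A hypothetical `tr()` helper that falls back to the key when a translation is missing could look like this (inline dict instead of a JSON file on disk; the exact helper in the app may differ):

```python
# Sketch: look up a UI string in a locale dict, falling back to the key.
locale = {
    "Language": "English",
    "Translation": {
        "Hide Log": "Hide Log",
        "Speech Rate": "Speech Rate (1.0 means 1x speed)",
    },
}

def tr(key: str) -> str:
    return locale["Translation"].get(key, key)
```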

webui/i18n/pt.json Normal file
View File

@@ -0,0 +1,81 @@
{
"Language": "Português Brasileiro",
"Translation": {
"Video Script Settings": "**Configurações do Roteiro do Vídeo**",
"Video Subject": "Tema do Vídeo (Forneça uma palavra-chave, :red[a IA irá gerar automaticamente] o roteiro do vídeo)",
"Script Language": "Idioma para Gerar o Roteiro do Vídeo (a IA irá gerar automaticamente com base no idioma do seu tema)",
"Generate Video Script and Keywords": "Clique para usar a IA para gerar o [Roteiro do Vídeo] e as [Palavras-chave do Vídeo] com base no **tema**",
"Auto Detect": "Detectar Automaticamente",
"Video Script": "Roteiro do Vídeo (:blue[① Opcional, gerado pela IA  ② Pontuação adequada ajuda na geração de legendas])",
"Generate Video Keywords": "Clique para usar a IA para gerar [Palavras-chave do Vídeo] com base no **roteiro**",
"Please Enter the Video Subject": "Por favor, insira o Roteiro do Vídeo primeiro",
"Generating Video Script and Keywords": "A IA está gerando o roteiro do vídeo e as palavras-chave...",
"Generating Video Keywords": "A IA está gerando as palavras-chave do vídeo...",
"Video Keywords": "Palavras-chave do Vídeo (:blue[① Opcional, gerado pela IA ② Use **vírgulas em inglês** para separar, somente em inglês])",
"Video Settings": "**Configurações do Vídeo**",
"Video Concat Mode": "Modo de Concatenação de Vídeo",
"Random": "Concatenação Aleatória (Recomendado)",
"Sequential": "Concatenação Sequencial",
"Video Ratio": "Proporção do Vídeo",
"Portrait": "Retrato 9:16",
"Landscape": "Paisagem 16:9",
"Clip Duration": "Duração Máxima dos Clipes de Vídeo (segundos)",
"Number of Videos Generated Simultaneously": "Número de Vídeos Gerados Simultaneamente",
"Audio Settings": "**Configurações de Áudio**",
"Speech Synthesis": "Voz de Síntese de Fala",
"Speech Region": "Região(:red[Obrigatório[Obter Região](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Key": "Chave da API(:red[Obrigatório[Obter Chave da API](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Volume": "Volume da Fala (1.0 representa 100%)",
"Speech Rate": "Velocidade da Fala (1.0 significa velocidade 1x)",
"Male": "Masculino",
"Female": "Feminino",
"Background Music": "Música de Fundo",
"No Background Music": "Sem Música de Fundo",
"Random Background Music": "Música de Fundo Aleatória",
"Custom Background Music": "Música de Fundo Personalizada",
"Custom Background Music File": "Por favor, insira o caminho do arquivo para a música de fundo personalizada:",
"Background Music Volume": "Volume da Música de Fundo (0.2 representa 20%, a música de fundo não deve ser muito alta)",
"Subtitle Settings": "**Configurações de Legendas**",
"Enable Subtitles": "Ativar Legendas (Se desmarcado, as configurações abaixo não terão efeito)",
"Font": "Fonte da Legenda",
"Position": "Posição da Legenda",
"Top": "Superior",
"Center": "Centralizar",
"Bottom": "Inferior (Recomendado)",
"Custom": "Posição personalizada (70, indicando 70% abaixo do topo)",
"Font Size": "Tamanho da Fonte da Legenda",
"Font Color": "Cor da Fonte da Legenda",
"Stroke Color": "Cor do Contorno da Legenda",
"Stroke Width": "Largura do Contorno da Legenda",
"Generate Video": "Gerar Vídeo",
"Video Script and Subject Cannot Both Be Empty": "O Tema do Vídeo e o Roteiro do Vídeo não podem estar ambos vazios",
"Generating Video": "Gerando vídeo, por favor aguarde...",
"Start Generating Video": "Começar a Gerar Vídeo",
"Video Generation Completed": "Geração do Vídeo Concluída",
"Video Generation Failed": "Falha na Geração do Vídeo",
"You can download the generated video from the following links": "Você pode baixar o vídeo gerado a partir dos seguintes links",
"Pexels API Key": "Chave da API do Pexels ([Obter Chave da API](https://www.pexels.com/api/))",
"Pixabay API Key": "Chave da API do Pixabay ([Obter Chave da API](https://pixabay.com/api/docs/#api_search_videos))",
"Basic Settings": "**Configurações Básicas** (:blue[Clique para expandir])",
"Language": "Idioma",
"LLM Provider": "Provedor LLM",
"API Key": "Chave da API (:red[Obrigatório])",
"Base Url": "URL Base",
"Account ID": "ID da Conta (Obter no painel do Cloudflare)",
"Model Name": "Nome do Modelo",
"Please Enter the LLM API Key": "Por favor, insira a **Chave da API LLM**",
"Please Enter the Pexels API Key": "Por favor, insira a **Chave da API do Pexels**",
"Please Enter the Pixabay API Key": "Por favor, insira a **Chave da API do Pixabay**",
"Get Help": "Se precisar de ajuda ou tiver alguma dúvida, você pode entrar no discord para obter ajuda: https://harryai.cc",
"Video Source": "Fonte do Vídeo",
"TikTok": "TikTok (Suporte para TikTok em breve)",
"Bilibili": "Bilibili (Suporte para Bilibili em breve)",
"Xiaohongshu": "Xiaohongshu (Suporte para Xiaohongshu em breve)",
"Local file": "Arquivo local",
"Play Voice": "Reproduzir Voz",
"Voice Example": "Este é um exemplo de texto para testar a síntese de fala",
"Synthesizing Voice": "Sintetizando voz, por favor aguarde...",
"TTS Provider": "Selecione o provedor de síntese de voz",
"Hide Log": "Ocultar Log"
}
}

View File

@@ -26,6 +26,7 @@
"Speech Region": "Vùng(:red[Bắt Buộc[Lấy Vùng](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Key": "Khóa API(:red[Bắt Buộc[Lấy Khóa API](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Volume": "Âm Lượng Giọng Đọc (1.0 đại diện cho 100%)",
"Speech Rate": "Tốc độ đọc (1.0 biểu thị tốc độ gốc)",
"Male": "Nam",
"Female": "Nữ",
"Background Music": "Âm Nhạc Nền",
@@ -41,6 +42,7 @@
"Top": "Trên",
"Center": "Giữa",
"Bottom": "Dưới (Được Khuyến Nghị)",
"Custom": "Vị trí tùy chỉnh (70, chỉ ra là cách đầu trang 70%)",
"Font Size": "Cỡ Chữ Phụ Đề",
"Font Color": "Màu Chữ Phụ Đề",
"Stroke Color": "Màu Viền Phụ Đề",

View File

@@ -26,6 +26,7 @@
"Speech Region": "服务区域 (:red[必填,[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Key": "API Key (:red[必填密钥1 或 密钥2 均可 [点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Volume": "朗读音量1.0表示100%",
"Speech Rate": "朗读速度1.0表示1倍速",
"Male": "男性",
"Female": "女性",
"Background Music": "背景音乐",
@@ -41,6 +42,7 @@
"Top": "顶部",
"Center": "中间",
"Bottom": "底部(推荐)",
"Custom": "自定义位置70表示离顶部70%的位置)",
"Font Size": "字幕大小",
"Font Color": "字幕颜色",
"Stroke Color": "描边颜色",
@@ -54,8 +56,8 @@
"You can download the generated video from the following links": "你可以从以下链接下载生成的视频",
"Basic Settings": "**基础设置** (:blue[点击展开])",
"Language": "界面语言",
"Pexels API Key": "Pexels API Key ([点击获取](https://www.pexels.com/api/)) :red[推荐使用]",
"Pixabay API Key": "Pixabay API Key ([点击获取](https://pixabay.com/api/docs/#api_search_videos)) :red[可以不用配置,如果 Pexels 无法使用再选择Pixabay]",
"LLM Provider": "大模型提供商",
"API Key": "API Key (:red[必填,需要到大模型提供商的后台申请])",
"Base Url": "Base Url (可选)",
@@ -73,6 +75,7 @@
"Play Voice": "试听语音合成",
"Voice Example": "这是一段测试语音合成的示例文本",
"Synthesizing Voice": "语音合成中,请稍候...",
"TTS Provider": "语音合成提供商",
"Hide Log": "隐藏日志"
}
}