Merge pull request #466 from harry0703/dev

fixed: subtitle generation failure
2024-07-26 17:56:32 +08:00 · 2024-07-26 17:55:26 +08:00 · 2024-07-25 15:00:07 +08:00 · 2024-07-25 14:59:45 +08:00 · 2024-07-25 13:58:46 +08:00 · 2024-07-25 13:57:39 +08:00
32 changed files with 1334 additions and 619 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,6 @@ node_modules
 /sites/docs/.vuepress/.cache
 # VuePress 默认构建生成的静态文件目录
 /sites/docs/.vuepress/dist
+# 模型目录
+/models/
+./models/*
--- a/README.md
+++ b/README.md
@@ -59,7 +59,7 @@
 - [x] 支持 **背景音乐**，随机或者指定音乐文件，可设置`背景音乐音量`
 - [x] 视频素材来源 **高清**，而且 **无版权**，也可以使用自己的 **本地素材**
 - [x] 支持 **OpenAI**、**Moonshot**、**Azure**、**gpt4free**、**one-api**、**通义千问**、**Google Gemini**、**Ollama**、
-  **DeepSeek** 等多种模型接入
+  **DeepSeek**、**文心一言** 等多种模型接入
    - 中国用户建议使用 **DeepSeek** 或 **Moonshot** 作为大模型提供商（国内可直接访问，不需要VPN。注册就送额度，基本够用）

 ### 后期计划 📅
@@ -121,11 +121,10 @@

 ## 快速开始 🚀

-下载一键启动包，解压直接使用（路径不要有 **中文** 和 **空格**）
+下载一键启动包，解压直接使用（路径不要有 **中文**、**特殊字符**、**空格**）

 ### Windows
-
- 百度网盘: https://pan.baidu.com/s/1jKF1mgsjfN8fBk6uTEHArQ?pwd=jrp7 提取码: jrp7
+- 百度网盘（1.2.0 最新版本）: https://pan.baidu.com/s/1gVmFSCleHybiIiW-8ETk_A?pwd=dwt8 提取码: dwt8

 下载后，建议先**双击执行** `update.bat` 更新到**最新代码**，然后双击 `start.bat` 启动

--- a/app/asgi.py
+++ b/app/asgi.py
@@ -1,4 +1,5 @@
 """Application implementation - ASGI."""
+
 import os

 from fastapi import FastAPI, Request
@@ -24,7 +25,9 @@ def exception_handler(request: Request, e: HttpException):
 def validation_exception_handler(request: Request, e: RequestValidationError):
    return JSONResponse(
        status_code=400,
-        content=utils.get_response(status=400, data=e.errors(), message='field required'),
+        content=utils.get_response(
+            status=400, data=e.errors(), message="field required"
+        ),
    )


@@ -61,7 +64,9 @@ app.add_middleware(
 )

 task_dir = utils.task_dir()
-app.mount("/tasks", StaticFiles(directory=task_dir, html=True, follow_symlink=True), name="")
+app.mount(
+    "/tasks", StaticFiles(directory=task_dir, html=True, follow_symlink=True), name=""
+)

 public_dir = utils.public_dir()
 app.mount("/", StaticFiles(directory=public_dir, html=True), name="")
--- a/app/config/init.py
+++ b/app/config/init.py
@@ -10,7 +10,9 @@ from app.utils import utils
 def __init_logger():
    # _log_file = utils.storage_dir("logs/server.log")
    _lvl = config.log_level
-    root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
+    root_dir = os.path.dirname(
+        os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+    )

    def format_record(record):
        # 获取日志记录中的文件全路径
@@ -21,10 +23,13 @@ def __init_logger():
        record["file"].path = f"./{relative_path}"
        # 返回修改后的格式字符串
        # 您可以根据需要调整这里的格式
-        _format = '<green>{time:%Y-%m-%d %H:%M:%S}</> | ' + \
-                  '<level>{level}</> | ' + \
-                  '"{file.path}:{line}":<blue> {function}</> ' + \
-                  '- <level>{message}</>' + "\n"
+        _format = (
+            "<green>{time:%Y-%m-%d %H:%M:%S}</> | "
+            + "<level>{level}</> | "
+            + '"{file.path}:{line}":<blue> {function}</> '
+            + "- <level>{message}</>"
+            + "\n"
+        )
        return _format

    logger.remove()
--- a/app/config/config.py
+++ b/app/config/config.py
@@ -25,7 +25,7 @@ def load_config():
        _config_ = toml.load(config_file)
    except Exception as e:
        logger.warning(f"load config failed: {str(e)}, try to load as utf-8-sig")
-        with open(config_file, mode="r", encoding='utf-8-sig') as fp:
+        with open(config_file, mode="r", encoding="utf-8-sig") as fp:
            _cfg_content = fp.read()
            _config_ = toml.loads(_cfg_content)
    return _config_
@@ -52,9 +52,11 @@ log_level = _cfg.get("log_level", "DEBUG")
 listen_host = _cfg.get("listen_host", "0.0.0.0")
 listen_port = _cfg.get("listen_port", 8080)
 project_name = _cfg.get("project_name", "MoneyPrinterTurbo")
-project_description = _cfg.get("project_description",
-                               "<a href='https://github.com/harry0703/MoneyPrinterTurbo'>https://github.com/harry0703/MoneyPrinterTurbo</a>")
-project_version = _cfg.get("project_version", "1.1.9")
+project_description = _cfg.get(
+    "project_description",
+    "<a href='https://github.com/harry0703/MoneyPrinterTurbo'>https://github.com/harry0703/MoneyPrinterTurbo</a>",
+)
+project_version = _cfg.get("project_version", "1.2.1")
 reload_debug = False

 imagemagick_path = app.get("imagemagick_path", "")
--- a/app/controllers/base.py
+++ b/app/controllers/base.py
@@ -7,14 +7,14 @@ from app.models.exception import HttpException


 def get_task_id(request: Request):
-    task_id = request.headers.get('x-task-id')
+    task_id = request.headers.get("x-task-id")
    if not task_id:
        task_id = uuid4()
    return str(task_id)


 def get_api_key(request: Request):
-    api_key = request.headers.get('x-api-key')
+    api_key = request.headers.get("x-api-key")
    return api_key


@@ -23,5 +23,9 @@ def verify_token(request: Request):
    if token != config.app.get("api_key", ""):
        request_id = get_task_id(request)
        request_url = request.url
-        user_agent = request.headers.get('user-agent')
-        raise HttpException(task_id=request_id, status_code=401, message=f"invalid token: {request_url}, {user_agent}")
+        user_agent = request.headers.get("user-agent")
+        raise HttpException(
+            task_id=request_id,
+            status_code=401,
+            message=f"invalid token: {request_url}, {user_agent}",
+        )
--- a/app/controllers/manager/base_manager.py
+++ b/app/controllers/manager/base_manager.py
@@ -18,11 +18,15 @@ class TaskManager:
                print(f"add task: {func.__name__}, current_tasks: {self.current_tasks}")
                self.execute_task(func, *args, **kwargs)
            else:
-                print(f"enqueue task: {func.__name__}, current_tasks: {self.current_tasks}")
+                print(
+                    f"enqueue task: {func.__name__}, current_tasks: {self.current_tasks}"
+                )
                self.enqueue({"func": func, "args": args, "kwargs": kwargs})

    def execute_task(self, func: Callable, *args: Any, **kwargs: Any):
-        thread = threading.Thread(target=self.run_task, args=(func, *args), kwargs=kwargs)
+        thread = threading.Thread(
+            target=self.run_task, args=(func, *args), kwargs=kwargs
+        )
        thread.start()

    def run_task(self, func: Callable, *args: Any, **kwargs: Any):
@@ -35,11 +39,14 @@ class TaskManager:

    def check_queue(self):
        with self.lock:
-            if self.current_tasks < self.max_concurrent_tasks and not self.is_queue_empty():
+            if (
+                self.current_tasks < self.max_concurrent_tasks
+                and not self.is_queue_empty()
+            ):
                task_info = self.dequeue()
-                func = task_info['func']
-                args = task_info.get('args', ())
-                kwargs = task_info.get('kwargs', {})
+                func = task_info["func"]
+                args = task_info.get("args", ())
+                kwargs = task_info.get("kwargs", {})
                self.execute_task(func, *args, **kwargs)

    def task_done(self):
--- a/app/controllers/manager/redis_manager.py
+++ b/app/controllers/manager/redis_manager.py
@@ -8,7 +8,7 @@ from app.models.schema import VideoParams
 from app.services import task as tm

 FUNC_MAP = {
-    'start': tm.start,
+    "start": tm.start,
    # 'start_test': tm.start_test
 }

@@ -24,11 +24,15 @@ class RedisTaskManager(TaskManager):
    def enqueue(self, task: Dict):
        task_with_serializable_params = task.copy()

-        if 'params' in task['kwargs'] and isinstance(task['kwargs']['params'], VideoParams):
-            task_with_serializable_params['kwargs']['params'] = task['kwargs']['params'].dict()
+        if "params" in task["kwargs"] and isinstance(
+            task["kwargs"]["params"], VideoParams
+        ):
+            task_with_serializable_params["kwargs"]["params"] = task["kwargs"][
+                "params"
+            ].dict()

        # 将函数对象转换为其名称
-        task_with_serializable_params['func'] = task['func'].__name__
+        task_with_serializable_params["func"] = task["func"].__name__
        self.redis_client.rpush(self.queue, json.dumps(task_with_serializable_params))

    def dequeue(self):
@@ -36,10 +40,14 @@ class RedisTaskManager(TaskManager):
        if task_json:
            task_info = json.loads(task_json)
            # 将函数名称转换回函数对象
-            task_info['func'] = FUNC_MAP[task_info['func']]
+            task_info["func"] = FUNC_MAP[task_info["func"]]

-            if 'params' in task_info['kwargs'] and isinstance(task_info['kwargs']['params'], dict):
-                task_info['kwargs']['params'] = VideoParams(**task_info['kwargs']['params'])
+            if "params" in task_info["kwargs"] and isinstance(
+                task_info["kwargs"]["params"], dict
+            ):
+                task_info["kwargs"]["params"] = VideoParams(
+                    **task_info["kwargs"]["params"]
+                )

            return task_info
        return None
--- a/app/controllers/ping.py
+++ b/app/controllers/ping.py
@@ -4,6 +4,11 @@ from fastapi import Request
 router = APIRouter()


-@router.get("/ping", tags=["Health Check"], description="检查服务可用性", response_description="pong")
+@router.get(
+    "/ping",
+    tags=["Health Check"],
+    description="检查服务可用性",
+    response_description="pong",
+)
 def ping(request: Request) -> str:
    return "pong"
--- a/app/controllers/v1/base.py
+++ b/app/controllers/v1/base.py
@@ -3,8 +3,8 @@ from fastapi import APIRouter, Depends

 def new_router(dependencies=None):
    router = APIRouter()
-    router.tags = ['V1']
-    router.prefix = '/api/v1'
+    router.tags = ["V1"]
+    router.prefix = "/api/v1"
    # 将认证依赖项应用于所有路由
    if dependencies:
        router.dependencies = dependencies
--- a/app/controllers/v1/llm.py
+++ b/app/controllers/v1/llm.py
@@ -1,6 +1,11 @@
 from fastapi import Request
 from app.controllers.v1.base import new_router
-from app.models.schema import VideoScriptResponse, VideoScriptRequest, VideoTermsResponse, VideoTermsRequest
+from app.models.schema import (
+    VideoScriptResponse,
+    VideoScriptRequest,
+    VideoTermsResponse,
+    VideoTermsRequest,
+)
 from app.services import llm
 from app.utils import utils

@@ -9,23 +14,31 @@ from app.utils import utils
 router = new_router()


-@router.post("/scripts", response_model=VideoScriptResponse, summary="Create a script for the video")
+@router.post(
+    "/scripts",
+    response_model=VideoScriptResponse,
+    summary="Create a script for the video",
+)
 def generate_video_script(request: Request, body: VideoScriptRequest):
-    video_script = llm.generate_script(video_subject=body.video_subject,
-                                       language=body.video_language,
-                                       paragraph_number=body.paragraph_number)
-    response = {
-        "video_script": video_script
-    }
+    video_script = llm.generate_script(
+        video_subject=body.video_subject,
+        language=body.video_language,
+        paragraph_number=body.paragraph_number,
+    )
+    response = {"video_script": video_script}
    return utils.get_response(200, response)


-@router.post("/terms", response_model=VideoTermsResponse, summary="Generate video terms based on the video script")
+@router.post(
+    "/terms",
+    response_model=VideoTermsResponse,
+    summary="Generate video terms based on the video script",
+)
 def generate_video_terms(request: Request, body: VideoTermsRequest):
-    video_terms = llm.generate_terms(video_subject=body.video_subject,
-                                     video_script=body.video_script,
-                                     amount=body.amount)
-    response = {
-        "video_terms": video_terms
-    }
+    video_terms = llm.generate_terms(
+        video_subject=body.video_subject,
+        video_script=body.video_script,
+        amount=body.amount,
+    )
+    response = {"video_terms": video_terms}
    return utils.get_response(200, response)
--- a/app/controllers/v1/video.py
+++ b/app/controllers/v1/video.py
@@ -1,11 +1,12 @@
-import os
 import glob
+import os
 import pathlib
 import shutil
+from typing import Union

-from fastapi import Request, Depends, Path, BackgroundTasks, UploadFile
-from fastapi.responses import FileResponse, StreamingResponse
+from fastapi import BackgroundTasks, Depends, Path, Request, UploadFile
 from fastapi.params import File
+from fastapi.responses import FileResponse, StreamingResponse
 from loguru import logger

 from app.config import config
@@ -14,10 +15,19 @@ from app.controllers.manager.memory_manager import InMemoryTaskManager
 from app.controllers.manager.redis_manager import RedisTaskManager
 from app.controllers.v1.base import new_router
 from app.models.exception import HttpException
-from app.models.schema import TaskVideoRequest, TaskQueryResponse, TaskResponse, TaskQueryRequest, \
-    BgmUploadResponse, BgmRetrieveResponse, TaskDeletionResponse
-from app.services import task as tm
+from app.models.schema import (
+    AudioRequest,
+    BgmRetrieveResponse,
+    BgmUploadResponse,
+    SubtitleRequest,
+    TaskDeletionResponse,
+    TaskQueryRequest,
+    TaskQueryResponse,
+    TaskResponse,
+    TaskVideoRequest,
+)
 from app.services import state as sm
+from app.services import task as tm
 from app.utils import utils

 # 认证依赖项
@@ -34,48 +44,65 @@ _max_concurrent_tasks = config.app.get("max_concurrent_tasks", 5)
 redis_url = f"redis://:{_redis_password}@{_redis_host}:{_redis_port}/{_redis_db}"
 # 根据配置选择合适的任务管理器
 if _enable_redis:
-    task_manager = RedisTaskManager(max_concurrent_tasks=_max_concurrent_tasks, redis_url=redis_url)
+    task_manager = RedisTaskManager(
+        max_concurrent_tasks=_max_concurrent_tasks, redis_url=redis_url
+    )
 else:
    task_manager = InMemoryTaskManager(max_concurrent_tasks=_max_concurrent_tasks)

-# @router.post("/videos-test", response_model=TaskResponse, summary="Generate a short video")
-# async def create_video_test(request: Request, body: TaskVideoRequest):
-#     task_id = utils.get_uuid()
-#     request_id = base.get_task_id(request)
-#     try:
-#         task = {
-#             "task_id": task_id,
-#             "request_id": request_id,
-#             "params": body.dict(),
-#         }
-#         task_manager.add_task(tm.start_test, task_id=task_id, params=body)
-#         return utils.get_response(200, task)
-#     except ValueError as e:
-#         raise HttpException(task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}")
-

@router.post("/videos", response_model=TaskResponse, summary="Generate a short video")
-def create_video(background_tasks: BackgroundTasks, request: Request, body: TaskVideoRequest):
+def create_video(
+    background_tasks: BackgroundTasks, request: Request, body: TaskVideoRequest
+):
+    return create_task(request, body, stop_at="video")
+
+
+@router.post("/subtitle", response_model=TaskResponse, summary="Generate subtitle only")
+def create_subtitle(
+    background_tasks: BackgroundTasks, request: Request, body: SubtitleRequest
+):
+    return create_task(request, body, stop_at="subtitle")
+
+
+@router.post("/audio", response_model=TaskResponse, summary="Generate audio only")
+def create_audio(
+    background_tasks: BackgroundTasks, request: Request, body: AudioRequest
+):
+    return create_task(request, body, stop_at="audio")
+
+
+def create_task(
+    request: Request,
+    body: Union[TaskVideoRequest, SubtitleRequest, AudioRequest],
+    stop_at: str,
+):
    task_id = utils.get_uuid()
    request_id = base.get_task_id(request)
    try:
        task = {
            "task_id": task_id,
            "request_id": request_id,
-            "params": body.dict(),
+            "params": body.model_dump(),
        }
        sm.state.update_task(task_id)
-        # background_tasks.add_task(tm.start, task_id=task_id, params=body)
-        task_manager.add_task(tm.start, task_id=task_id, params=body)
-        logger.success(f"video created: {utils.to_json(task)}")
+        task_manager.add_task(tm.start, task_id=task_id, params=body, stop_at=stop_at)
+        logger.success(f"Task created: {utils.to_json(task)}")
        return utils.get_response(200, task)
    except ValueError as e:
-        raise HttpException(task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}")
+        raise HttpException(
+            task_id=task_id, status_code=400, message=f"{request_id}: {str(e)}"
+        )


-@router.get("/tasks/{task_id}", response_model=TaskQueryResponse, summary="Query task status")
-def get_task(request: Request, task_id: str = Path(..., description="Task ID"),
-             query: TaskQueryRequest = Depends()):
+@router.get(
+    "/tasks/{task_id}", response_model=TaskQueryResponse, summary="Query task status"
+)
+def get_task(
+    request: Request,
+    task_id: str = Path(..., description="Task ID"),
+    query: TaskQueryRequest = Depends(),
+):
    endpoint = config.app.get("endpoint", "")
    if not endpoint:
        endpoint = str(request.base_url)
@@ -108,10 +135,16 @@ def get_task(request: Request, task_id: str = Path(..., description="Task ID"),
            task["combined_videos"] = urls
        return utils.get_response(200, task)

-    raise HttpException(task_id=task_id, status_code=404, message=f"{request_id}: task not found")
+    raise HttpException(
+        task_id=task_id, status_code=404, message=f"{request_id}: task not found"
+    )


-@router.delete("/tasks/{task_id}", response_model=TaskDeletionResponse, summary="Delete a generated short video task")
+@router.delete(
+    "/tasks/{task_id}",
+    response_model=TaskDeletionResponse,
+    summary="Delete a generated short video task",
+)
 def delete_video(request: Request, task_id: str = Path(..., description="Task ID")):
    request_id = base.get_task_id(request)
    task = sm.state.get_task(task_id)
@@ -125,32 +158,40 @@ def delete_video(request: Request, task_id: str = Path(..., description="Task ID
        logger.success(f"video deleted: {utils.to_json(task)}")
        return utils.get_response(200)

-    raise HttpException(task_id=task_id, status_code=404, message=f"{request_id}: task not found")
+    raise HttpException(
+        task_id=task_id, status_code=404, message=f"{request_id}: task not found"
+    )


-@router.get("/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files")
+@router.get(
+    "/musics", response_model=BgmRetrieveResponse, summary="Retrieve local BGM files"
+)
 def get_bgm_list(request: Request):
    suffix = "*.mp3"
    song_dir = utils.song_dir()
    files = glob.glob(os.path.join(song_dir, suffix))
    bgm_list = []
    for file in files:
-        bgm_list.append({
-            "name": os.path.basename(file),
-            "size": os.path.getsize(file),
-            "file": file,
-        })
-    response = {
-        "files": bgm_list
-    }
+        bgm_list.append(
+            {
+                "name": os.path.basename(file),
+                "size": os.path.getsize(file),
+                "file": file,
+            }
+        )
+    response = {"files": bgm_list}
    return utils.get_response(200, response)


-@router.post("/musics", response_model=BgmUploadResponse, summary="Upload the BGM file to the songs directory")
+@router.post(
+    "/musics",
+    response_model=BgmUploadResponse,
+    summary="Upload the BGM file to the songs directory",
+)
 def upload_bgm_file(request: Request, file: UploadFile = File(...)):
    request_id = base.get_task_id(request)
    # check file ext
-    if file.filename.endswith('mp3'):
+    if file.filename.endswith("mp3"):
        song_dir = utils.song_dir()
        save_path = os.path.join(song_dir, file.filename)
        # save file
@@ -158,26 +199,26 @@ def upload_bgm_file(request: Request, file: UploadFile = File(...)):
            # If the file already exists, it will be overwritten
            file.file.seek(0)
            buffer.write(file.file.read())
-        response = {
-            "file": save_path
-        }
+        response = {"file": save_path}
        return utils.get_response(200, response)

-    raise HttpException('', status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded")
+    raise HttpException(
+        "", status_code=400, message=f"{request_id}: Only *.mp3 files can be uploaded"
+    )


@router.get("/stream/{file_path:path}")
 async def stream_video(request: Request, file_path: str):
    tasks_dir = utils.task_dir()
    video_path = os.path.join(tasks_dir, file_path)
-    range_header = request.headers.get('Range')
+    range_header = request.headers.get("Range")
    video_size = os.path.getsize(video_path)
    start, end = 0, video_size - 1

    length = video_size
    if range_header:
-        range_ = range_header.split('bytes=')[1]
-        start, end = [int(part) if part else None for part in range_.split('-')]
+        range_ = range_header.split("bytes=")[1]
+        start, end = [int(part) if part else None for part in range_.split("-")]
        if start is None:
            start = video_size - end
            end = video_size - 1
@@ -186,7 +227,7 @@ async def stream_video(request: Request, file_path: str):
        length = end - start + 1

    def file_iterator(file_path, offset=0, bytes_to_read=None):
-        with open(file_path, 'rb') as f:
+        with open(file_path, "rb") as f:
            f.seek(offset, os.SEEK_SET)
            remaining = bytes_to_read or video_size
            while remaining > 0:
@@ -197,10 +238,12 @@ async def stream_video(request: Request, file_path: str):
                remaining -= len(data)
                yield data

-    response = StreamingResponse(file_iterator(video_path, start, length), media_type='video/mp4')
-    response.headers['Content-Range'] = f'bytes {start}-{end}/{video_size}'
-    response.headers['Accept-Ranges'] = 'bytes'
-    response.headers['Content-Length'] = str(length)
+    response = StreamingResponse(
+        file_iterator(video_path, start, length), media_type="video/mp4"
+    )
+    response.headers["Content-Range"] = f"bytes {start}-{end}/{video_size}"
+    response.headers["Accept-Ranges"] = "bytes"
+    response.headers["Content-Length"] = str(length)
    response.status_code = 206  # Partial Content

    return response
@@ -219,8 +262,10 @@ async def download_video(_: Request, file_path: str):
    file_path = pathlib.Path(video_path)
    filename = file_path.stem
    extension = file_path.suffix
-    headers = {
-        "Content-Disposition": f"attachment; filename={filename}{extension}"
-    }
-    return FileResponse(path=video_path, headers=headers, filename=f"{filename}{extension}",
-                        media_type=f'video/{extension[1:]}')
+    headers = {"Content-Disposition": f"attachment; filename={filename}{extension}"}
+    return FileResponse(
+        path=video_path,
+        headers=headers,
+        filename=f"{filename}{extension}",
+        media_type=f"video/{extension[1:]}",
+    )
--- a/app/models/const.py
+++ b/app/models/const.py
@@ -1,11 +1,25 @@
 PUNCTUATIONS = [
-    "?", ",", ".", "、", ";", ":", "!", "…",
-    "？", "，", "。", "、", "；", "：", "！", "...",
+    "?",
+    ",",
+    ".",
+    "、",
+    ";",
+    ":",
+    "!",
+    "…",
+    "？",
+    "，",
+    "。",
+    "、",
+    "；",
+    "：",
+    "！",
+    "...",
 ]

 TASK_STATE_FAILED = -1
 TASK_STATE_COMPLETE = 1
 TASK_STATE_PROCESSING = 4

-FILE_TYPE_VIDEOS = ['mp4', 'mov', 'mkv', 'webm']
-FILE_TYPE_IMAGES = ['jpg', 'jpeg', 'png', 'bmp']
+FILE_TYPE_VIDEOS = ["mp4", "mov", "mkv", "webm"]
+FILE_TYPE_IMAGES = ["jpg", "jpeg", "png", "bmp"]
--- a/app/models/exception.py
+++ b/app/models/exception.py
@@ -5,16 +5,18 @@ from loguru import logger


 class HttpException(Exception):
-    def __init__(self, task_id: str, status_code: int, message: str = '', data: Any = None):
+    def __init__(
+        self, task_id: str, status_code: int, message: str = "", data: Any = None
+    ):
        self.message = message
        self.status_code = status_code
        self.data = data
        # 获取异常堆栈信息
        tb_str = traceback.format_exc().strip()
        if not tb_str or tb_str == "NoneType: None":
-            msg = f'HttpException: {status_code}, {task_id}, {message}'
+            msg = f"HttpException: {status_code}, {task_id}, {message}"
        else:
-            msg = f'HttpException: {status_code}, {task_id}, {message}\n{tb_str}'
+            msg = f"HttpException: {status_code}, {task_id}, {message}\n{tb_str}"

        if status_code == 400:
            logger.warning(msg)
--- a/app/models/schema.py
+++ b/app/models/schema.py
@@ -1,12 +1,16 @@
+import warnings
 from enum import Enum
-from typing import Any, Optional, List
+from typing import Any, List, Optional

 import pydantic
 from pydantic import BaseModel
-import warnings

 # 忽略 Pydantic 的特定警告
-warnings.filterwarnings("ignore", category=UserWarning, message="Field name.*shadows an attribute in parent.*")
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message="Field name.*shadows an attribute in parent.*",
+)


 class VideoConcatMode(str, Enum):
@@ -61,7 +65,6 @@ class MaterialInfo:
 #     # "male-zh-TW-YunJheNeural",
 #
 #     # en-US
-#
 #     "female-en-US-AnaNeural",
 #     "female-en-US-AriaNeural",
 #     "female-en-US-AvaNeural",
@@ -93,6 +96,7 @@ class VideoParams(BaseModel):
      "stroke_width": 1.5
    }
    """
+
    video_subject: str
    video_script: str = ""  # 用于生成视频的脚本
    video_terms: Optional[str | list] = None  # 用于生成视频的关键词
@@ -108,12 +112,14 @@ class VideoParams(BaseModel):

    voice_name: Optional[str] = ""
    voice_volume: Optional[float] = 1.0
+    voice_rate: Optional[float] = 1.0
    bgm_type: Optional[str] = "random"
    bgm_file: Optional[str] = ""
    bgm_volume: Optional[float] = 0.2

    subtitle_enabled: Optional[bool] = True
    subtitle_position: Optional[str] = "bottom"  # top, bottom, center
+    custom_position: float = 70.0
    font_name: Optional[str] = "STHeitiMedium.ttc"
    text_fore_color: Optional[str] = "#FFFFFF"
    text_background_color: Optional[str] = "transparent"
@@ -125,6 +131,38 @@ class VideoParams(BaseModel):
    paragraph_number: Optional[int] = 1


+class SubtitleRequest(BaseModel):
+    video_script: str
+    video_language: Optional[str] = ""
+    voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
+    voice_volume: Optional[float] = 1.0
+    voice_rate: Optional[float] = 1.2
+    bgm_type: Optional[str] = "random"
+    bgm_file: Optional[str] = ""
+    bgm_volume: Optional[float] = 0.2
+    subtitle_position: Optional[str] = "bottom"
+    font_name: Optional[str] = "STHeitiMedium.ttc"
+    text_fore_color: Optional[str] = "#FFFFFF"
+    text_background_color: Optional[str] = "transparent"
+    font_size: int = 60
+    stroke_color: Optional[str] = "#000000"
+    stroke_width: float = 1.5
+    video_source: Optional[str] = "local"
+    subtitle_enabled: Optional[str] = "true"
+
+
+class AudioRequest(BaseModel):
+    video_script: str
+    video_language: Optional[str] = ""
+    voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
+    voice_volume: Optional[float] = 1.0
+    voice_rate: Optional[float] = 1.2
+    bgm_type: Optional[str] = "random"
+    bgm_file: Optional[str] = ""
+    bgm_volume: Optional[float] = 0.2
+    video_source: Optional[str] = "local"
+
+
 class VideoScriptParams:
    """
    {
@@ -133,6 +171,7 @@ class VideoScriptParams:
      "paragraph_number": 1
    }
    """
+
    video_subject: Optional[str] = "春天的花海"
    video_language: Optional[str] = ""
    paragraph_number: Optional[int] = 1
@@ -146,14 +185,17 @@ class VideoTermsParams:
      "amount": 5
    }
    """
+
    video_subject: Optional[str] = "春天的花海"
-    video_script: Optional[str] = "春天的花海，如诗如画般展现在眼前。万物复苏的季节里，大地披上了一袭绚丽多彩的盛装。金黄的迎春、粉嫩的樱花、洁白的梨花、艳丽的郁金香……"
+    video_script: Optional[str] = (
+        "春天的花海，如诗如画般展现在眼前。万物复苏的季节里，大地披上了一袭绚丽多彩的盛装。金黄的迎春、粉嫩的樱花、洁白的梨花、艳丽的郁金香……"
+    )
    amount: Optional[int] = 5


 class BaseResponse(BaseModel):
    status: int = 200
-    message: Optional[str] = 'success'
+    message: Optional[str] = "success"
    data: Any = None


@@ -188,9 +230,7 @@ class TaskResponse(BaseResponse):
            "example": {
                "status": 200,
                "message": "success",
-                "data": {
-                    "task_id": "6c85c8cc-a77a-42b9-bc30-947815aa0558"
-                }
+                "data": {"task_id": "6c85c8cc-a77a-42b9-bc30-947815aa0558"},
            },
        }

@@ -209,8 +249,8 @@ class TaskQueryResponse(BaseResponse):
                    ],
                    "combined_videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
-                    ]
-                }
+                    ],
+                },
            },
        }

@@ -229,8 +269,8 @@ class TaskDeletionResponse(BaseResponse):
                    ],
                    "combined_videos": [
                        "http://127.0.0.1:8080/tasks/6c85c8cc-a77a-42b9-bc30-947815aa0558/combined-1.mp4"
-                    ]
-                }
+                    ],
+                },
            },
        }

@@ -243,7 +283,7 @@ class VideoScriptResponse(BaseResponse):
                "message": "success",
                "data": {
                    "video_script": "春天的花海，是大自然的一幅美丽画卷。在这个季节里，大地复苏，万物生长，花朵争相绽放，形成了一片五彩斑斓的花海..."
-                }
+                },
            },
        }

@@ -254,9 +294,7 @@ class VideoTermsResponse(BaseResponse):
            "example": {
                "status": 200,
                "message": "success",
-                "data": {
-                    "video_terms": ["sky", "tree"]
-                }
+                "data": {"video_terms": ["sky", "tree"]},
            },
        }

@@ -272,10 +310,10 @@ class BgmRetrieveResponse(BaseResponse):
                        {
                            "name": "output013.mp3",
                            "size": 1891269,
-                            "file": "/MoneyPrinterTurbo/resource/songs/output013.mp3"
+                            "file": "/MoneyPrinterTurbo/resource/songs/output013.mp3",
                        }
                    ]
-                }
+                },
            },
        }

@@ -286,8 +324,6 @@ class BgmUploadResponse(BaseResponse):
            "example": {
                "status": 200,
                "message": "success",
-                "data": {
-                    "file": "/MoneyPrinterTurbo/resource/songs/example.mp3"
-                }
+                "data": {"file": "/MoneyPrinterTurbo/resource/songs/example.mp3"},
            },
        }
--- a/app/router.py
+++ b/app/router.py
@@ -6,9 +6,10 @@ Resources:
    1. https://fastapi.tiangolo.com/tutorial/bigger-applications

 """
+
 from fastapi import APIRouter

-from app.controllers.v1 import video, llm
+from app.controllers.v1 import llm, video

 root_api_router = APIRouter()
 # v1
--- a/app/services/llm.py
+++ b/app/services/llm.py
@@ -21,6 +21,7 @@ def _generate_response(prompt: str) -> str:
        if not model_name:
            model_name = "gpt-3.5-turbo-16k-0613"
        import g4f
+
        content = g4f.ChatCompletion.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
@@ -72,43 +73,62 @@ def _generate_response(prompt: str) -> str:
            base_url = config.app.get("deepseek_base_url")
            if not base_url:
                base_url = "https://api.deepseek.com"
+        elif llm_provider == "ernie":
+            api_key = config.app.get("ernie_api_key")
+            secret_key = config.app.get("ernie_secret_key")
+            base_url = config.app.get("ernie_base_url")
+            model_name = "***"
+            if not secret_key:
+                raise ValueError(
+                    f"{llm_provider}: secret_key is not set, please set it in the config.toml file."
+                )
        else:
-            raise ValueError("llm_provider is not set, please set it in the config.toml file.")
+            raise ValueError(
+                "llm_provider is not set, please set it in the config.toml file."
+            )

        if not api_key:
-            raise ValueError(f"{llm_provider}: api_key is not set, please set it in the config.toml file.")
+            raise ValueError(
+                f"{llm_provider}: api_key is not set, please set it in the config.toml file."
+            )
        if not model_name:
-            raise ValueError(f"{llm_provider}: model_name is not set, please set it in the config.toml file.")
+            raise ValueError(
+                f"{llm_provider}: model_name is not set, please set it in the config.toml file."
+            )
        if not base_url:
-            raise ValueError(f"{llm_provider}: base_url is not set, please set it in the config.toml file.")
+            raise ValueError(
+                f"{llm_provider}: base_url is not set, please set it in the config.toml file."
+            )

        if llm_provider == "qwen":
            import dashscope
            from dashscope.api_entities.dashscope_response import GenerationResponse
+
            dashscope.api_key = api_key
            response = dashscope.Generation.call(
-                model=model_name,
-                messages=[{"role": "user", "content": prompt}]
+                model=model_name, messages=[{"role": "user", "content": prompt}]
            )
            if response:
                if isinstance(response, GenerationResponse):
                    status_code = response.status_code
                    if status_code != 200:
                        raise Exception(
-                            f"[{llm_provider}] returned an error response: \"{response}\"")
+                            f'[{llm_provider}] returned an error response: "{response}"'
+                        )

                    content = response["output"]["text"]
                    return content.replace("\n", "")
                else:
                    raise Exception(
-                        f"[{llm_provider}] returned an invalid response: \"{response}\"")
+                        f'[{llm_provider}] returned an invalid response: "{response}"'
+                    )
            else:
-                raise Exception(
-                    f"[{llm_provider}] returned an empty response")
+                raise Exception(f"[{llm_provider}] returned an empty response")

        if llm_provider == "gemini":
            import google.generativeai as genai
-            genai.configure(api_key=api_key, transport='rest')
+
+            genai.configure(api_key=api_key, transport="rest")

            generation_config = {
                "temperature": 0.5,
@@ -120,25 +140,27 @@ def _generate_response(prompt: str) -> str:
            safety_settings = [
                {
                    "category": "HARM_CATEGORY_HARASSMENT",
-                    "threshold": "BLOCK_ONLY_HIGH"
+                    "threshold": "BLOCK_ONLY_HIGH",
                },
                {
                    "category": "HARM_CATEGORY_HATE_SPEECH",
-                    "threshold": "BLOCK_ONLY_HIGH"
+                    "threshold": "BLOCK_ONLY_HIGH",
                },
                {
                    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
-                    "threshold": "BLOCK_ONLY_HIGH"
+                    "threshold": "BLOCK_ONLY_HIGH",
                },
                {
                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
-                    "threshold": "BLOCK_ONLY_HIGH"
+                    "threshold": "BLOCK_ONLY_HIGH",
                },
            ]

-            model = genai.GenerativeModel(model_name=model_name,
-                                          generation_config=generation_config,
-                                          safety_settings=safety_settings)
+            model = genai.GenerativeModel(
+                model_name=model_name,
+                generation_config=generation_config,
+                safety_settings=safety_settings,
+            )

            try:
                response = model.generate_content(prompt)
@@ -151,20 +173,54 @@ def _generate_response(prompt: str) -> str:

        if llm_provider == "cloudflare":
            import requests
+
            response = requests.post(
                f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/{model_name}",
                headers={"Authorization": f"Bearer {api_key}"},
                json={
                    "messages": [
                        {"role": "system", "content": "You are a friendly assistant"},
-                        {"role": "user", "content": prompt}
+                        {"role": "user", "content": prompt},
                    ]
-                }
+                },
            )
            result = response.json()
            logger.info(result)
            return result["result"]["response"]

+        if llm_provider == "ernie":
+            import requests
+
+            params = {
+                "grant_type": "client_credentials",
+                "client_id": api_key,
+                "client_secret": secret_key,
+            }
+            access_token = (
+                requests.post("https://aip.baidubce.com/oauth/2.0/token", params=params)
+                .json()
+                .get("access_token")
+            )
+            url = f"{base_url}?access_token={access_token}"
+
+            payload = json.dumps(
+                {
+                    "messages": [{"role": "user", "content": prompt}],
+                    "temperature": 0.5,
+                    "top_p": 0.8,
+                    "penalty_score": 1,
+                    "disable_search": False,
+                    "enable_citation": False,
+                    "response_format": "text",
+                }
+            )
+            headers = {"Content-Type": "application/json"}
+
+            response = requests.request(
+                "POST", url, headers=headers, data=payload
+            ).json()
+            return response.get("result")
+
        if llm_provider == "azure":
            client = AzureOpenAI(
                api_key=api_key,
@@ -178,24 +234,27 @@ def _generate_response(prompt: str) -> str:
            )

        response = client.chat.completions.create(
-            model=model_name,
-            messages=[{"role": "user", "content": prompt}]
+            model=model_name, messages=[{"role": "user", "content": prompt}]
        )
        if response:
            if isinstance(response, ChatCompletion):
                content = response.choices[0].message.content
            else:
                raise Exception(
-                    f"[{llm_provider}] returned an invalid response: \"{response}\", please check your network "
-                    f"connection and try again.")
+                    f'[{llm_provider}] returned an invalid response: "{response}", please check your network '
+                    f"connection and try again."
+                )
        else:
            raise Exception(
-                f"[{llm_provider}] returned an empty response, please check your network connection and try again.")
+                f"[{llm_provider}] returned an empty response, please check your network connection and try again."
+            )

    return content.replace("\n", "")


-def generate_script(video_subject: str, language: str = "", paragraph_number: int = 1) -> str:
+def generate_script(
+    video_subject: str, language: str = "", paragraph_number: int = 1
+) -> str:
    prompt = f"""
 # Role: Video Script Generator

@@ -239,7 +298,7 @@ Generate a script for a video, depending on the subject of the video.
        selected_paragraphs = paragraphs[:paragraph_number]

        # Join the selected paragraphs into a single string
-        return "\n\n".join(selected_paragraphs)
+        return "\n\n".join(paragraphs)

    for i in range(_max_retries):
        try:
@@ -295,21 +354,27 @@ Please note that you must use English for generating video search terms; Chinese
    logger.info(f"subject: {video_subject}")

    search_terms = []
+    response = ""
    for i in range(_max_retries):
        try:
            response = _generate_response(prompt)
            search_terms = json.loads(response)
-            if not isinstance(search_terms, list) or not all(isinstance(term, str) for term in search_terms):
+            if not isinstance(search_terms, list) or not all(
+                isinstance(term, str) for term in search_terms
+            ):
                logger.error("response is not a list of strings.")
                continue

        except Exception as e:
-            match = re.search(r'\[.*]', response)
-            if match:
-                try:
-                    search_terms = json.loads(match.group())
-                except json.JSONDecodeError:
-                    pass
+            logger.warning(f"failed to generate video terms: {str(e)}")
+            if response:
+                match = re.search(r"\[.*]", response)
+                if match:
+                    try:
+                        search_terms = json.loads(match.group())
+                    except Exception as e:
+                        logger.warning(f"failed to generate video terms: {str(e)}")
+                        pass

        if search_terms and len(search_terms) > 0:
            break
@@ -322,9 +387,13 @@ Please note that you must use English for generating video search terms; Chinese

 if __name__ == "__main__":
    video_subject = "生命的意义是什么"
-    script = generate_script(video_subject=video_subject, language="zh-CN", paragraph_number=1)
+    script = generate_script(
+        video_subject=video_subject, language="zh-CN", paragraph_number=1
+    )
    print("######################")
    print(script)
-    search_terms = generate_terms(video_subject=video_subject, video_script=script, amount=5)
+    search_terms = generate_terms(
+        video_subject=video_subject, video_script=script, amount=5
+    )
    print("######################")
    print(search_terms)
--- a/app/services/material.py
+++ b/app/services/material.py
@@ -19,7 +19,8 @@ def get_api_key(cfg_key: str):
    if not api_keys:
        raise ValueError(
            f"\n\n##### {cfg_key} is not set #####\n\nPlease set it in the config.toml file: {config.config_file}\n\n"
-            f"{utils.to_json(config.app)}")
+            f"{utils.to_json(config.app)}"
+        )

    # if only one key is provided, return it
    if isinstance(api_keys, str):
@@ -30,28 +31,29 @@ def get_api_key(cfg_key: str):
    return api_keys[requested_count % len(api_keys)]


-def search_videos_pexels(search_term: str,
-                         minimum_duration: int,
-                         video_aspect: VideoAspect = VideoAspect.portrait,
-                         ) -> List[MaterialInfo]:
+def search_videos_pexels(
+    search_term: str,
+    minimum_duration: int,
+    video_aspect: VideoAspect = VideoAspect.portrait,
+) -> List[MaterialInfo]:
    aspect = VideoAspect(video_aspect)
    video_orientation = aspect.name
    video_width, video_height = aspect.to_resolution()
    api_key = get_api_key("pexels_api_keys")
-    headers = {
-        "Authorization": api_key
-    }
+    headers = {"Authorization": api_key}
    # Build URL
-    params = {
-        "query": search_term,
-        "per_page": 20,
-        "orientation": video_orientation
-    }
+    params = {"query": search_term, "per_page": 20, "orientation": video_orientation}
    query_url = f"https://api.pexels.com/videos/search?{urlencode(params)}"
    logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}")

    try:
-        r = requests.get(query_url, headers=headers, proxies=config.proxy, verify=False, timeout=(30, 60))
+        r = requests.get(
+            query_url,
+            headers=headers,
+            proxies=config.proxy,
+            verify=False,
+            timeout=(30, 60),
+        )
        response = r.json()
        video_items = []
        if "videos" not in response:
@@ -83,10 +85,11 @@ def search_videos_pexels(search_term: str,
    return []


-def search_videos_pixabay(search_term: str,
-                          minimum_duration: int,
-                          video_aspect: VideoAspect = VideoAspect.portrait,
-                          ) -> List[MaterialInfo]:
+def search_videos_pixabay(
+    search_term: str,
+    minimum_duration: int,
+    video_aspect: VideoAspect = VideoAspect.portrait,
+) -> List[MaterialInfo]:
    aspect = VideoAspect(video_aspect)

    video_width, video_height = aspect.to_resolution()
@@ -97,13 +100,15 @@ def search_videos_pixabay(search_term: str,
        "q": search_term,
        "video_type": "all",  # Accepted values: "all", "film", "animation"
        "per_page": 50,
-        "key": api_key
+        "key": api_key,
    }
    query_url = f"https://pixabay.com/api/videos/?{urlencode(params)}"
    logger.info(f"searching videos: {query_url}, with proxies: {config.proxy}")

    try:
-        r = requests.get(query_url, proxies=config.proxy, verify=False, timeout=(30, 60))
+        r = requests.get(
+            query_url, proxies=config.proxy, verify=False, timeout=(30, 60)
+        )
        response = r.json()
        video_items = []
        if "hits" not in response:
@@ -155,7 +160,11 @@ def save_video(video_url: str, save_dir: str = "") -> str:

    # if video does not exist, download it
    with open(video_path, "wb") as f:
-        f.write(requests.get(video_url, proxies=config.proxy, verify=False, timeout=(60, 240)).content)
+        f.write(
+            requests.get(
+                video_url, proxies=config.proxy, verify=False, timeout=(60, 240)
+            ).content
+        )

    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
        try:
@@ -174,14 +183,15 @@ def save_video(video_url: str, save_dir: str = "") -> str:
    return ""


-def download_videos(task_id: str,
-                    search_terms: List[str],
-                    source: str = "pexels",
-                    video_aspect: VideoAspect = VideoAspect.portrait,
-                    video_contact_mode: VideoConcatMode = VideoConcatMode.random,
-                    audio_duration: float = 0.0,
-                    max_clip_duration: int = 5,
-                    ) -> List[str]:
+def download_videos(
+    task_id: str,
+    search_terms: List[str],
+    source: str = "pexels",
+    video_aspect: VideoAspect = VideoAspect.portrait,
+    video_contact_mode: VideoConcatMode = VideoConcatMode.random,
+    audio_duration: float = 0.0,
+    max_clip_duration: int = 5,
+) -> List[str]:
    valid_video_items = []
    valid_video_urls = []
    found_duration = 0.0
@@ -190,9 +200,11 @@ def download_videos(task_id: str,
        search_videos = search_videos_pixabay

    for search_term in search_terms:
-        video_items = search_videos(search_term=search_term,
-                                    minimum_duration=max_clip_duration,
-                                    video_aspect=video_aspect)
+        video_items = search_videos(
+            search_term=search_term,
+            minimum_duration=max_clip_duration,
+            video_aspect=video_aspect,
+        )
        logger.info(f"found {len(video_items)} videos for '{search_term}'")

        for item in video_items:
@@ -202,7 +214,8 @@ def download_videos(task_id: str,
                found_duration += item.duration

    logger.info(
-        f"found total videos: {len(valid_video_items)}, required duration: {audio_duration} seconds, found duration: {found_duration} seconds")
+        f"found total videos: {len(valid_video_items)}, required duration: {audio_duration} seconds, found duration: {found_duration} seconds"
+    )
    video_paths = []

    material_directory = config.app.get("material_directory", "").strip()
@@ -218,14 +231,18 @@ def download_videos(task_id: str,
    for item in valid_video_items:
        try:
            logger.info(f"downloading video: {item.url}")
-            saved_video_path = save_video(video_url=item.url, save_dir=material_directory)
+            saved_video_path = save_video(
+                video_url=item.url, save_dir=material_directory
+            )
            if saved_video_path:
                logger.info(f"video saved: {saved_video_path}")
                video_paths.append(saved_video_path)
                seconds = min(max_clip_duration, item.duration)
                total_duration += seconds
                if total_duration > audio_duration:
-                    logger.info(f"total duration of downloaded videos: {total_duration} seconds, skip downloading more")
+                    logger.info(
+                        f"total duration of downloaded videos: {total_duration} seconds, skip downloading more"
+                    )
                    break
        except Exception as e:
            logger.error(f"failed to download video: {utils.to_json(item)} => {str(e)}")
@@ -234,4 +251,6 @@ def download_videos(task_id: str,


 if __name__ == "__main__":
-    download_videos("test123", ["Money Exchange Medium"], audio_duration=100, source="pixabay")
+    download_videos(
+        "test123", ["Money Exchange Medium"], audio_duration=100, source="pixabay"
+    )
--- a/app/services/state.py
+++ b/app/services/state.py
@@ -6,7 +6,6 @@ from app.models import const

 # Base class for state management
 class BaseState(ABC):
-
    @abstractmethod
    def update_task(self, task_id: str, state: int, progress: int = 0, **kwargs):
        pass
@@ -18,11 +17,16 @@ class BaseState(ABC):

 # Memory state management
 class MemoryState(BaseState):
-
    def __init__(self):
        self._tasks = {}

-    def update_task(self, task_id: str, state: int = const.TASK_STATE_PROCESSING, progress: int = 0, **kwargs):
+    def update_task(
+        self,
+        task_id: str,
+        state: int = const.TASK_STATE_PROCESSING,
+        progress: int = 0,
+        **kwargs,
+    ):
        progress = int(progress)
        if progress > 100:
            progress = 100
@@ -43,12 +47,18 @@ class MemoryState(BaseState):

 # Redis state management
 class RedisState(BaseState):
-
-    def __init__(self, host='localhost', port=6379, db=0, password=None):
+    def __init__(self, host="localhost", port=6379, db=0, password=None):
        import redis
+
        self._redis = redis.StrictRedis(host=host, port=port, db=db, password=password)

-    def update_task(self, task_id: str, state: int = const.TASK_STATE_PROCESSING, progress: int = 0, **kwargs):
+    def update_task(
+        self,
+        task_id: str,
+        state: int = const.TASK_STATE_PROCESSING,
+        progress: int = 0,
+        **kwargs,
+    ):
        progress = int(progress)
        if progress > 100:
            progress = 100
@@ -67,7 +77,10 @@ class RedisState(BaseState):
        if not task_data:
            return None

-        task = {key.decode('utf-8'): self._convert_to_original_type(value) for key, value in task_data.items()}
+        task = {
+            key.decode("utf-8"): self._convert_to_original_type(value)
+            for key, value in task_data.items()
+        }
        return task

    def delete_task(self, task_id: str):
@@ -79,7 +92,7 @@ class RedisState(BaseState):
        Convert the value from byte string to its original data type.
        You can extend this method to handle other data types as needed.
        """
-        value_str = value.decode('utf-8')
+        value_str = value.decode("utf-8")

        try:
            # try to convert byte string array to list
@@ -100,4 +113,10 @@ _redis_port = config.app.get("redis_port", 6379)
 _redis_db = config.app.get("redis_db", 0)
 _redis_password = config.app.get("redis_password", None)

-state = RedisState(host=_redis_host, port=_redis_port, db=_redis_db, password=_redis_password) if _enable_redis else MemoryState()
+state = (
+    RedisState(
+        host=_redis_host, port=_redis_port, db=_redis_db, password=_redis_password
+    )
+    if _enable_redis
+    else MemoryState()
+)
--- a/app/services/subtitle.py
+++ b/app/services/subtitle.py
@@ -23,18 +23,22 @@ def create(audio_file, subtitle_file: str = ""):
        if not os.path.isdir(model_path) or not os.path.isfile(model_bin_file):
            model_path = model_size

-        logger.info(f"loading model: {model_path}, device: {device}, compute_type: {compute_type}")
+        logger.info(
+            f"loading model: {model_path}, device: {device}, compute_type: {compute_type}"
+        )
        try:
-            model = WhisperModel(model_size_or_path=model_path,
-                                 device=device,
-                                 compute_type=compute_type)
+            model = WhisperModel(
+                model_size_or_path=model_path, device=device, compute_type=compute_type
+            )
        except Exception as e:
-            logger.error(f"failed to load model: {e} \n\n"
-                         f"********************************************\n"
-                         f"this may be caused by network issue. \n"
-                         f"please download the model manually and put it in the 'models' folder. \n"
-                         f"see [README.md FAQ](https://github.com/harry0703/MoneyPrinterTurbo) for more details.\n"
-                         f"********************************************\n\n")
+            logger.error(
+                f"failed to load model: {e} \n\n"
+                f"********************************************\n"
+                f"this may be caused by network issue. \n"
+                f"please download the model manually and put it in the 'models' folder. \n"
+                f"see [README.md FAQ](https://github.com/harry0703/MoneyPrinterTurbo) for more details.\n"
+                f"********************************************\n\n"
+            )
            return None

    logger.info(f"start, output file: {subtitle_file}")
@@ -49,7 +53,9 @@ def create(audio_file, subtitle_file: str = ""):
        vad_parameters=dict(min_silence_duration_ms=500),
    )

-    logger.info(f"detected language: '{info.language}', probability: {info.language_probability:.2f}")
+    logger.info(
+        f"detected language: '{info.language}', probability: {info.language_probability:.2f}"
+    )

    start = timer()
    subtitles = []
@@ -62,11 +68,9 @@ def create(audio_file, subtitle_file: str = ""):
        msg = "[%.2fs -> %.2fs] %s" % (seg_start, seg_end, seg_text)
        logger.debug(msg)

-        subtitles.append({
-            "msg": seg_text,
-            "start_time": seg_start,
-            "end_time": seg_end
-        })
+        subtitles.append(
+            {"msg": seg_text, "start_time": seg_start, "end_time": seg_end}
+        )

    for segment in segments:
        words_idx = 0
@@ -119,7 +123,11 @@ def create(audio_file, subtitle_file: str = ""):
    for subtitle in subtitles:
        text = subtitle.get("msg")
        if text:
-            lines.append(utils.text_to_srt(idx, text, subtitle.get("start_time"), subtitle.get("end_time")))
+            lines.append(
+                utils.text_to_srt(
+                    idx, text, subtitle.get("start_time"), subtitle.get("end_time")
+                )
+            )
            idx += 1

    sub = "\n".join(lines) + "\n"
@@ -136,12 +144,12 @@ def file_to_subtitles(filename):
    current_times = None
    current_text = ""
    index = 0
-    with open(filename, 'r', encoding="utf-8") as f:
+    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            times = re.findall("([0-9]*:[0-9]*:[0-9]*,[0-9]*)", line)
            if times:
                current_times = line
-            elif line.strip() == '' and current_times:
+            elif line.strip() == "" and current_times:
                index += 1
                times_texts.append((index, current_times.strip(), current_text.strip()))
                current_times, current_text = None, ""
@@ -150,27 +158,124 @@ def file_to_subtitles(filename):
    return times_texts


+def levenshtein_distance(s1, s2):
+    """Return the Levenshtein (edit) distance between two sequences.
+
+    The distance is the minimum number of single-element insertions,
+    deletions and substitutions needed to turn one input into the other.
+    Only two rows of the dynamic-programming table are kept at a time.
+    """
+    # Iterate over the longer input in the outer loop so that each row only
+    # needs len(shorter) + 1 cells.
+    if len(s1) < len(s2):
+        longer, shorter = s2, s1
+    else:
+        longer, shorter = s1, s2
+
+    if len(shorter) == 0:
+        return len(longer)
+
+    prev_costs = list(range(len(shorter) + 1))
+    for row, ch_long in enumerate(longer, start=1):
+        curr_costs = [row]
+        for col, ch_short in enumerate(shorter, start=1):
+            cost_insert = prev_costs[col] + 1
+            cost_delete = curr_costs[col - 1] + 1
+            cost_replace = prev_costs[col - 1] + (ch_long != ch_short)
+            curr_costs.append(min(cost_insert, cost_delete, cost_replace))
+        prev_costs = curr_costs
+
+    return prev_costs[-1]
+
+
+def similarity(a, b):
+    """Return a case-insensitive similarity ratio between two strings.
+
+    The ratio is ``1 - distance / longest`` where ``distance`` is the
+    Levenshtein distance between the lower-cased inputs and ``longest`` is
+    the length of the longer lower-cased input.
+
+    :param a: first string
+    :param b: second string
+    :return: a float; 1.0 means the strings are equal ignoring case
+    """
+    a_lower = a.lower()
+    b_lower = b.lower()
+    # Measure lengths on the lower-cased text: lower() can change the length
+    # of a string, and mixing original lengths with a distance computed on
+    # lower-cased text would skew the ratio.
+    max_length = max(len(a_lower), len(b_lower))
+    if max_length == 0:
+        # Two empty strings are identical; also avoids dividing by zero.
+        return 1.0
+    distance = levenshtein_distance(a_lower, b_lower)
+    return 1 - (distance / max_length)
+
+
 def correct(subtitle_file, video_script):
+    """Align the subtitle file's text with the video script and rewrite it if needed.
+
+    The subtitle entries read from ``subtitle_file`` are walked in parallel
+    with the script split into lines by punctuation. Where a script line and
+    a subtitle entry differ, following subtitle entries are merged in for as
+    long as that raises the similarity to the script line; the merged entry
+    then takes the script line as its text and spans the merged time range.
+    The file is rewritten (with entries renumbered from 1) only when at least
+    one entry was changed.
+
+    :param subtitle_file: path of the subtitle file to read and, if corrected, overwrite
+    :param video_script: the script text the subtitles should match
+    """
    subtitle_items = file_to_subtitles(subtitle_file)
    script_lines = utils.split_string_by_punctuations(video_script)

    corrected = False
-    if len(subtitle_items) == len(script_lines):
-        for i in range(len(script_lines)):
-            script_line = script_lines[i].strip()
-            subtitle_line = subtitle_items[i][2]
-            if script_line != subtitle_line:
-                logger.warning(f"line {i + 1}, script: {script_line}, subtitle: {subtitle_line}")
-                subtitle_items[i] = (subtitle_items[i][0], subtitle_items[i][1], script_line)
-                corrected = True
+    new_subtitle_items = []
+    script_index = 0
+    subtitle_index = 0
+
+    while script_index < len(script_lines) and subtitle_index < len(subtitle_items):
+        script_line = script_lines[script_index].strip()
+        subtitle_line = subtitle_items[subtitle_index][2].strip()
+
+        if script_line == subtitle_line:
+            new_subtitle_items.append(subtitle_items[subtitle_index])
+            script_index += 1
+            subtitle_index += 1
+        else:
+            combined_subtitle = subtitle_line
+            time_range = subtitle_items[subtitle_index][1].split(" --> ")
+            start_time = time_range[0]
+            end_time = time_range[1]
+            next_subtitle_index = subtitle_index + 1
+            # Score of the text merged so far; cached so it is not recomputed
+            # on every merge attempt.
+            best_score = similarity(script_line, combined_subtitle)
+
+            # Greedily absorb following entries while doing so brings the
+            # merged text closer to the script line.
+            while next_subtitle_index < len(subtitle_items):
+                next_subtitle = subtitle_items[next_subtitle_index][2].strip()
+                candidate = combined_subtitle + " " + next_subtitle
+                candidate_score = similarity(script_line, candidate)
+                if candidate_score <= best_score:
+                    break
+                combined_subtitle = candidate
+                best_score = candidate_score
+                end_time = subtitle_items[next_subtitle_index][1].split(" --> ")[1]
+                next_subtitle_index += 1
+
+            # The entry is replaced by the script line either way; the
+            # threshold only decides how the replacement is reported.
+            if best_score > 0.8:
+                logger.warning(
+                    f"Merged/Corrected - Script: {script_line}, Subtitle: {combined_subtitle}"
+                )
+            else:
+                logger.warning(
+                    f"Mismatch - Script: {script_line}, Subtitle: {combined_subtitle}"
+                )
+            new_subtitle_items.append(
+                (
+                    len(new_subtitle_items) + 1,
+                    f"{start_time} --> {end_time}",
+                    script_line,
+                )
+            )
+            corrected = True
+
+            script_index += 1
+            subtitle_index = next_subtitle_index
+
+    # Handle the remaining script lines. This loop only runs once every
+    # subtitle entry has been consumed, so no timing is left to borrow and a
+    # zero-length placeholder range is used.
+    # NOTE(review): subtitle entries left over when the script runs out first
+    # are not copied into new_subtitle_items - confirm that dropping them is intended.
+    while script_index < len(script_lines):
+        logger.warning(f"Extra script line: {script_lines[script_index]}")
+        new_subtitle_items.append(
+            (
+                len(new_subtitle_items) + 1,
+                "00:00:00,000 --> 00:00:00,000",
+                script_lines[script_index],
+            )
+        )
+        script_index += 1
+        corrected = True

    if corrected:
        with open(subtitle_file, "w", encoding="utf-8") as fd:
-            for item in subtitle_items:
-                fd.write(f"{item[0]}\n{item[1]}\n{item[2]}\n\n")
-        logger.info(f"subtitle corrected")
+            for i, item in enumerate(new_subtitle_items):
+                fd.write(f"{i + 1}\n{item[1]}\n{item[2]}\n\n")
+        logger.info("Subtitle corrected")
    else:
-        logger.success(f"subtitle is correct")
+        logger.success("Subtitle is correct")


 if __name__ == "__main__":
--- a/app/services/task.py
+++ b/app/services/task.py
@@ -3,61 +3,47 @@ import os.path
 import re
 from os import path

+from edge_tts import SubMaker
 from loguru import logger

 from app.config import config
 from app.models import const
-from app.models.schema import VideoParams, VideoConcatMode
-from app.services import llm, material, voice, video, subtitle
+from app.models.schema import VideoConcatMode, VideoParams
+from app.services import llm, material, subtitle, video, voice
 from app.services import state as sm
 from app.utils import utils


-def start(task_id, params: VideoParams):
-    """
-    {
-        "video_subject": "",
-        "video_aspect": "横屏 16:9（西瓜视频）",
-        "voice_name": "女生-晓晓",
-        "enable_bgm": false,
-        "font_name": "STHeitiMedium 黑体-中",
-        "text_color": "#FFFFFF",
-        "font_size": 60,
-        "stroke_color": "#000000",
-        "stroke_width": 1.5
-    }
-    """
-    logger.info(f"start task: {task_id}")
-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)
-
-    video_subject = params.video_subject
-    voice_name = voice.parse_voice_name(params.voice_name)
-    paragraph_number = params.paragraph_number
-    n_threads = params.n_threads
-    max_clip_duration = params.video_clip_duration
-
+def generate_script(task_id, params):
+    """Return the video script for a task, generating one when none was supplied.
+
+    Uses ``params.video_script`` (stripped) when it is non-empty; otherwise
+    asks ``llm.generate_script`` for a script based on the subject, language
+    and paragraph number in ``params``. If no script is available afterwards,
+    the task is marked as failed and ``None`` is returned.
+    """
    logger.info("\n\n## generating video script")
    video_script = params.video_script.strip()
    if not video_script:
-        video_script = llm.generate_script(video_subject=video_subject, language=params.video_language,
-                                           paragraph_number=paragraph_number)
+        video_script = llm.generate_script(
+            video_subject=params.video_subject,
+            language=params.video_language,
+            paragraph_number=params.paragraph_number,
+        )
    else:
        logger.debug(f"video script: \n{video_script}")

    if not video_script:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        logger.error("failed to generate video script.")
-        return
+        return None

-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=10)
+    return video_script

+
+def generate_terms(task_id, params, video_script):
    logger.info("\n\n## generating video terms")
    video_terms = params.video_terms
    if not video_terms:
-        video_terms = llm.generate_terms(video_subject=video_subject, video_script=video_script, amount=5)
+        video_terms = llm.generate_terms(
+            video_subject=params.video_subject, video_script=video_script, amount=5
+        )
    else:
        if isinstance(video_terms, str):
-            video_terms = [term.strip() for term in re.split(r'[,，]', video_terms)]
+            video_terms = [term.strip() for term in re.split(r"[,，]", video_terms)]
        elif isinstance(video_terms, list):
            video_terms = [term.strip() for term in video_terms]
        else:
@@ -68,9 +54,13 @@ def start(task_id, params: VideoParams):
    if not video_terms:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        logger.error("failed to generate video terms.")
-        return
+        return None

-    script_file = path.join(utils.task_dir(task_id), f"script.json")
+    return video_terms
+
+
+def save_script_data(task_id, video_script, video_terms, params):
+    script_file = path.join(utils.task_dir(task_id), "script.json")
    script_data = {
        "script": video_script,
        "search_terms": video_terms,
@@ -80,11 +70,16 @@ def start(task_id, params: VideoParams):
    with open(script_file, "w", encoding="utf-8") as f:
        f.write(utils.to_json(script_data))

-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)

+def generate_audio(task_id, params, video_script):
    logger.info("\n\n## generating audio")
-    audio_file = path.join(utils.task_dir(task_id), f"audio.mp3")
-    sub_maker = voice.tts(text=video_script, voice_name=voice_name, voice_file=audio_file)
+    audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
+    sub_maker = voice.tts(
+        text=video_script,
+        voice_name=voice.parse_voice_name(params.voice_name),
+        voice_rate=params.voice_rate,
+        voice_file=audio_file,
+    )
    if sub_maker is None:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        logger.error(
@@ -93,86 +88,100 @@ def start(task_id, params: VideoParams):
 2. check if the network is available. If you are in China, it is recommended to use a VPN and enable the global traffic mode.
        """.strip()
        )
-        return
+        return None, None, None

-    audio_duration = voice.get_audio_duration(sub_maker)
-    audio_duration = math.ceil(audio_duration)
+    audio_duration = math.ceil(voice.get_audio_duration(sub_maker))
+    return audio_file, audio_duration, sub_maker

-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)

-    subtitle_path = ""
-    if params.subtitle_enabled:
-        subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
-        subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
-        logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
-        subtitle_fallback = False
-        if subtitle_provider == "edge":
-            voice.create_subtitle(text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path)
-            if not os.path.exists(subtitle_path):
-                subtitle_fallback = True
-                logger.warning("subtitle file not found, fallback to whisper")
+def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):
+    if not params.subtitle_enabled:
+        return ""

-        if subtitle_provider == "whisper" or subtitle_fallback:
-            subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
-            logger.info("\n\n## correcting subtitle")
-            subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)
+    subtitle_path = path.join(utils.task_dir(task_id), "subtitle.srt")
+    subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
+    logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")

-        subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
-        if not subtitle_lines:
-            logger.warning(f"subtitle file is invalid: {subtitle_path}")
-            subtitle_path = ""
+    subtitle_fallback = False
+    if subtitle_provider == "edge":
+        voice.create_subtitle(
+            text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path
+        )
+        if not os.path.exists(subtitle_path):
+            subtitle_fallback = True
+            logger.warning("subtitle file not found, fallback to whisper")

-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
+    if subtitle_provider == "whisper" or subtitle_fallback:
+        subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
+        logger.info("\n\n## correcting subtitle")
+        subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)

-    downloaded_videos = []
+    subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
+    if not subtitle_lines:
+        logger.warning(f"subtitle file is invalid: {subtitle_path}")
+        return ""
+
+    return subtitle_path
+
+
+def get_video_materials(task_id, params, video_terms, audio_duration):
    if params.video_source == "local":
        logger.info("\n\n## preprocess local materials")
-        materials = video.preprocess_video(materials=params.video_materials, clip_duration=max_clip_duration)
-        print(materials)
-
+        materials = video.preprocess_video(
+            materials=params.video_materials, clip_duration=params.video_clip_duration
+        )
        if not materials:
            sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
-            logger.error("no valid materials found, please check the materials and try again.")
-            return
-        for material_info in materials:
-            print(material_info)
-            downloaded_videos.append(material_info.url)
+            logger.error(
+                "no valid materials found, please check the materials and try again."
+            )
+            return None
+        return [material_info.url for material_info in materials]
    else:
        logger.info(f"\n\n## downloading videos from {params.video_source}")
-        downloaded_videos = material.download_videos(task_id=task_id,
-                                                     search_terms=video_terms,
-                                                     source=params.video_source,
-                                                     video_aspect=params.video_aspect,
-                                                     video_contact_mode=params.video_concat_mode,
-                                                     audio_duration=audio_duration * params.video_count,
-                                                     max_clip_duration=max_clip_duration,
-                                                     )
-    if not downloaded_videos:
-        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
-        logger.error(
-            "failed to download videos, maybe the network is not available. if you are in China, please use a VPN.")
-        return
+        downloaded_videos = material.download_videos(
+            task_id=task_id,
+            search_terms=video_terms,
+            source=params.video_source,
+            video_aspect=params.video_aspect,
+            video_contact_mode=params.video_concat_mode,
+            audio_duration=audio_duration * params.video_count,
+            max_clip_duration=params.video_clip_duration,
+        )
+        if not downloaded_videos:
+            sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+            logger.error(
+                "failed to download videos, maybe the network is not available. if you are in China, please use a VPN."
+            )
+            return None
+        return downloaded_videos

-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=50)

+def generate_final_videos(
+        task_id, params, downloaded_videos, audio_file, subtitle_path
+):
    final_video_paths = []
    combined_video_paths = []
-    video_concat_mode = params.video_concat_mode
-    if params.video_count > 1:
-        video_concat_mode = VideoConcatMode.random
+    video_concat_mode = (
+        params.video_concat_mode if params.video_count == 1 else VideoConcatMode.random
+    )

    _progress = 50
    for i in range(params.video_count):
        index = i + 1
-        combined_video_path = path.join(utils.task_dir(task_id), f"combined-{index}.mp4")
+        combined_video_path = path.join(
+            utils.task_dir(task_id), f"combined-{index}.mp4"
+        )
        logger.info(f"\n\n## combining video: {index} => {combined_video_path}")
-        video.combine_videos(combined_video_path=combined_video_path,
-                             video_paths=downloaded_videos,
-                             audio_file=audio_file,
-                             video_aspect=params.video_aspect,
-                             video_concat_mode=video_concat_mode,
-                             max_clip_duration=max_clip_duration,
-                             threads=n_threads)
+        video.combine_videos(
+            combined_video_path=combined_video_path,
+            video_paths=downloaded_videos,
+            audio_file=audio_file,
+            video_aspect=params.video_aspect,
+            video_concat_mode=video_concat_mode,
+            max_clip_duration=params.video_clip_duration,
+            threads=params.n_threads,
+        )

        _progress += 50 / params.video_count / 2
        sm.state.update_task(task_id, progress=_progress)
@@ -180,13 +189,13 @@ def start(task_id, params: VideoParams):
        final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")

        logger.info(f"\n\n## generating video: {index} => {final_video_path}")
-        # Put everything together
-        video.generate_video(video_path=combined_video_path,
-                             audio_path=audio_file,
-                             subtitle_path=subtitle_path,
-                             output_file=final_video_path,
-                             params=params,
-                             )
+        video.generate_video(
+            video_path=combined_video_path,
+            audio_path=audio_file,
+            subtitle_path=subtitle_path,
+            output_file=final_video_path,
+            params=params,
+        )

        _progress += 50 / params.video_count / 2
        sm.state.update_task(task_id, progress=_progress)
@@ -194,16 +203,133 @@ def start(task_id, params: VideoParams):
        final_video_paths.append(final_video_path)
        combined_video_paths.append(combined_video_path)

-    logger.success(f"task {task_id} finished, generated {len(final_video_paths)} videos.")
+    return final_video_paths, combined_video_paths
+
+
+def start(task_id, params: VideoParams, stop_at: str = "video"):
+    logger.info(f"start task: {task_id}, stop_at: {stop_at}")
+    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)
+
+    if type(params.video_concat_mode) is str:
+        params.video_concat_mode = VideoConcatMode(params.video_concat_mode)
+        
+    # 1. Generate script
+    video_script = generate_script(task_id, params)
+    if not video_script:
+        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+        return
+
+    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=10)
+
+    if stop_at == "script":
+        sm.state.update_task(
+            task_id, state=const.TASK_STATE_COMPLETE, progress=100, script=video_script
+        )
+        return {"script": video_script}
+
+    # 2. Generate terms
+    video_terms = ""
+    if params.video_source != "local":
+        video_terms = generate_terms(task_id, params, video_script)
+        if not video_terms:
+            sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+            return
+
+    save_script_data(task_id, video_script, video_terms, params)
+
+    if stop_at == "terms":
+        sm.state.update_task(
+            task_id, state=const.TASK_STATE_COMPLETE, progress=100, terms=video_terms
+        )
+        return {"script": video_script, "terms": video_terms}
+
+    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
+
+    # 3. Generate audio
+    audio_file, audio_duration, sub_maker = generate_audio(task_id, params, video_script)
+    if not audio_file:
+        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+        return
+
+    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
+
+    if stop_at == "audio":
+        sm.state.update_task(
+            task_id,
+            state=const.TASK_STATE_COMPLETE,
+            progress=100,
+            audio_file=audio_file,
+        )
+        return {"audio_file": audio_file, "audio_duration": audio_duration}
+
+    # 4. Generate subtitle
+    subtitle_path = generate_subtitle(task_id, params, video_script, sub_maker, audio_file)
+
+    if stop_at == "subtitle":
+        sm.state.update_task(
+            task_id,
+            state=const.TASK_STATE_COMPLETE,
+            progress=100,
+            subtitle_path=subtitle_path,
+        )
+        return {"subtitle_path": subtitle_path}
+
+    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
+
+    # 5. Get video materials
+    downloaded_videos = get_video_materials(
+        task_id, params, video_terms, audio_duration
+    )
+    if not downloaded_videos:
+        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+        return
+
+    if stop_at == "materials":
+        sm.state.update_task(
+            task_id,
+            state=const.TASK_STATE_COMPLETE,
+            progress=100,
+            materials=downloaded_videos,
+        )
+        return {"materials": downloaded_videos}
+
+    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=50)
+
+    # 6. Generate final videos
+    final_video_paths, combined_video_paths = generate_final_videos(
+        task_id, params, downloaded_videos, audio_file, subtitle_path
+    )
+
+    if not final_video_paths:
+        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
+        return
+
+    logger.success(
+        f"task {task_id} finished, generated {len(final_video_paths)} videos."
+    )

    kwargs = {
        "videos": final_video_paths,
-        "combined_videos": combined_video_paths
+        "combined_videos": combined_video_paths,
+        "script": video_script,
+        "terms": video_terms,
+        "audio_file": audio_file,
+        "audio_duration": audio_duration,
+        "subtitle_path": subtitle_path,
+        "materials": downloaded_videos,
    }
-    sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs)
+    sm.state.update_task(
+        task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs
+    )
    return kwargs

-# def start_test(task_id, params: VideoParams):
-#     print(f"start task {task_id} \n")
-#     time.sleep(5)
-#     print(f"task {task_id} finished \n")
+
+if __name__ == "__main__":
+    task_id = "task_id"
+    params = VideoParams(
+        video_subject="金钱的作用",
+        voice_name="zh-CN-XiaoyiNeural-Female",
+        voice_rate=1.0,
+
+    )
+    start(task_id, params, stop_at="video")
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -1,13 +1,14 @@
 import glob
 import random
 from typing import List
-from PIL import ImageFont, Image
+
 from loguru import logger
 from moviepy.editor import *
 from moviepy.video.tools.subtitles import SubtitlesClip
+from PIL import ImageFont

 from app.models import const
-from app.models.schema import VideoAspect, VideoParams, VideoConcatMode, MaterialInfo
+from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode, VideoParams
 from app.utils import utils


@@ -27,14 +28,15 @@ def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
    return ""


-def combine_videos(combined_video_path: str,
-                   video_paths: List[str],
-                   audio_file: str,
-                   video_aspect: VideoAspect = VideoAspect.portrait,
-                   video_concat_mode: VideoConcatMode = VideoConcatMode.random,
-                   max_clip_duration: int = 5,
-                   threads: int = 2,
-                   ) -> str:
+def combine_videos(
+    combined_video_path: str,
+    video_paths: List[str],
+    audio_file: str,
+    video_aspect: VideoAspect = VideoAspect.portrait,
+    video_concat_mode: VideoConcatMode = VideoConcatMode.random,
+    max_clip_duration: int = 5,
+    threads: int = 2,
+) -> str:
    audio_clip = AudioFileClip(audio_file)
    audio_duration = audio_clip.duration
    logger.info(f"max duration of audio: {audio_duration} seconds")
@@ -102,13 +104,19 @@ def combine_videos(combined_video_path: str,
                    new_height = int(clip_h * scale_factor)
                    clip_resized = clip.resize(newsize=(new_width, new_height))

-                    background = ColorClip(size=(video_width, video_height), color=(0, 0, 0))
-                    clip = CompositeVideoClip([
-                        background.set_duration(clip.duration),
-                        clip_resized.set_position("center")
-                    ])
+                    background = ColorClip(
+                        size=(video_width, video_height), color=(0, 0, 0)
+                    )
+                    clip = CompositeVideoClip(
+                        [
+                            background.set_duration(clip.duration),
+                            clip_resized.set_position("center"),
+                        ]
+                    )

-                logger.info(f"resizing video to {video_width} x {video_height}, clip size: {clip_w} x {clip_h}")
+                logger.info(
+                    f"resizing video to {video_width} x {video_height}, clip size: {clip_w} x {clip_h}"
+                )

            if clip.duration > max_clip_duration:
                clip = clip.subclip(0, max_clip_duration)
@@ -118,21 +126,22 @@ def combine_videos(combined_video_path: str,

    video_clip = concatenate_videoclips(clips)
    video_clip = video_clip.set_fps(30)
-    logger.info(f"writing")
+    logger.info("writing")
    # https://github.com/harry0703/MoneyPrinterTurbo/issues/111#issuecomment-2032354030
-    video_clip.write_videofile(filename=combined_video_path,
-                               threads=threads,
-                               logger=None,
-                               temp_audiofile_path=output_dir,
-                               audio_codec="aac",
-                               fps=30,
-                               )
+    video_clip.write_videofile(
+        filename=combined_video_path,
+        threads=threads,
+        logger=None,
+        temp_audiofile_path=output_dir,
+        audio_codec="aac",
+        fps=30,
+    )
    video_clip.close()
-    logger.success(f"completed")
+    logger.success("completed")
    return combined_video_path


-def wrap_text(text, max_width, font='Arial', fontsize=60):
+def wrap_text(text, max_width, font="Arial", fontsize=60):
    # 创建字体对象
    font = ImageFont.truetype(font, fontsize)

@@ -151,7 +160,7 @@ def wrap_text(text, max_width, font='Arial', fontsize=60):

    _wrapped_lines_ = []
    words = text.split(" ")
-    _txt_ = ''
+    _txt_ = ""
    for word in words:
        _before = _txt_
        _txt_ += f"{word} "
@@ -167,14 +176,14 @@ def wrap_text(text, max_width, font='Arial', fontsize=60):
    _wrapped_lines_.append(_txt_)
    if processed:
        _wrapped_lines_ = [line.strip() for line in _wrapped_lines_]
-        result = '\n'.join(_wrapped_lines_).strip()
+        result = "\n".join(_wrapped_lines_).strip()
        height = len(_wrapped_lines_) * height
        # logger.warning(f"wrapped text: {result}")
        return result, height

    _wrapped_lines_ = []
    chars = list(text)
-    _txt_ = ''
+    _txt_ = ""
    for word in chars:
        _txt_ += word
        _width, _height = get_text_size(_txt_)
@@ -182,20 +191,21 @@ def wrap_text(text, max_width, font='Arial', fontsize=60):
            continue
        else:
            _wrapped_lines_.append(_txt_)
-            _txt_ = ''
+            _txt_ = ""
    _wrapped_lines_.append(_txt_)
-    result = '\n'.join(_wrapped_lines_).strip()
+    result = "\n".join(_wrapped_lines_).strip()
    height = len(_wrapped_lines_) * height
    # logger.warning(f"wrapped text: {result}")
    return result, height


-def generate_video(video_path: str,
-                   audio_path: str,
-                   subtitle_path: str,
-                   output_file: str,
-                   params: VideoParams,
-                   ):
+def generate_video(
+    video_path: str,
+    audio_path: str,
+    subtitle_path: str,
+    output_file: str,
+    params: VideoParams,
+):
    aspect = VideoAspect(params.video_aspect)
    video_width, video_height = aspect.to_resolution()

@@ -215,7 +225,7 @@ def generate_video(video_path: str,
        if not params.font_name:
            params.font_name = "STHeitiMedium.ttc"
        font_path = os.path.join(utils.font_dir(), params.font_name)
-        if os.name == 'nt':
+        if os.name == "nt":
            font_path = font_path.replace("\\", "/")

        logger.info(f"using font: {font_path}")
@@ -223,11 +233,9 @@ def generate_video(video_path: str,
    def create_text_clip(subtitle_item):
        phrase = subtitle_item[1]
        max_width = video_width * 0.9
-        wrapped_txt, txt_height = wrap_text(phrase,
-                                            max_width=max_width,
-                                            font=font_path,
-                                            fontsize=params.font_size
-                                            )
+        wrapped_txt, txt_height = wrap_text(
+            phrase, max_width=max_width, font=font_path, fontsize=params.font_size
+        )
        _clip = TextClip(
            wrapped_txt,
            font=font_path,
@@ -243,18 +251,26 @@ def generate_video(video_path: str,
        _clip = _clip.set_end(subtitle_item[0][1])
        _clip = _clip.set_duration(duration)
        if params.subtitle_position == "bottom":
-            _clip = _clip.set_position(('center', video_height * 0.95 - _clip.h))
+            _clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
        elif params.subtitle_position == "top":
-            _clip = _clip.set_position(('center', video_height * 0.1))
-        else:
-            _clip = _clip.set_position(('center', 'center'))
+            _clip = _clip.set_position(("center", video_height * 0.05))
+        elif params.subtitle_position == "custom":
+            # 确保字幕完全在屏幕内
+            margin = 10  # 额外的边距，单位为像素
+            max_y = video_height - _clip.h - margin
+            min_y = margin
+            custom_y = (video_height - _clip.h) * (params.custom_position / 100)
+            custom_y = max(min_y, min(custom_y, max_y))  # 限制 y 值在有效范围内
+            _clip = _clip.set_position(("center", custom_y))
+        else:  # center
+            _clip = _clip.set_position(("center", "center"))
        return _clip

    video_clip = VideoFileClip(video_path)
    audio_clip = AudioFileClip(audio_path).volumex(params.voice_volume)

    if subtitle_path and os.path.exists(subtitle_path):
-        sub = SubtitlesClip(subtitles=subtitle_path, encoding='utf-8')
+        sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
        text_clips = []
        for item in sub.subtitles:
            clip = create_text_clip(subtitle_item=item)
@@ -264,24 +280,26 @@ def generate_video(video_path: str,
    bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
    if bgm_file:
        try:
-            bgm_clip = (AudioFileClip(bgm_file)
-                        .volumex(params.bgm_volume)
-                        .audio_fadeout(3))
+            bgm_clip = (
+                AudioFileClip(bgm_file).volumex(params.bgm_volume).audio_fadeout(3)
+            )
            bgm_clip = afx.audio_loop(bgm_clip, duration=video_clip.duration)
            audio_clip = CompositeAudioClip([audio_clip, bgm_clip])
        except Exception as e:
            logger.error(f"failed to add bgm: {str(e)}")

    video_clip = video_clip.set_audio(audio_clip)
-    video_clip.write_videofile(output_file,
-                               audio_codec="aac",
-                               temp_audiofile_path=output_dir,
-                               threads=params.n_threads or 2,
-                               logger=None,
-                               fps=30,
-                               )
+    video_clip.write_videofile(
+        output_file,
+        audio_codec="aac",
+        temp_audiofile_path=output_dir,
+        threads=params.n_threads or 2,
+        logger=None,
+        fps=30,
+    )
    video_clip.close()
-    logger.success(f"completed")
+    del video_clip
+    logger.success("completed")


 def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
@@ -292,7 +310,7 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
        ext = utils.parse_extension(material.url)
        try:
            clip = VideoFileClip(material.url)
-        except Exception as e:
+        except Exception:
            clip = ImageClip(material.url)

        width = clip.size[0]
@@ -304,12 +322,18 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
        if ext in const.FILE_TYPE_IMAGES:
            logger.info(f"processing image: {material.url}")
            # 创建一个图片剪辑，并设置持续时间为3秒钟
-            clip = ImageClip(material.url).set_duration(clip_duration).set_position("center")
+            clip = (
+                ImageClip(material.url)
+                .set_duration(clip_duration)
+                .set_position("center")
+            )
            # 使用resize方法来添加缩放效果。这里使用了lambda函数来使得缩放效果随时间变化。
            # 假设我们想要从原始大小逐渐放大到120%的大小。
            # t代表当前时间，clip.duration为视频总时长，这里是3秒。
            # 注意：1 表示100%的大小，所以1.2表示120%的大小
-            zoom_clip = clip.resize(lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration))
+            zoom_clip = clip.resize(
+                lambda t: 1 + (clip_duration * 0.03) * (t / clip.duration)
+            )

            # 如果需要，可以创建一个包含缩放剪辑的复合视频剪辑
            # （这在您想要在视频中添加其他元素时非常有用）
@@ -319,6 +343,7 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
            video_file = f"{material.url}.mp4"
            final_clip.write_videofile(video_file, fps=30, logger=None)
            final_clip.close()
+            del final_clip
            material.url = video_file
            logger.success(f"completed: {video_file}")
    return materials
--- a/app/services/voice.py
+++ b/app/services/voice.py
@@ -988,7 +988,7 @@ Name: zh-CN-XiaoxiaoMultilingualNeural-V2
 Gender: Female
    """.strip()
    voices = []
-    name = ''
+    name = ""
    for line in voices_str.split("\n"):
        line = line.strip()
        if not line:
@@ -1008,7 +1008,7 @@ Gender: Female
                            voices.append(f"{name}-{gender}")
                else:
                    voices.append(f"{name}-{gender}")
-                name = ''
+                name = ""
    voices.sort()
    return voices

@@ -1028,28 +1028,45 @@ def is_azure_v2_voice(voice_name: str):
    return ""


-def tts(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
+def tts(
+    text: str, voice_name: str, voice_rate: float, voice_file: str
+) -> [SubMaker, None]:
    if is_azure_v2_voice(voice_name):
        return azure_tts_v2(text, voice_name, voice_file)
-    return azure_tts_v1(text, voice_name, voice_file)
+    return azure_tts_v1(text, voice_name, voice_rate, voice_file)


-def azure_tts_v1(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
+def convert_rate_to_percent(rate: float) -> str:
+    if rate == 1.0:
+        return "+0%"
+    percent = round((rate - 1.0) * 100)
+    if percent > 0:
+        return f"+{percent}%"
+    else:
+        return f"{percent}%"
+
+
+def azure_tts_v1(
+    text: str, voice_name: str, voice_rate: float, voice_file: str
+) -> [SubMaker, None]:
    voice_name = parse_voice_name(voice_name)
    text = text.strip()
+    rate_str = convert_rate_to_percent(voice_rate)
    for i in range(3):
        try:
            logger.info(f"start, voice name: {voice_name}, try: {i + 1}")

            async def _do() -> SubMaker:
-                communicate = edge_tts.Communicate(text, voice_name)
+                communicate = edge_tts.Communicate(text, voice_name, rate=rate_str)
                sub_maker = edge_tts.SubMaker()
                with open(voice_file, "wb") as file:
                    async for chunk in communicate.stream():
                        if chunk["type"] == "audio":
                            file.write(chunk["data"])
                        elif chunk["type"] == "WordBoundary":
-                            sub_maker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
+                            sub_maker.create_sub(
+                                (chunk["offset"], chunk["duration"]), chunk["text"]
+                            )
                return sub_maker

            sub_maker = asyncio.run(_do())
@@ -1074,8 +1091,12 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
    def _format_duration_to_offset(duration) -> int:
        if isinstance(duration, str):
            time_obj = datetime.strptime(duration, "%H:%M:%S.%f")
-            milliseconds = (time_obj.hour * 3600000) + (time_obj.minute * 60000) + (time_obj.second * 1000) + (
-                    time_obj.microsecond // 1000)
+            milliseconds = (
+                (time_obj.hour * 3600000)
+                + (time_obj.minute * 60000)
+                + (time_obj.second * 1000)
+                + (time_obj.microsecond // 1000)
+            )
            return milliseconds * 10000

        if isinstance(duration, int):
@@ -1108,20 +1129,29 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
            # Creates an instance of a speech config with specified subscription key and service region.
            speech_key = config.azure.get("speech_key", "")
            service_region = config.azure.get("speech_region", "")
-            audio_config = speechsdk.audio.AudioOutputConfig(filename=voice_file, use_default_speaker=True)
-            speech_config = speechsdk.SpeechConfig(subscription=speech_key,
-                                                   region=service_region)
+            audio_config = speechsdk.audio.AudioOutputConfig(
+                filename=voice_file, use_default_speaker=True
+            )
+            speech_config = speechsdk.SpeechConfig(
+                subscription=speech_key, region=service_region
+            )
            speech_config.speech_synthesis_voice_name = voice_name
            # speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary,
            #                            value='true')
-            speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestWordBoundary,
-                                       value='true')
+            speech_config.set_property(
+                property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestWordBoundary,
+                value="true",
+            )

            speech_config.set_speech_synthesis_output_format(
-                speechsdk.SpeechSynthesisOutputFormat.Audio48Khz192KBitRateMonoMp3)
-            speech_synthesizer = speechsdk.SpeechSynthesizer(audio_config=audio_config,
-                                                             speech_config=speech_config)
-            speech_synthesizer.synthesis_word_boundary.connect(speech_synthesizer_word_boundary_cb)
+                speechsdk.SpeechSynthesisOutputFormat.Audio48Khz192KBitRateMonoMp3
+            )
+            speech_synthesizer = speechsdk.SpeechSynthesizer(
+                audio_config=audio_config, speech_config=speech_config
+            )
+            speech_synthesizer.synthesis_word_boundary.connect(
+                speech_synthesizer_word_boundary_cb
+            )

            result = speech_synthesizer.speak_text_async(text).get()
            if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
@@ -1129,9 +1159,13 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
                return sub_maker
            elif result.reason == speechsdk.ResultReason.Canceled:
                cancellation_details = result.cancellation_details
-                logger.error(f"azure v2 speech synthesis canceled: {cancellation_details.reason}")
+                logger.error(
+                    f"azure v2 speech synthesis canceled: {cancellation_details.reason}"
+                )
                if cancellation_details.reason == speechsdk.CancellationReason.Error:
-                    logger.error(f"azure v2 speech synthesis error: {cancellation_details.error_details}")
+                    logger.error(
+                        f"azure v2 speech synthesis error: {cancellation_details.error_details}"
+                    )
            logger.info(f"completed, output file: {voice_file}")
        except Exception as e:
            logger.error(f"failed, error: {str(e)}")
@@ -1168,11 +1202,7 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
        """
        start_t = mktimestamp(start_time).replace(".", ",")
        end_t = mktimestamp(end_time).replace(".", ",")
-        return (
-            f"{idx}\n"
-            f"{start_t} --> {end_t}\n"
-            f"{sub_text}\n"
-        )
+        return f"{idx}\n" f"{start_t} --> {end_t}\n" f"{sub_text}\n"

    start_time = -1.0
    sub_items = []
@@ -1229,12 +1259,16 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
            try:
                sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
                duration = max([tb for ((ta, tb), txt) in sbs])
-                logger.info(f"completed, subtitle file created: {subtitle_file}, duration: {duration}")
+                logger.info(
+                    f"completed, subtitle file created: {subtitle_file}, duration: {duration}"
+                )
            except Exception as e:
                logger.error(f"failed, error: {str(e)}")
                os.remove(subtitle_file)
        else:
-            logger.warning(f"failed, sub_items len: {len(sub_items)}, script_lines len: {len(script_lines)}")
+            logger.warning(
+                f"failed, sub_items len: {len(sub_items)}, script_lines len: {len(script_lines)}"
+            )

    except Exception as e:
        logger.error(f"failed, error: {str(e)}")
@@ -1258,7 +1292,6 @@ if __name__ == "__main__":
    voices = get_all_azure_voices()
    print(len(voices))

-
    async def _do():
        temp_dir = utils.storage_dir("temp")

@@ -1307,12 +1340,13 @@ if __name__ == "__main__":
        for voice_name in voice_names:
            voice_file = f"{temp_dir}/tts-{voice_name}.mp3"
            subtitle_file = f"{temp_dir}/tts.mp3.srt"
-            sub_maker = azure_tts_v2(text=text, voice_name=voice_name, voice_file=voice_file)
+            sub_maker = azure_tts_v2(
+                text=text, voice_name=voice_name, voice_file=voice_file
+            )
            create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
            audio_duration = get_audio_duration(sub_maker)
            print(f"voice: {voice_name}, audio duration: {audio_duration}s")

-
    loop = asyncio.get_event_loop_policy().get_event_loop()
    try:
        loop.run_until_complete(_do())
--- a/app/utils/utils.py
+++ b/app/utils/utils.py
@@ -15,12 +15,12 @@ urllib3.disable_warnings()

 def get_response(status: int, data: Any = None, message: str = ""):
    obj = {
-        'status': status,
+        "status": status,
    }
    if data:
-        obj['data'] = data
+        obj["data"] = data
    if message:
-        obj['message'] = message
+        obj["message"] = message
    return obj


@@ -41,7 +41,7 @@ def to_json(obj):
            elif isinstance(o, (list, tuple)):
                return [serialize(item) for item in o]
            # 如果对象是自定义类型，尝试返回其__dict__属性
-            elif hasattr(o, '__dict__'):
+            elif hasattr(o, "__dict__"):
                return serialize(o.__dict__)
            # 其他情况返回None（或者可以选择抛出异常）
            else:
@@ -199,7 +199,8 @@ def split_string_by_punctuations(s):

 def md5(text):
    import hashlib
-    return hashlib.md5(text.encode('utf-8')).hexdigest()
+
+    return hashlib.md5(text.encode("utf-8")).hexdigest()


 def get_system_locale():
--- a/changelog.py
+++ b/changelog.py
@@ -12,6 +12,6 @@ build_and_render(
    parse_refs=False,
    sections=["build", "deps", "feat", "fix", "refactor"],
    versioning="pep440",
-    bump="1.1.2",   # 指定bump版本
+    bump="1.1.2",  # 指定bump版本
    in_place=True,
 )
--- a/docs/wechat-group.jpg
+++ b/docs/wechat-group.jpg
--- a/main.py
+++ b/main.py
@@ -1,8 +1,16 @@
 import uvicorn
 from loguru import logger
+
 from app.config import config

-if __name__ == '__main__':
-    logger.info("start server, docs: http://127.0.0.1:" + str(config.listen_port) + "/docs")
-    uvicorn.run(app="app.asgi:app", host=config.listen_host, port=config.listen_port, reload=config.reload_debug,
-                log_level="warning")
+if __name__ == "__main__":
+    logger.info(
+        "start server, docs: http://127.0.0.1:" + str(config.listen_port) + "/docs"
+    )
+    uvicorn.run(
+        app="app.asgi:app",
+        host=config.listen_host,
+        port=config.listen_port,
+        reload=config.reload_debug,
+        log_level="warning",
+    )
--- a/webui/Main.py
+++ b/webui/Main.py
@@ -1,6 +1,5 @@
-import sys
 import os
-import time
+import sys

 # Add the root directory of the project to the system path to allow importing modules from the project
 root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
@@ -10,31 +9,33 @@ if root_dir not in sys.path:
    print(sys.path)
    print("")

-import streamlit as st
-
 import os
-from uuid import uuid4
 import platform
-import streamlit.components.v1 as components
+from uuid import uuid4
+
+import streamlit as st
 from loguru import logger

-st.set_page_config(page_title="MoneyPrinterTurbo",
-                   page_icon="🤖",
-                   layout="wide",
-                   initial_sidebar_state="auto",
-                   menu_items={
-                       'Report a bug': "https://github.com/harry0703/MoneyPrinterTurbo/issues",
-                       'About': "# MoneyPrinterTurbo\nSimply provide a topic or keyword for a video, and it will "
-                                "automatically generate the video copy, video materials, video subtitles, "
-                                "and video background music before synthesizing a high-definition short "
-                                "video.\n\nhttps://github.com/harry0703/MoneyPrinterTurbo"
-                   })
+st.set_page_config(
+    page_title="MoneyPrinterTurbo",
+    page_icon="🤖",
+    layout="wide",
+    initial_sidebar_state="auto",
+    menu_items={
+        "Report a bug": "https://github.com/harry0703/MoneyPrinterTurbo/issues",
+        "About": "# MoneyPrinterTurbo\nSimply provide a topic or keyword for a video, and it will "
+        "automatically generate the video copy, video materials, video subtitles, "
+        "and video background music before synthesizing a high-definition short "
+        "video.\n\nhttps://github.com/harry0703/MoneyPrinterTurbo",
+    },
+)

-from app.models.schema import VideoParams, VideoAspect, VideoConcatMode, MaterialInfo
-from app.services import task as tm, llm, voice
-from app.utils import utils
 from app.config import config
-from app.models.const import FILE_TYPE_VIDEOS, FILE_TYPE_IMAGES
+from app.models.const import FILE_TYPE_IMAGES, FILE_TYPE_VIDEOS
+from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode, VideoParams
+from app.services import llm, voice
+from app.services import task as tm
+from app.utils import utils

 hide_streamlit_style = """
 <style>#root > div:nth-child(1) > div > div > div > div > section > div {padding-top: 0rem;}</style>
@@ -42,7 +43,16 @@ hide_streamlit_style = """
 st.markdown(hide_streamlit_style, unsafe_allow_html=True)
 st.title(f"MoneyPrinterTurbo v{config.project_version}")

-support_locales = ["zh-CN", "zh-HK", "zh-TW", "de-DE", "en-US", "fr-FR", "vi-VN", "th-TH"]
+support_locales = [
+    "zh-CN",
+    "zh-HK",
+    "zh-TW",
+    "de-DE",
+    "en-US",
+    "fr-FR",
+    "vi-VN",
+    "th-TH",
+]

 font_dir = os.path.join(root_dir, "resource", "fonts")
 song_dir = os.path.join(root_dir, "resource", "songs")
@@ -51,14 +61,14 @@ config_file = os.path.join(root_dir, "webui", ".streamlit", "webui.toml")
 system_locale = utils.get_system_locale()
 # print(f"******** system locale: {system_locale} ********")

-if 'video_subject' not in st.session_state:
-    st.session_state['video_subject'] = ''
-if 'video_script' not in st.session_state:
-    st.session_state['video_script'] = ''
-if 'video_terms' not in st.session_state:
-    st.session_state['video_terms'] = ''
-if 'ui_language' not in st.session_state:
-    st.session_state['ui_language'] = config.ui.get("language", system_locale)
+if "video_subject" not in st.session_state:
+    st.session_state["video_subject"] = ""
+if "video_script" not in st.session_state:
+    st.session_state["video_script"] = ""
+if "video_terms" not in st.session_state:
+    st.session_state["video_terms"] = ""
+if "ui_language" not in st.session_state:
+    st.session_state["ui_language"] = config.ui.get("language", system_locale)


 def get_all_fonts():
@@ -85,25 +95,25 @@ def open_task_folder(task_id):
        sys = platform.system()
        path = os.path.join(root_dir, "storage", "tasks", task_id)
        if os.path.exists(path):
-            if sys == 'Windows':
+            if sys == "Windows":
                os.system(f"start {path}")
-            if sys == 'Darwin':
+            if sys == "Darwin":
                os.system(f"open {path}")
    except Exception as e:
        logger.error(e)


 def scroll_to_bottom():
-    js = f"""
+    js = """
    <script>
        console.log("scroll_to_bottom");
-        function scroll(dummy_var_to_force_repeat_execution){{
+        function scroll(dummy_var_to_force_repeat_execution){
            var sections = parent.document.querySelectorAll('section.main');
            console.log(sections);
-            for(let index = 0; index<sections.length; index++) {{
+            for(let index = 0; index<sections.length; index++) {
                sections[index].scrollTop = sections[index].scrollHeight;
-            }}
-        }}
+            }
+        }
        scroll(1);
    </script>
    """
@@ -123,12 +133,15 @@ def init_log():
        record["file"].path = f"./{relative_path}"
        # 返回修改后的格式字符串
        # 您可以根据需要调整这里的格式
-        record['message'] = record['message'].replace(root_dir, ".")
+        record["message"] = record["message"].replace(root_dir, ".")

-        _format = '<green>{time:%Y-%m-%d %H:%M:%S}</> | ' + \
-                  '<level>{level}</> | ' + \
-                  '"{file.path}:{line}":<blue> {function}</> ' + \
-                  '- <level>{message}</>' + "\n"
+        _format = (
+            "<green>{time:%Y-%m-%d %H:%M:%S}</> | "
+            + "<level>{level}</> | "
+            + '"{file.path}:{line}":<blue> {function}</> '
+            + "- <level>{message}</>"
+            + "\n"
+        )
        return _format

    logger.add(
@@ -145,7 +158,7 @@ locales = utils.load_locales(i18n_dir)


 def tr(key):
-    loc = locales.get(st.session_state['ui_language'], {})
+    loc = locales.get(st.session_state["ui_language"], {})
    return loc.get("Translation", {}).get(key, key)


@@ -164,15 +177,22 @@ if not config.app.get("hide_config", False):
            selected_index = 0
            for i, code in enumerate(locales.keys()):
                display_languages.append(f"{code} - {locales[code].get('Language')}")
-                if code == st.session_state['ui_language']:
+                if code == st.session_state["ui_language"]:
                    selected_index = i

-            selected_language = st.selectbox(tr("Language"), options=display_languages,
-                                             index=selected_index)
+            selected_language = st.selectbox(
+                tr("Language"), options=display_languages, index=selected_index
+            )
            if selected_language:
                code = selected_language.split(" - ")[0].strip()
-                st.session_state['ui_language'] = code
-                config.ui['language'] = code
+                st.session_state["ui_language"] = code
+                config.ui["language"] = code
+
+            # 是否禁用日志显示
+            hide_log = st.checkbox(
+                tr("Hide Log"), value=config.app.get("hide_log", False)
+            )
+            config.ui["hide_log"] = hide_log

        with middle_config_panel:
            #   openai
@@ -183,8 +203,19 @@ if not config.app.get("hide_config", False):
            #   qwen (通义千问)
            #   gemini
            #   ollama
-            llm_providers = ['OpenAI', 'Moonshot', 'Azure', 'Qwen', 'DeepSeek', 'Gemini', 'Ollama', 'G4f', 'OneAPI',
-                             "Cloudflare"]
+            llm_providers = [
+                "OpenAI",
+                "Moonshot",
+                "Azure",
+                "Qwen",
+                "DeepSeek",
+                "Gemini",
+                "Ollama",
+                "G4f",
+                "OneAPI",
+                "Cloudflare",
+                "ERNIE",
+            ]
            saved_llm_provider = config.app.get("llm_provider", "OpenAI").lower()
            saved_llm_provider_index = 0
            for i, provider in enumerate(llm_providers):
@@ -192,18 +223,25 @@ if not config.app.get("hide_config", False):
                    saved_llm_provider_index = i
                    break

-            llm_provider = st.selectbox(tr("LLM Provider"), options=llm_providers, index=saved_llm_provider_index)
+            llm_provider = st.selectbox(
+                tr("LLM Provider"),
+                options=llm_providers,
+                index=saved_llm_provider_index,
+            )
            llm_helper = st.container()
            llm_provider = llm_provider.lower()
            config.app["llm_provider"] = llm_provider

            llm_api_key = config.app.get(f"{llm_provider}_api_key", "")
+            llm_secret_key = config.app.get(
+                f"{llm_provider}_secret_key", ""
+            )  # only for baidu ernie
            llm_base_url = config.app.get(f"{llm_provider}_base_url", "")
            llm_model_name = config.app.get(f"{llm_provider}_model_name", "")
            llm_account_id = config.app.get(f"{llm_provider}_account_id", "")

            tips = ""
-            if llm_provider == 'ollama':
+            if llm_provider == "ollama":
                if not llm_model_name:
                    llm_model_name = "qwen:7b"
                if not llm_base_url:
@@ -219,7 +257,7 @@ if not config.app.get("hide_config", False):
                           - **Model Name**: 使用 `ollama list` 查看，比如 `qwen:7b`
                           """

-            if llm_provider == 'openai':
+            if llm_provider == "openai":
                if not llm_model_name:
                    llm_model_name = "gpt-3.5-turbo"
                with llm_helper:
@@ -231,7 +269,7 @@ if not config.app.get("hide_config", False):
                           - **Model Name**: 填写**有权限**的模型，[点击查看模型列表](https://platform.openai.com/settings/organization/limits)
                           """

-            if llm_provider == 'moonshot':
+            if llm_provider == "moonshot":
                if not llm_model_name:
                    llm_model_name = "moonshot-v1-8k"
                with llm_helper:
@@ -241,8 +279,20 @@ if not config.app.get("hide_config", False):
                           - **Base Url**: 固定为 https://api.moonshot.cn/v1
                           - **Model Name**: 比如 moonshot-v1-8k，[点击查看模型列表](https://platform.moonshot.cn/docs/intro#%E6%A8%A1%E5%9E%8B%E5%88%97%E8%A1%A8)
                           """
+            if llm_provider == "oneapi":
+                if not llm_model_name:
+                    llm_model_name = (
+                        "claude-3-5-sonnet-20240620"  # 默认模型，可以根据需要调整
+                    )
+                with llm_helper:
+                    tips = """
+                        ##### OneAPI 配置说明
+                        - **API Key**: 填写您的 OneAPI 密钥
+                        - **Base Url**: 填写 OneAPI 的基础 URL
+                        - **Model Name**: 填写您要使用的模型名称，例如 claude-3-5-sonnet-20240620
+                        """

-            if llm_provider == 'qwen':
+            if llm_provider == "qwen":
                if not llm_model_name:
                    llm_model_name = "qwen-max"
                with llm_helper:
@@ -253,7 +303,7 @@ if not config.app.get("hide_config", False):
                           - **Model Name**: 比如 qwen-max，[点击查看模型列表](https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction#3ef6d0bcf91wy)
                           """

-            if llm_provider == 'g4f':
+            if llm_provider == "g4f":
                if not llm_model_name:
                    llm_model_name = "gpt-3.5-turbo"
                with llm_helper:
@@ -264,7 +314,7 @@ if not config.app.get("hide_config", False):
                           - **Base Url**: 留空
                           - **Model Name**: 比如 gpt-3.5-turbo，[点击查看模型列表](https://github.com/xtekky/gpt4free/blob/main/g4f/models.py#L308)
                           """
-            if llm_provider == 'azure':
+            if llm_provider == "azure":
                with llm_helper:
                    tips = """
                           ##### Azure 配置说明
@@ -274,7 +324,7 @@ if not config.app.get("hide_config", False):
                           - **Model Name**: 填写你实际的部署名
                           """

-            if llm_provider == 'gemini':
+            if llm_provider == "gemini":
                if not llm_model_name:
                    llm_model_name = "gemini-1.0-pro"

@@ -287,7 +337,7 @@ if not config.app.get("hide_config", False):
                           - **Model Name**: 比如 gemini-1.0-pro
                           """

-            if llm_provider == 'deepseek':
+            if llm_provider == "deepseek":
                if not llm_model_name:
                    llm_model_name = "deepseek-chat"
                if not llm_base_url:
@@ -300,14 +350,36 @@ if not config.app.get("hide_config", False):
                           - **Model Name**: 固定为 deepseek-chat
                           """

-            if tips and config.ui['language'] == 'zh':
+            if llm_provider == "ernie":
+                with llm_helper:
+                    tips = """
+                           ##### 百度文心一言 配置说明
+                           - **API Key**: [点击到官网申请](https://console.bce.baidu.com/qianfan/ais/console/applicationConsole/application)
+                           - **Secret Key**: [点击到官网申请](https://console.bce.baidu.com/qianfan/ais/console/applicationConsole/application)
+                           - **Base Url**: 填写 **请求地址** [点击查看文档](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/jlil56u11#%E8%AF%B7%E6%B1%82%E8%AF%B4%E6%98%8E)
+                           """
+
+            if tips and config.ui["language"] == "zh":
                st.warning(
-                    "中国用户建议使用 **DeepSeek** 或 **Moonshot** 作为大模型提供商\n- 国内可直接访问，不需要VPN \n- 注册就送额度，基本够用")
+                    "中国用户建议使用 **DeepSeek** 或 **Moonshot** 作为大模型提供商\n- 国内可直接访问，不需要VPN \n- 注册就送额度，基本够用"
+                )
                st.info(tips)

-            st_llm_api_key = st.text_input(tr("API Key"), value=llm_api_key, type="password")
+            st_llm_api_key = st.text_input(
+                tr("API Key"), value=llm_api_key, type="password"
+            )
            st_llm_base_url = st.text_input(tr("Base Url"), value=llm_base_url)
-            st_llm_model_name = st.text_input(tr("Model Name"), value=llm_model_name)
+            st_llm_model_name = ""
+            if llm_provider != "ernie":
+                st_llm_model_name = st.text_input(
+                    tr("Model Name"),
+                    value=llm_model_name,
+                    key=f"{llm_provider}_model_name_input",
+                )
+                if st_llm_model_name:
+                    config.app[f"{llm_provider}_model_name"] = st_llm_model_name
+            else:
+                st_llm_model_name = None

            if st_llm_api_key:
                config.app[f"{llm_provider}_api_key"] = st_llm_api_key
@@ -315,13 +387,21 @@ if not config.app.get("hide_config", False):
                config.app[f"{llm_provider}_base_url"] = st_llm_base_url
            if st_llm_model_name:
                config.app[f"{llm_provider}_model_name"] = st_llm_model_name
+            if llm_provider == "ernie":
+                st_llm_secret_key = st.text_input(
+                    tr("Secret Key"), value=llm_secret_key, type="password"
+                )
+                config.app[f"{llm_provider}_secret_key"] = st_llm_secret_key

-            if llm_provider == 'cloudflare':
-                st_llm_account_id = st.text_input(tr("Account ID"), value=llm_account_id)
+            if llm_provider == "cloudflare":
+                st_llm_account_id = st.text_input(
+                    tr("Account ID"), value=llm_account_id
+                )
                if st_llm_account_id:
                    config.app[f"{llm_provider}_account_id"] = st_llm_account_id

        with right_config_panel:
+
            def get_keys_from_config(cfg_key):
                api_keys = config.app.get(cfg_key, [])
                if isinstance(api_keys, str):
@@ -329,19 +409,21 @@ if not config.app.get("hide_config", False):
                api_key = ", ".join(api_keys)
                return api_key

-
            def save_keys_to_config(cfg_key, value):
                value = value.replace(" ", "")
                if value:
                    config.app[cfg_key] = value.split(",")

-
            pexels_api_key = get_keys_from_config("pexels_api_keys")
-            pexels_api_key = st.text_input(tr("Pexels API Key"), value=pexels_api_key, type="password")
+            pexels_api_key = st.text_input(
+                tr("Pexels API Key"), value=pexels_api_key, type="password"
+            )
            save_keys_to_config("pexels_api_keys", pexels_api_key)

            pixabay_api_key = get_keys_from_config("pixabay_api_keys")
-            pixabay_api_key = st.text_input(tr("Pixabay API Key"), value=pixabay_api_key, type="password")
+            pixabay_api_key = st.text_input(
+                tr("Pixabay API Key"), value=pixabay_api_key, type="password"
+            )
            save_keys_to_config("pixabay_api_keys", pixabay_api_key)

 panel = st.columns(3)
@@ -355,8 +437,9 @@ uploaded_files = []
 with left_panel:
    with st.container(border=True):
        st.write(tr("Video Script Settings"))
-        params.video_subject = st.text_input(tr("Video Subject"),
-                                             value=st.session_state['video_subject']).strip()
+        params.video_subject = st.text_input(
+            tr("Video Subject"), value=st.session_state["video_subject"]
+        ).strip()

        video_languages = [
            (tr("Auto Detect"), ""),
@@ -364,24 +447,27 @@ with left_panel:
        for code in support_locales:
            video_languages.append((code, code))

-        selected_index = st.selectbox(tr("Script Language"),
-                                      index=0,
-                                      options=range(len(video_languages)),  # 使用索引作为内部选项值
-                                      format_func=lambda x: video_languages[x][0]  # 显示给用户的是标签
-                                      )
+        selected_index = st.selectbox(
+            tr("Script Language"),
+            index=0,
+            options=range(len(video_languages)),  # 使用索引作为内部选项值
+            format_func=lambda x: video_languages[x][0],  # 显示给用户的是标签
+        )
        params.video_language = video_languages[selected_index][1]

-        if st.button(tr("Generate Video Script and Keywords"), key="auto_generate_script"):
+        if st.button(
+            tr("Generate Video Script and Keywords"), key="auto_generate_script"
+        ):
            with st.spinner(tr("Generating Video Script and Keywords")):
-                script = llm.generate_script(video_subject=params.video_subject, language=params.video_language)
+                script = llm.generate_script(
+                    video_subject=params.video_subject, language=params.video_language
+                )
                terms = llm.generate_terms(params.video_subject, script)
-                st.session_state['video_script'] = script
-                st.session_state['video_terms'] = ", ".join(terms)
+                st.session_state["video_script"] = script
+                st.session_state["video_terms"] = ", ".join(terms)

        params.video_script = st.text_area(
-            tr("Video Script"),
-            value=st.session_state['video_script'],
-            height=280
+            tr("Video Script"), value=st.session_state["video_script"], height=280
        )
        if st.button(tr("Generate Video Keywords"), key="auto_generate_terms"):
            if not params.video_script:
@@ -390,12 +476,11 @@ with left_panel:

            with st.spinner(tr("Generating Video Keywords")):
                terms = llm.generate_terms(params.video_subject, params.video_script)
-                st.session_state['video_terms'] = ", ".join(terms)
+                st.session_state["video_terms"] = ", ".join(terms)

        params.video_terms = st.text_area(
-            tr("Video Keywords"),
-            value=st.session_state['video_terms'],
-            height=50)
+            tr("Video Keywords"), value=st.session_state["video_terms"], height=50
+        )

 with middle_panel:
    with st.container(border=True):
@@ -414,73 +499,93 @@ with middle_panel:
        ]

        saved_video_source_name = config.app.get("video_source", "pexels")
-        saved_video_source_index = [v[1] for v in video_sources].index(saved_video_source_name)
+        saved_video_source_index = [v[1] for v in video_sources].index(
+            saved_video_source_name
+        )

-        selected_index = st.selectbox(tr("Video Source"),
-                                      options=range(len(video_sources)),
-                                      format_func=lambda x: video_sources[x][0],
-                                      index=saved_video_source_index
-                                      )
+        selected_index = st.selectbox(
+            tr("Video Source"),
+            options=range(len(video_sources)),
+            format_func=lambda x: video_sources[x][0],
+            index=saved_video_source_index,
+        )
        params.video_source = video_sources[selected_index][1]
        config.app["video_source"] = params.video_source

-        if params.video_source == 'local':
+        if params.video_source == "local":
            _supported_types = FILE_TYPE_VIDEOS + FILE_TYPE_IMAGES
-            uploaded_files = st.file_uploader("Upload Local Files",
-                                              type=["mp4", "mov", "avi", "flv", "mkv", "jpg", "jpeg", "png"],
-                                              accept_multiple_files=True)
+            uploaded_files = st.file_uploader(
+                "Upload Local Files",
+                type=["mp4", "mov", "avi", "flv", "mkv", "jpg", "jpeg", "png"],
+                accept_multiple_files=True,
+            )

-        selected_index = st.selectbox(tr("Video Concat Mode"),
-                                      index=1,
-                                      options=range(len(video_concat_modes)),  # 使用索引作为内部选项值
-                                      format_func=lambda x: video_concat_modes[x][0]  # 显示给用户的是标签
-                                      )
-        params.video_concat_mode = VideoConcatMode(video_concat_modes[selected_index][1])
+        selected_index = st.selectbox(
+            tr("Video Concat Mode"),
+            index=1,
+            options=range(len(video_concat_modes)),  # 使用索引作为内部选项值
+            format_func=lambda x: video_concat_modes[x][0],  # 显示给用户的是标签
+        )
+        params.video_concat_mode = VideoConcatMode(
+            video_concat_modes[selected_index][1]
+        )

        video_aspect_ratios = [
            (tr("Portrait"), VideoAspect.portrait.value),
            (tr("Landscape"), VideoAspect.landscape.value),
        ]
-        selected_index = st.selectbox(tr("Video Ratio"),
-                                      options=range(len(video_aspect_ratios)),  # 使用索引作为内部选项值
-                                      format_func=lambda x: video_aspect_ratios[x][0]  # 显示给用户的是标签
-                                      )
+        selected_index = st.selectbox(
+            tr("Video Ratio"),
+            options=range(len(video_aspect_ratios)),  # 使用索引作为内部选项值
+            format_func=lambda x: video_aspect_ratios[x][0],  # 显示给用户的是标签
+        )
        params.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])

-        params.video_clip_duration = st.selectbox(tr("Clip Duration"), options=[2, 3, 4, 5, 6], index=1)
-        params.video_count = st.selectbox(tr("Number of Videos Generated Simultaneously"), options=[1, 2, 3, 4, 5],
-                                          index=0)
+        params.video_clip_duration = st.selectbox(
+            tr("Clip Duration"), options=[2, 3, 4, 5, 6, 7, 8, 9, 10], index=1
+        )
+        params.video_count = st.selectbox(
+            tr("Number of Videos Generated Simultaneously"),
+            options=[1, 2, 3, 4, 5],
+            index=0,
+        )
    with st.container(border=True):
        st.write(tr("Audio Settings"))

        # tts_providers = ['edge', 'azure']
        # tts_provider = st.selectbox(tr("TTS Provider"), tts_providers)

-        voices = voice.get_all_azure_voices(
-            filter_locals=support_locales)
+        voices = voice.get_all_azure_voices(filter_locals=support_locales)
        friendly_names = {
-            v: v.
-            replace("Female", tr("Female")).
-            replace("Male", tr("Male")).
-            replace("Neural", "") for
-            v in voices}
+            v: v.replace("Female", tr("Female"))
+            .replace("Male", tr("Male"))
+            .replace("Neural", "")
+            for v in voices
+        }
        saved_voice_name = config.ui.get("voice_name", "")
        saved_voice_name_index = 0
        if saved_voice_name in friendly_names:
            saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
        else:
            for i, v in enumerate(voices):
-                if v.lower().startswith(st.session_state['ui_language'].lower()) and "V2" not in v:
+                if (
+                    v.lower().startswith(st.session_state["ui_language"].lower())
+                    and "V2" not in v
+                ):
                    saved_voice_name_index = i
                    break

-        selected_friendly_name = st.selectbox(tr("Speech Synthesis"),
-                                              options=list(friendly_names.values()),
-                                              index=saved_voice_name_index)
+        selected_friendly_name = st.selectbox(
+            tr("Speech Synthesis"),
+            options=list(friendly_names.values()),
+            index=saved_voice_name_index,
+        )

-        voice_name = list(friendly_names.keys())[list(friendly_names.values()).index(selected_friendly_name)]
+        voice_name = list(friendly_names.keys())[
+            list(friendly_names.values()).index(selected_friendly_name)
+        ]
        params.voice_name = voice_name
-        config.ui['voice_name'] = voice_name
+        config.ui["voice_name"] = voice_name

        if st.button(tr("Play Voice")):
            play_content = params.video_subject
@@ -491,11 +596,21 @@ with middle_panel:
            with st.spinner(tr("Synthesizing Voice")):
                temp_dir = utils.storage_dir("temp", create=True)
                audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")
-                sub_maker = voice.tts(text=play_content, voice_name=voice_name, voice_file=audio_file)
+                sub_maker = voice.tts(
+                    text=play_content,
+                    voice_name=voice_name,
+                    voice_rate=params.voice_rate,
+                    voice_file=audio_file,
+                )
                # if the voice file generation failed, try again with a default content.
                if not sub_maker:
                    play_content = "This is a example voice. if you hear this, the voice synthesis failed with the original content."
-                    sub_maker = voice.tts(text=play_content, voice_name=voice_name, voice_file=audio_file)
+                    sub_maker = voice.tts(
+                        text=play_content,
+                        voice_name=voice_name,
+                        voice_rate=params.voice_rate,
+                        voice_file=audio_file,
+                    )

                if sub_maker and os.path.exists(audio_file):
                    st.audio(audio_file, format="audio/mp3")
@@ -503,25 +618,40 @@ with middle_panel:
                        os.remove(audio_file)

        if voice.is_azure_v2_voice(voice_name):
-            saved_azure_speech_region = config.azure.get(f"speech_region", "")
-            saved_azure_speech_key = config.azure.get(f"speech_key", "")
-            azure_speech_region = st.text_input(tr("Speech Region"), value=saved_azure_speech_region)
-            azure_speech_key = st.text_input(tr("Speech Key"), value=saved_azure_speech_key, type="password")
+            saved_azure_speech_region = config.azure.get("speech_region", "")
+            saved_azure_speech_key = config.azure.get("speech_key", "")
+            azure_speech_region = st.text_input(
+                tr("Speech Region"), value=saved_azure_speech_region
+            )
+            azure_speech_key = st.text_input(
+                tr("Speech Key"), value=saved_azure_speech_key, type="password"
+            )
            config.azure["speech_region"] = azure_speech_region
            config.azure["speech_key"] = azure_speech_key

-        params.voice_volume = st.selectbox(tr("Speech Volume"),
-                                           options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0], index=2)
+        params.voice_volume = st.selectbox(
+            tr("Speech Volume"),
+            options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0],
+            index=2,
+        )
+
+        params.voice_rate = st.selectbox(
+            tr("Speech Rate"),
+            options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
+            index=2,
+        )
+
        bgm_options = [
            (tr("No Background Music"), ""),
            (tr("Random Background Music"), "random"),
            (tr("Custom Background Music"), "custom"),
        ]
-        selected_index = st.selectbox(tr("Background Music"),
-                                      index=1,
-                                      options=range(len(bgm_options)),  # 使用索引作为内部选项值
-                                      format_func=lambda x: bgm_options[x][0]  # 显示给用户的是标签
-                                      )
+        selected_index = st.selectbox(
+            tr("Background Music"),
+            index=1,
+            options=range(len(bgm_options)),  # 使用索引作为内部选项值
+            format_func=lambda x: bgm_options[x][0],  # 显示给用户的是标签
+        )
        # 获取选择的背景音乐类型
        params.bgm_type = bgm_options[selected_index][1]

@@ -531,8 +661,11 @@ with middle_panel:
            if custom_bgm_file and os.path.exists(custom_bgm_file):
                params.bgm_file = custom_bgm_file
                # st.write(f":red[已选择自定义背景音乐]：**{custom_bgm_file}**")
-        params.bgm_volume = st.selectbox(tr("Background Music Volume"),
-                                         options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], index=2)
+        params.bgm_volume = st.selectbox(
+            tr("Background Music Volume"),
+            options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
+            index=2,
+        )

 with right_panel:
    with st.container(border=True):
@@ -543,31 +676,48 @@ with right_panel:
        saved_font_name_index = 0
        if saved_font_name in font_names:
            saved_font_name_index = font_names.index(saved_font_name)
-        params.font_name = st.selectbox(tr("Font"), font_names, index=saved_font_name_index)
-        config.ui['font_name'] = params.font_name
+        params.font_name = st.selectbox(
+            tr("Font"), font_names, index=saved_font_name_index
+        )
+        config.ui["font_name"] = params.font_name

        subtitle_positions = [
            (tr("Top"), "top"),
            (tr("Center"), "center"),
            (tr("Bottom"), "bottom"),
+            (tr("Custom"), "custom"),
        ]
-        selected_index = st.selectbox(tr("Position"),
-                                      index=2,
-                                      options=range(len(subtitle_positions)),  # 使用索引作为内部选项值
-                                      format_func=lambda x: subtitle_positions[x][0]  # 显示给用户的是标签
-                                      )
+        selected_index = st.selectbox(
+            tr("Position"),
+            index=2,
+            options=range(len(subtitle_positions)),
+            format_func=lambda x: subtitle_positions[x][0],
+        )
        params.subtitle_position = subtitle_positions[selected_index][1]

+        if params.subtitle_position == "custom":
+            custom_position = st.text_input(
+                tr("Custom Position (% from top)"), value="70.0"
+            )
+            try:
+                params.custom_position = float(custom_position)
+                if params.custom_position < 0 or params.custom_position > 100:
+                    st.error(tr("Please enter a value between 0 and 100"))
+            except ValueError:
+                st.error(tr("Please enter a valid number"))
+
        font_cols = st.columns([0.3, 0.7])
        with font_cols[0]:
            saved_text_fore_color = config.ui.get("text_fore_color", "#FFFFFF")
-            params.text_fore_color = st.color_picker(tr("Font Color"), saved_text_fore_color)
-            config.ui['text_fore_color'] = params.text_fore_color
+            params.text_fore_color = st.color_picker(
+                tr("Font Color"), saved_text_fore_color
+            )
+            config.ui["text_fore_color"] = params.text_fore_color

        with font_cols[1]:
            saved_font_size = config.ui.get("font_size", 60)
            params.font_size = st.slider(tr("Font Size"), 30, 100, saved_font_size)
-            config.ui['font_size'] = params.font_size
+            config.ui["font_size"] = params.font_size

        stroke_cols = st.columns([0.3, 0.7])
        with stroke_cols[0]:
@@ -584,7 +734,7 @@ if start_button:
        scroll_to_bottom()
        st.stop()

-    if llm_provider != 'g4f' and not config.app.get(f"{llm_provider}_api_key", ""):
+    if llm_provider != "g4f" and not config.app.get(f"{llm_provider}_api_key", ""):
        st.error(tr("Please Enter the LLM API Key"))
        scroll_to_bottom()
        st.stop()
@@ -620,13 +770,13 @@ if start_button:
    log_container = st.empty()
    log_records = []

-
    def log_received(msg):
+        if config.ui["hide_log"]:
+            return
        with log_container:
            log_records.append(msg)
            st.code("\n".join(log_records))

-
    logger.add(log_received)

    st.toast(tr("Generating Video"))
@@ -648,7 +798,7 @@ if start_button:
            player_cols = st.columns(len(video_files) * 2 + 1)
            for i, url in enumerate(video_files):
                player_cols[i * 2 + 1].video(url)
-    except Exception as e:
+    except Exception:
        pass

    open_task_folder(task_id)
--- a/webui/i18n/de.json
+++ b/webui/i18n/de.json
@@ -26,6 +26,7 @@
    "Speech Region": "Region(:red[Required，[Get Region](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
    "Speech Key": "API Key(:red[Required，[Get API Key](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
    "Speech Volume": "Lautstärke der Sprachausgabe",
+    "Speech Rate": "Lesegeschwindigkeit (1,0 bedeutet 1x)",
    "Male": "Männlich",
    "Female": "Weiblich",
    "Background Music": "Hintergrundmusik",
@@ -41,6 +42,7 @@
    "Top": "Oben",
    "Center": "Mittig",
    "Bottom": "Unten (empfohlen)",
+    "Custom": "Benutzerdefinierte Position (70, was 70% von oben bedeutet)",
    "Font Size": "Schriftgröße für Untertitel",
    "Font Color": "Schriftfarbe",
    "Stroke Color": "Kontur",
--- a/webui/i18n/en.json
+++ b/webui/i18n/en.json
@@ -26,6 +26,7 @@
    "Speech Region": "Region(:red[Required，[Get Region](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
    "Speech Key": "API Key(:red[Required，[Get API Key](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
    "Speech Volume": "Speech Volume (1.0 represents 100%)",
+    "Speech Rate": "Speech Rate (1.0 means 1x speed)",
    "Male": "Male",
    "Female": "Female",
    "Background Music": "Background Music",
@@ -41,6 +42,7 @@
    "Top": "Top",
    "Center": "Center",
    "Bottom": "Bottom (Recommended)",
+    "Custom": "Custom position (70, indicating 70% down from the top)",
    "Font Size": "Subtitle Font Size",
    "Font Color": "Subtitle Font Color",
    "Stroke Color": "Subtitle Outline Color",
@@ -73,6 +75,7 @@
    "Play Voice": "Play Voice",
    "Voice Example": "This is an example text for testing speech synthesis",
    "Synthesizing Voice": "Synthesizing voice, please wait...",
-    "TTS Provider": "Select the voice synthesis provider"
+    "TTS Provider": "Select the voice synthesis provider",
+    "Hide Log": "Hide Log"
  }
 }
--- a/webui/i18n/vi.json
+++ b/webui/i18n/vi.json
@@ -26,6 +26,7 @@
    "Speech Region": "Vùng(:red[Bắt Buộc，[Lấy Vùng](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
    "Speech Key": "Khóa API(:red[Bắt Buộc，[Lấy Khóa API](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
    "Speech Volume": "Âm Lượng Giọng Đọc (1.0 đại diện cho 100%)",
+    "Speech Rate": "Tốc độ đọc (1.0 biểu thị tốc độ gốc)",
    "Male": "Nam",
    "Female": "Nữ",
    "Background Music": "Âm Nhạc Nền",
@@ -41,6 +42,7 @@
    "Top": "Trên",
    "Center": "Giữa",
    "Bottom": "Dưới (Được Khuyến Nghị)",
+    "Custom": "Vị trí tùy chỉnh (70, chỉ ra là cách đầu trang 70%)",
    "Font Size": "Cỡ Chữ Phụ Đề",
    "Font Color": "Màu Chữ Phụ Đề",
    "Stroke Color": "Màu Viền Phụ Đề",
--- a/webui/i18n/zh.json
+++ b/webui/i18n/zh.json
@@ -26,6 +26,7 @@
    "Speech Region": "服务区域 (:red[必填，[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
    "Speech Key": "API Key (:red[必填，密钥1 或 密钥2 均可 [点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
    "Speech Volume": "朗读音量（1.0表示100%）",
+    "Speech Rate": "朗读速度（1.0表示1倍速）",
    "Male": "男性",
    "Female": "女性",
    "Background Music": "背景音乐",
@@ -41,6 +42,7 @@
    "Top": "顶部",
    "Center": "中间",
    "Bottom": "底部（推荐）",
+    "Custom": "自定义位置（70，表示离顶部70%的位置）",
    "Font Size": "字幕大小",
    "Font Color": "字幕颜色",
    "Stroke Color": "描边颜色",
@@ -54,8 +56,8 @@
    "You can download the generated video from the following links": "你可以从以下链接下载生成的视频",
    "Basic Settings": "**基础设置** (:blue[点击展开])",
    "Language": "界面语言",
-    "Pexels API Key": "Pexels API Key ([点击获取](https://www.pexels.com/api/))",
-    "Pixabay API Key": "Pixabay API Key ([点击获取](https://pixabay.com/api/docs/#api_search_videos))",
+    "Pexels API Key": "Pexels API Key ([点击获取](https://www.pexels.com/api/)) :red[推荐使用]",
+    "Pixabay API Key": "Pixabay API Key ([点击获取](https://pixabay.com/api/docs/#api_search_videos)) :red[可以不用配置，如果 Pexels 无法使用，再选择Pixabay]",
    "LLM Provider": "大模型提供商",
    "API Key": "API Key (:red[必填，需要到大模型提供商的后台申请])",
    "Base Url": "Base Url (可选)",
@@ -73,6 +75,7 @@
    "Play Voice": "试听语音合成",
    "Voice Example": "这是一段测试语音合成的示例文本",
    "Synthesizing Voice": "语音合成中，请稍候...",
-    "TTS Provider": "语音合成提供商"
+    "TTS Provider": "语音合成提供商",
+    "Hide Log": "隐藏日志"
  }
 }
Author	SHA1	Message	Date
Harry	edc4df6eb5	Merge pull request #466 from harry0703/dev fixed: subtitle generation failure	2024-07-26 17:56:32 +08:00
harry	5ed98d317c	fixed: subtitle generation failure	2024-07-26 17:55:26 +08:00
Harry	c22ef5f1d2	Merge pull request #462 from harry0703/dev update readme	2024-07-25 15:00:07 +08:00
harry	bcc9621976	update readme	2024-07-25 14:59:45 +08:00
Harry	6512e3f140	Merge pull request #461 from harry0703/dev Optimize memory usage in moviepy	2024-07-25 13:58:46 +08:00
harry	931e1a0caa	Optimize memory usage in moviepy Upgrade version number to 1.2.0	2024-07-25 13:57:39 +08:00
yyhhyy	84ae8e5248	Merge pull request #460 from yyhhyyyyyy/code-formatting Code Formatting	2024-07-25 13:39:05 +08:00
yyhhyyyyyy	5c2db3aa92	resolve issue with video concatenation order always being random	2024-07-25 13:36:21 +08:00
yyhhyyyyyy	905841965a	Format project code	2024-07-24 14:59:06 +08:00
Harry	bbd4e94941	Merge pull request #459 from yyhhyyyyyy/customize-subtitle-position feat: support custom subtitle positioning	2024-07-24 14:35:50 +08:00
yyhhyyyyyy	b89250874b	Change default value to 70.0	2024-07-24 14:31:56 +08:00
yyhhyyyyyy	e8b20c697d	feat: support custom subtitle positioning	2024-07-24 14:25:20 +08:00
Harry	e64041c93d	Merge pull request #458 from yyhhyyyyyy/refactor-task-add-subtitle-api Refactor task.py and add subtitle API	2024-07-24 11:47:27 +08:00
yyhhyyyyyy	17b4a61e64	1.Refactor task.py to encapsulate separable functions. 2.Add a new subtitle API.	2024-07-23 17:00:23 +08:00
Harry	6d520a4266	Merge pull request #453 from yyhhyyyyyy/fit-oneapi fit(oneapi):Fix the issue where model_name is always empty when using OneAPI as the LLM source.	2024-07-22 10:38:10 +08:00
yyhhyyyyyy	7ff8467f9d	Fix the issue where model_name is always empty when using OneAPI as the LLM source.	2024-07-20 09:36:19 +08:00
Harry	4cf9cefb5c	Merge pull request #450 from yyhhyyyyyy/fit-subtitle-correct fit(subtitle):Fix subtitle correction logic	2024-07-20 08:25:25 +08:00
yyhhyyyyyy	33534db8bb	1. .gitignore ignores the models folder 2. Fix subtitle correction logic	2024-07-19 15:00:17 +08:00
Harry	ec16f1c41b	Merge pull request #449 from harry0703/dev update readme	2024-07-19 14:21:56 +08:00
harry	9653d7d18a	update readme	2024-07-19 14:21:35 +08:00
Harry	36a367d713	Merge pull request #448 from yyhhyyyyyy/add-rate feat(azure_tts_v1): Allows to control the speed of speech generation.	2024-07-19 14:17:15 +08:00
yyhhyyyyyy	77b304537a	Speech Rate	2024-07-19 11:15:36 +08:00
yyhhyyyyyy	63fb848a17	1. Add azure_tts_v1 to control the speed of speech	2024-07-19 11:06:34 +08:00
Harry	6853163905	Merge pull request #447 from harry0703/dev update readme	2024-07-15 14:09:55 +08:00
harry	052c29b579	update readme	2024-07-15 14:09:33 +08:00
Harry	df62529f2a	Merge pull request #443 from harry0703/dev update readme	2024-07-09 13:41:04 +08:00
harry	934eff13ae	update readme	2024-07-09 13:40:43 +08:00
Harry	0472338184	Merge pull request #437 from harry0703/dev support baidu ERNIE llm	2024-07-03 21:13:51 +08:00
harry	66c81a04bf	support baidu ERNIE llm	2024-07-03 21:12:21 +08:00
Harry	8dd66cf624	Merge pull request #435 from harry0703/dev update readme	2024-07-02 10:00:53 +08:00
harry	dca23d99e4	update readme	2024-07-02 09:57:53 +08:00
Harry	42560cc7f5	Merge pull request #421 from harry0703/dev update readme	2024-06-21 11:01:41 +08:00
harry	11478063e7	update readme	2024-06-21 11:01:15 +08:00
Harry	bf0dbcc045	Merge pull request #414 from harry0703/dev update readme	2024-06-15 17:37:36 +08:00
harry	43df593ac3	update readme	2024-06-15 17:36:37 +08:00
Harry	7cf21c6541	Merge pull request #408 from harry0703/dev update readme	2024-06-11 11:50:48 +08:00
harry	f76f905833	update readme	2024-06-11 11:48:04 +08:00
Harry	0f27c26042	Merge pull request #399 from harry0703/dev update readme	2024-06-04 10:36:18 +08:00
harry	e1d7318cee	update readme	2024-06-04 10:34:32 +08:00
Harry	6408c31b7f	Merge pull request #391 from harry0703/dev update readme	2024-05-28 18:41:24 +08:00
harry	b0d694db08	update readme	2024-05-28 14:51:03 +08:00
Harry	730c2a461a	Merge pull request #381 from harry0703/dev update readme	2024-05-23 18:21:05 +08:00
harry	bdb49a4c82	update readme	2024-05-23 18:20:45 +08:00
Harry	a4692060a0	Merge pull request #372 from harry0703/dev enhanced exception handling for generating terms	2024-05-17 17:12:13 +08:00
harry	fc6844dd19	enhanced exception handling for generating terms	2024-05-17 17:11:35 +08:00
Harry	d740a6babd	Merge pull request #370 from harry0703/dev update readme	2024-05-17 08:44:01 +08:00
harry	9c58991830	update readme	2024-05-17 08:43:35 +08:00