1. Added multi-language support to the UI

2. Optimized the voice name
3. Other UI optimizations
2024-03-29 17:13:25 +08:00
parent a7ba661053
commit bc8e005f59
6 changed files with 1293 additions and 137 deletions
--- a/webui/Main.py
+++ b/webui/Main.py
@@ -1,15 +1,28 @@
+import json
+import locale
 import streamlit as st
-
-st.set_page_config(page_title="MoneyPrinterTurbo", page_icon="🤖", layout="wide",
-                   initial_sidebar_state="auto")
 import sys
 import os
 from uuid import uuid4
 import platform
 import streamlit.components.v1 as components
+import toml
 from loguru import logger
-from app.models.schema import VideoParams, VideoAspect, VoiceNames, VideoConcatMode
-from app.services import task as tm, llm
+
+st.set_page_config(page_title="MoneyPrinterTurbo",
+                   page_icon="🤖",
+                   layout="wide",
+                   initial_sidebar_state="auto",
+                   menu_items={
+                       'Report a bug': "https://github.com/harry0703/MoneyPrinterTurbo/issues",
+                       'About': "# MoneyPrinterTurbo\nSimply provide a topic or keyword for a video, and it will "
+                                "automatically generate the video copy, video materials, video subtitles, "
+                                "and video background music before synthesizing a high-definition short "
+                                "video.\n\nhttps://github.com/harry0703/MoneyPrinterTurbo"
+                   })
+
+from app.models.schema import VideoParams, VideoAspect, VideoConcatMode
+from app.services import task as tm, llm, voice
 from app.utils import utils

 hide_streamlit_style = """
@@ -21,6 +34,35 @@ st.title("MoneyPrinterTurbo")
 root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 font_dir = os.path.join(root_dir, "resource", "fonts")
 song_dir = os.path.join(root_dir, "resource", "songs")
+i18n_dir = os.path.join(root_dir, "webui", "i18n")
+config_file = os.path.join(root_dir, "webui", ".streamlit", "webui.toml")
+
+
+def load_config() -> dict:
+    try:
+        return toml.load(config_file)
+    except Exception as e:
+        return {}
+
+
+cfg = load_config()
+
+
+def save_config():
+    with open(config_file, "w", encoding="utf-8") as f:
+        f.write(toml.dumps(cfg))
+
+
+def get_system_locale():
+    try:
+        loc = locale.getdefaultlocale()
+        # zh_CN, zh_TW return zh
+        # en_US, en_GB return en
+        language_code = loc[0].split("_")[0]
+        return language_code
+    except Exception as e:
+        return "en"
+

 if 'video_subject' not in st.session_state:
    st.session_state['video_subject'] = ''
@@ -28,6 +70,8 @@ if 'video_script' not in st.session_state:
    st.session_state['video_script'] = ''
 if 'video_terms' not in st.session_state:
    st.session_state['video_terms'] = ''
+if 'ui_language' not in st.session_state:
+    st.session_state['ui_language'] = cfg.get("ui_language", get_system_locale())


 def get_all_fonts():
@@ -109,113 +153,154 @@ def init_log():

 init_log()

+
+def load_locales():
+    locales = {}
+    for root, dirs, files in os.walk(i18n_dir):
+        for file in files:
+            if file.endswith(".json"):
+                lang = file.split(".")[0]
+                with open(os.path.join(root, file), "r", encoding="utf-8") as f:
+                    locales[lang] = json.loads(f.read())
+    return locales
+
+
+locales = load_locales()
+
+
+def tr(key):
+    loc = locales.get(st.session_state['ui_language'], {})
+    return loc.get("Translation", {}).get(key, key)
+
+
+display_languages = []
+selected_index = 0
+for i, code in enumerate(locales.keys()):
+    display_languages.append(f"{code} - {locales[code].get('Language')}")
+    if code == st.session_state['ui_language']:
+        selected_index = i
+
+selected_language = st.selectbox("Language", options=display_languages, label_visibility='collapsed',
+                                 index=selected_index)
+if selected_language:
+    code = selected_language.split(" - ")[0].strip()
+    st.session_state['ui_language'] = code
+    cfg['ui_language'] = code
+    save_config()
+
 panel = st.columns(3)
 left_panel = panel[0]
 middle_panel = panel[1]
 right_panel = panel[2]

-cfg = VideoParams()
+params = VideoParams()

 with left_panel:
    with st.container(border=True):
-        st.write("**文案设置**")
-        cfg.video_subject = st.text_input("视频主题（给定一个关键词，:red[AI自动生成]视频文案）",
-                                          value=st.session_state['video_subject']).strip()
+        st.write(tr("Video Script Settings"))
+        params.video_subject = st.text_input(tr("Video Subject"),
+                                             value=st.session_state['video_subject']).strip()

        video_languages = [
-            ("自动判断（Auto detect）", ""),
+            (tr("Auto Detect"), ""),
        ]
-        for lang in ["zh-CN", "zh-TW", "en-US"]:
-            video_languages.append((lang, lang))
+        for code in ["zh-CN", "zh-TW", "en-US"]:
+            video_languages.append((code, code))

-        selected_index = st.selectbox("生成视频脚本的语言（:blue[一般情况AI会自动根据你输入的主题语言输出]）",
+        selected_index = st.selectbox(tr("Script Language"),
                                      index=0,
                                      options=range(len(video_languages)),  # 使用索引作为内部选项值
                                      format_func=lambda x: video_languages[x][0]  # 显示给用户的是标签
                                      )
-        cfg.video_language = video_languages[selected_index][1]
+        params.video_language = video_languages[selected_index][1]

-        if cfg.video_language:
-            st.write(f"设置AI输出文案语言为: **:red[{cfg.video_language}]**")
-
-        if st.button("点击使用AI根据**主题**生成 【视频文案】 和 【视频关键词】", key="auto_generate_script"):
-            with st.spinner("AI正在生成视频文案和关键词..."):
-                script = llm.generate_script(video_subject=cfg.video_subject, language=cfg.video_language)
-                terms = llm.generate_terms(cfg.video_subject, script)
-                st.toast('AI生成成功')
+        if st.button(tr("Generate Video Script and Keywords"), key="auto_generate_script"):
+            with st.spinner(tr("Generating Video Script and Keywords")):
+                script = llm.generate_script(video_subject=params.video_subject, language=params.video_language)
+                terms = llm.generate_terms(params.video_subject, script)
                st.session_state['video_script'] = script
                st.session_state['video_terms'] = ", ".join(terms)

-        cfg.video_script = st.text_area(
-            "视频文案（:blue[①可不填，使用AI生成  ②合理使用标点断句，有助于生成字幕]）",
+        params.video_script = st.text_area(
+            tr("Video Script"),
            value=st.session_state['video_script'],
            height=180
        )
-        if st.button("点击使用AI根据**文案**生成【视频关键词】", key="auto_generate_terms"):
-            if not cfg.video_script:
-                st.error("请先填写视频文案")
+        if st.button(tr("Generate Video Keywords"), key="auto_generate_terms"):
+            if not params.video_script:
+                st.error(tr("Please Enter the Video Subject"))
                st.stop()

-            with st.spinner("AI正在生成视频关键词..."):
-                terms = llm.generate_terms(cfg.video_subject, cfg.video_script)
-                st.toast('AI生成成功')
+            with st.spinner(tr("Generating Video Keywords")):
+                terms = llm.generate_terms(params.video_subject, params.video_script)
                st.session_state['video_terms'] = ", ".join(terms)

-        cfg.video_terms = st.text_area(
-            "视频关键词（:blue[①可不填，使用AI生成 ②用**英文逗号**分隔，只支持英文]）",
+        params.video_terms = st.text_area(
+            tr("Video Keywords"),
            value=st.session_state['video_terms'],
            height=50)

 with middle_panel:
    with st.container(border=True):
-        st.write("**视频设置**")
+        st.write(tr("Video Settings"))
        video_concat_modes = [
-            ("顺序拼接", "sequential"),
-            ("随机拼接（推荐）", "random"),
+            (tr("Sequential"), "sequential"),
+            (tr("Random"), "random"),
        ]
-        selected_index = st.selectbox("视频拼接模式",
+        selected_index = st.selectbox(tr("Video Concat Mode"),
                                      index=1,
                                      options=range(len(video_concat_modes)),  # 使用索引作为内部选项值
                                      format_func=lambda x: video_concat_modes[x][0]  # 显示给用户的是标签
                                      )
-        cfg.video_concat_mode = VideoConcatMode(video_concat_modes[selected_index][1])
+        params.video_concat_mode = VideoConcatMode(video_concat_modes[selected_index][1])

        video_aspect_ratios = [
-            ("竖屏 9:16（抖音视频）", VideoAspect.portrait.value),
-            ("横屏 16:9（西瓜视频）", VideoAspect.landscape.value),
-            # ("方形 1:1", VideoAspect.square.value)
+            (tr("Portrait"), VideoAspect.portrait.value),
+            (tr("Landscape"), VideoAspect.landscape.value),
        ]
-        selected_index = st.selectbox("视频比例",
+        selected_index = st.selectbox(tr("Video Ratio"),
                                      options=range(len(video_aspect_ratios)),  # 使用索引作为内部选项值
                                      format_func=lambda x: video_aspect_ratios[x][0]  # 显示给用户的是标签
                                      )
-        cfg.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])
+        params.video_aspect = VideoAspect(video_aspect_ratios[selected_index][1])

-        cfg.video_clip_duration = st.selectbox("视频片段最大时长(秒)", options=[2, 3, 4, 5, 6], index=1)
-        cfg.video_count = st.selectbox("同时生成视频数量", options=[1, 2, 3, 4, 5], index=0)
+        params.video_clip_duration = st.selectbox(tr("Clip Duration"), options=[2, 3, 4, 5, 6], index=1)
+        params.video_count = st.selectbox(tr("Number of Videos Generated Simultaneously"), options=[1, 2, 3, 4, 5],
+                                          index=0)
    with st.container(border=True):
-        st.write("**音频设置**")
-        # 创建一个映射字典，将原始值映射到友好名称
+        st.write(tr("Audio Settings"))
+        voices = voice.get_all_voices(filter_locals=["zh-CN", "zh-HK", "zh-TW", "en-US"])
        friendly_names = {
            voice: voice.
-            replace("female", "女性").
-            replace("male", "男性").
-            replace("zh-CN", "中文").
-            replace("zh-HK", "香港").
-            replace("zh-TW", "台湾").
-            replace("en-US", "英文").
+            replace("Female", tr("Female")).
+            replace("Male", tr("Male")).
            replace("Neural", "") for
-            voice in VoiceNames}
-        selected_friendly_name = st.selectbox("朗读声音", options=list(friendly_names.values()))
+            voice in voices}
+        saved_voice_name = cfg.get("voice_name", "")
+        saved_voice_name_index = 0
+        if saved_voice_name in friendly_names:
+            saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
+        else:
+            for i, voice in enumerate(voices):
+                if voice.lower().startswith(st.session_state['ui_language'].lower()):
+                    saved_voice_name_index = i
+                    break
+
+        selected_friendly_name = st.selectbox(tr("Speech Synthesis"),
+                                              options=list(friendly_names.values()),
+                                              index=saved_voice_name_index)
+
        voice_name = list(friendly_names.keys())[list(friendly_names.values()).index(selected_friendly_name)]
-        cfg.voice_name = voice_name
+        params.voice_name = voice_name
+        cfg['voice_name'] = voice_name
+        save_config()

        bgm_options = [
-            ("无背景音乐 No BGM", ""),
-            ("随机背景音乐 Random BGM", "random"),
-            ("自定义背景音乐 Custom BGM", "custom"),
+            (tr("No Background Music"), ""),
+            (tr("Random Background Music"), "random"),
+            (tr("Custom Background Music"), "custom"),
        ]
-        selected_index = st.selectbox("背景音乐",
+        selected_index = st.selectbox(tr("Background Music"),
                                      index=1,
                                      options=range(len(bgm_options)),  # 使用索引作为内部选项值
                                      format_func=lambda x: bgm_options[x][0]  # 显示给用户的是标签
@@ -225,49 +310,49 @@ with middle_panel:

        # 根据选择显示或隐藏组件
        if bgm_type == "custom":
-            custom_bgm_file = st.text_input("请输入自定义背景音乐的文件路径：")
+            custom_bgm_file = st.text_input(tr("Custom Background Music File"))
            if custom_bgm_file and os.path.exists(custom_bgm_file):
-                cfg.bgm_file = custom_bgm_file
+                params.bgm_file = custom_bgm_file
                # st.write(f":red[已选择自定义背景音乐]：**{custom_bgm_file}**")
-        cfg.bgm_volume = st.selectbox("背景音乐音量（0.2表示20%，背景声音不宜过高）",
-                                      options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], index=2)
+        params.bgm_volume = st.selectbox(tr("Background Music Volume"),
+                                         options=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], index=2)

 with right_panel:
    with st.container(border=True):
-        st.write("**字幕设置**")
-        cfg.subtitle_enabled = st.checkbox("生成字幕（若取消勾选，下面的设置都将不生效）", value=True)
+        st.write(tr("Subtitle Settings"))
+        params.subtitle_enabled = st.checkbox(tr("Enable Subtitles"), value=True)
        font_names = get_all_fonts()
-        cfg.font_name = st.selectbox("字体", font_names)
+        params.font_name = st.selectbox(tr("Font"), font_names)

        subtitle_positions = [
-            ("顶部（top）", "top"),
-            ("居中（center）", "center"),
-            ("底部（bottom，推荐）", "bottom"),
+            (tr("Top"), "top"),
+            (tr("Middle"), "center"),
+            (tr("Bottom"), "bottom"),
        ]
-        selected_index = st.selectbox("字幕位置",
+        selected_index = st.selectbox(tr("Position"),
                                      index=2,
                                      options=range(len(subtitle_positions)),  # 使用索引作为内部选项值
                                      format_func=lambda x: subtitle_positions[x][0]  # 显示给用户的是标签
                                      )
-        cfg.subtitle_position = subtitle_positions[selected_index][1]
+        params.subtitle_position = subtitle_positions[selected_index][1]

        font_cols = st.columns([0.3, 0.7])
        with font_cols[0]:
-            cfg.text_fore_color = st.color_picker("字幕颜色", "#FFFFFF")
+            params.text_fore_color = st.color_picker(tr("Font Color"), "#FFFFFF")
        with font_cols[1]:
-            cfg.font_size = st.slider("字幕大小", 30, 100, 60)
+            params.font_size = st.slider(tr("Font Size"), 30, 100, 60)

        stroke_cols = st.columns([0.3, 0.7])
        with stroke_cols[0]:
-            cfg.stroke_color = st.color_picker("描边颜色", "#000000")
+            params.stroke_color = st.color_picker(tr("Stroke Color"), "#000000")
        with stroke_cols[1]:
-            cfg.stroke_width = st.slider("描边粗细", 0.0, 10.0, 1.5)
+            params.stroke_width = st.slider(tr("Stroke Width"), 0.0, 10.0, 1.5)

-start_button = st.button("开始生成视频", use_container_width=True, type="primary")
+start_button = st.button(tr("Generate Video"), use_container_width=True, type="primary")
 if start_button:
    task_id = str(uuid4())
-    if not cfg.video_subject and not cfg.video_script:
-        st.error("视频主题 或 视频文案，不能同时为空")
+    if not params.video_subject and not params.video_script:
+        st.error(tr("Video Script and Subject Cannot Both Be Empty"))
        scroll_to_bottom()
        st.stop()

@@ -283,11 +368,11 @@ if start_button:

    logger.add(log_received)

-    st.toast("正在生成视频，请稍候...")
-    logger.info("开始生成视频")
-    logger.info(utils.to_json(cfg))
+    st.toast(tr("Generating Video"))
+    logger.info(tr("Start Generating Video"))
+    logger.info(utils.to_json(params))
    scroll_to_bottom()

-    tm.start(task_id=task_id, params=cfg)
+    tm.start(task_id=task_id, params=params)
    open_task_folder(task_id)
-    logger.info(f"完成")
+    logger.info(tr("Video Generation Completed"))