1. Add speech rate (speed) control for azure_tts_v1

2024-07-19 11:06:34 +08:00
parent 6853163905
commit 63fb848a17
5 changed files with 25 additions and 7 deletions
--- a/webui/Main.py
+++ b/webui/Main.py
@@ -510,11 +510,11 @@ with middle_panel:
            with st.spinner(tr("Synthesizing Voice")):
                temp_dir = utils.storage_dir("temp", create=True)
                audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")
-                sub_maker = voice.tts(text=play_content, voice_name=voice_name, voice_file=audio_file)
+                sub_maker = voice.tts(text=play_content, voice_name=voice_name, voice_rate=params.voice_rate, voice_file=audio_file)
                # if the voice file generation failed, try again with a default content.
                if not sub_maker:
                    play_content = "This is a example voice. if you hear this, the voice synthesis failed with the original content."
-                    sub_maker = voice.tts(text=play_content, voice_name=voice_name, voice_file=audio_file)
+                    sub_maker = voice.tts(text=play_content, voice_name=voice_name, voice_rate=params.voice_rate, voice_file=audio_file)

                if sub_maker and os.path.exists(audio_file):
                    st.audio(audio_file, format="audio/mp3")
@@ -531,6 +531,10 @@ with middle_panel:

        params.voice_volume = st.selectbox(tr("Speech Volume"),
                                           options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0], index=2)
+
+        params.voice_rate = st.selectbox(tr("Speech Rate"),
+                                         options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], index=2)  # index=2 preselects 1.0
+
        bgm_options = [
            (tr("No Background Music"), ""),
            (tr("Random Background Music"), "random"),
--- a/webui/i18n/zh.json
+++ b/webui/i18n/zh.json
@@ -26,6 +26,7 @@
    "Speech Region": "服务区域 (:red[必填，[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
    "Speech Key": "API Key (:red[必填，密钥1 或 密钥2 均可 [点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
    "Speech Volume": "朗读音量（1.0表示100%）",
+    "Speech Rate": "朗读速度（1.0表示1倍速）",
    "Male": "男性",
    "Female": "女性",
    "Background Music": "背景音乐",