Merge pull request #264 from harry0703/dev
Support Azure's new speech voices and fix the bug where clips were not closed
This commit is contained in:
@@ -36,6 +36,8 @@ def save_config():
|
|||||||
_cfg["app"] = app
|
_cfg["app"] = app
|
||||||
_cfg["whisper"] = whisper
|
_cfg["whisper"] = whisper
|
||||||
_cfg["pexels"] = pexels
|
_cfg["pexels"] = pexels
|
||||||
|
_cfg["azure"] = azure
|
||||||
|
_cfg["ui"] = ui
|
||||||
f.write(toml.dumps(_cfg))
|
f.write(toml.dumps(_cfg))
|
||||||
|
|
||||||
|
|
||||||
@@ -43,6 +45,7 @@ _cfg = load_config()
|
|||||||
app = _cfg.get("app", {})
|
app = _cfg.get("app", {})
|
||||||
whisper = _cfg.get("whisper", {})
|
whisper = _cfg.get("whisper", {})
|
||||||
pexels = _cfg.get("pexels", {})
|
pexels = _cfg.get("pexels", {})
|
||||||
|
azure = _cfg.get("azure", {})
|
||||||
ui = _cfg.get("ui", {})
|
ui = _cfg.get("ui", {})
|
||||||
|
|
||||||
hostname = socket.gethostname()
|
hostname = socket.gethostname()
|
||||||
@@ -53,7 +56,7 @@ listen_port = _cfg.get("listen_port", 8080)
|
|||||||
project_name = _cfg.get("project_name", "MoneyPrinterTurbo")
|
project_name = _cfg.get("project_name", "MoneyPrinterTurbo")
|
||||||
project_description = _cfg.get("project_description",
|
project_description = _cfg.get("project_description",
|
||||||
"<a href='https://github.com/harry0703/MoneyPrinterTurbo'>https://github.com/harry0703/MoneyPrinterTurbo</a>")
|
"<a href='https://github.com/harry0703/MoneyPrinterTurbo'>https://github.com/harry0703/MoneyPrinterTurbo</a>")
|
||||||
project_version = _cfg.get("project_version", "1.1.1")
|
project_version = _cfg.get("project_version", "1.1.2")
|
||||||
reload_debug = False
|
reload_debug = False
|
||||||
|
|
||||||
imagemagick_path = app.get("imagemagick_path", "")
|
imagemagick_path = app.get("imagemagick_path", "")
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ def delete_video(request: Request, task_id: str = Path(..., description="Task ID
|
|||||||
|
|
||||||
sm.state.delete_task(task_id)
|
sm.state.delete_task(task_id)
|
||||||
logger.success(f"video deleted: {utils.to_json(task)}")
|
logger.success(f"video deleted: {utils.to_json(task)}")
|
||||||
return utils.get_response(200, task)
|
return utils.get_response(200)
|
||||||
|
|
||||||
raise HttpException(task_id=task_id, status_code=404, message=f"{request_id}: task not found")
|
raise HttpException(task_id=task_id, status_code=404, message=f"{request_id}: task not found")
|
||||||
|
|
||||||
@@ -190,4 +190,5 @@ async def download_video(_: Request, file_path: str):
|
|||||||
headers = {
|
headers = {
|
||||||
"Content-Disposition": f"attachment; filename={filename}{extension}"
|
"Content-Disposition": f"attachment; filename={filename}{extension}"
|
||||||
}
|
}
|
||||||
return FileResponse(path=video_path, headers=headers, filename=f"{filename}{extension}", media_type=f'video/{extension[1:]}')
|
return FileResponse(path=video_path, headers=headers, filename=f"{filename}{extension}",
|
||||||
|
media_type=f'video/{extension[1:]}')
|
||||||
|
|||||||
@@ -100,17 +100,18 @@ def combine_videos(combined_video_path: str,
|
|||||||
clips.append(clip)
|
clips.append(clip)
|
||||||
video_duration += clip.duration
|
video_duration += clip.duration
|
||||||
|
|
||||||
final_clip = concatenate_videoclips(clips)
|
video_clip = concatenate_videoclips(clips)
|
||||||
final_clip = final_clip.set_fps(30)
|
video_clip = video_clip.set_fps(30)
|
||||||
logger.info(f"writing")
|
logger.info(f"writing")
|
||||||
# https://github.com/harry0703/MoneyPrinterTurbo/issues/111#issuecomment-2032354030
|
# https://github.com/harry0703/MoneyPrinterTurbo/issues/111#issuecomment-2032354030
|
||||||
final_clip.write_videofile(filename=combined_video_path,
|
video_clip.write_videofile(filename=combined_video_path,
|
||||||
threads=threads,
|
threads=threads,
|
||||||
logger=None,
|
logger=None,
|
||||||
temp_audiofile_path=output_dir,
|
temp_audiofile_path=output_dir,
|
||||||
audio_codec="aac",
|
audio_codec="aac",
|
||||||
fps=30,
|
fps=30,
|
||||||
)
|
)
|
||||||
|
video_clip.close()
|
||||||
logger.success(f"completed")
|
logger.success(f"completed")
|
||||||
return combined_video_path
|
return combined_video_path
|
||||||
|
|
||||||
@@ -263,7 +264,7 @@ def generate_video(video_path: str,
|
|||||||
logger=None,
|
logger=None,
|
||||||
fps=30,
|
fps=30,
|
||||||
)
|
)
|
||||||
|
video_clip.close()
|
||||||
logger.success(f"completed")
|
logger.success(f"completed")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
from datetime import datetime
|
||||||
from xml.sax.saxutils import unescape
|
from xml.sax.saxutils import unescape
|
||||||
from edge_tts.submaker import mktimestamp
|
from edge_tts.submaker import mktimestamp
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
@@ -8,10 +9,11 @@ from edge_tts import submaker, SubMaker
|
|||||||
import edge_tts
|
import edge_tts
|
||||||
from moviepy.video.tools import subtitles
|
from moviepy.video.tools import subtitles
|
||||||
|
|
||||||
|
from app.config import config
|
||||||
from app.utils import utils
|
from app.utils import utils
|
||||||
|
|
||||||
|
|
||||||
def get_all_voices(filter_locals=None) -> list[str]:
|
def get_all_azure_voices(filter_locals=None) -> list[str]:
|
||||||
if filter_locals is None:
|
if filter_locals is None:
|
||||||
filter_locals = ["zh-CN", "en-US", "zh-HK", "zh-TW"]
|
filter_locals = ["zh-CN", "en-US", "zh-HK", "zh-TW"]
|
||||||
voices_str = """
|
voices_str = """
|
||||||
@@ -956,6 +958,34 @@ Gender: Female
|
|||||||
|
|
||||||
Name: zu-ZA-ThembaNeural
|
Name: zu-ZA-ThembaNeural
|
||||||
Gender: Male
|
Gender: Male
|
||||||
|
|
||||||
|
|
||||||
|
Name: en-US-AvaMultilingualNeural-V2
|
||||||
|
Gender: Female
|
||||||
|
|
||||||
|
Name: en-US-AndrewMultilingualNeural-V2
|
||||||
|
Gender: Male
|
||||||
|
|
||||||
|
Name: en-US-EmmaMultilingualNeural-V2
|
||||||
|
Gender: Female
|
||||||
|
|
||||||
|
Name: en-US-BrianMultilingualNeural-V2
|
||||||
|
Gender: Male
|
||||||
|
|
||||||
|
Name: de-DE-FlorianMultilingualNeural-V2
|
||||||
|
Gender: Male
|
||||||
|
|
||||||
|
Name: de-DE-SeraphinaMultilingualNeural-V2
|
||||||
|
Gender: Female
|
||||||
|
|
||||||
|
Name: fr-FR-RemyMultilingualNeural-V2
|
||||||
|
Gender: Male
|
||||||
|
|
||||||
|
Name: fr-FR-VivienneMultilingualNeural-V2
|
||||||
|
Gender: Female
|
||||||
|
|
||||||
|
Name: zh-CN-XiaoxiaoMultilingualNeural-V2
|
||||||
|
Gender: Female
|
||||||
""".strip()
|
""".strip()
|
||||||
voices = []
|
voices = []
|
||||||
name = ''
|
name = ''
|
||||||
@@ -986,11 +1016,26 @@ Gender: Male
|
|||||||
def parse_voice_name(name: str):
    """Strip the gender marker from a display voice name.

    Examples of accepted input:
        zh-CN-XiaoyiNeural-Female
        zh-CN-YunxiNeural-Male
        zh-CN-XiaoxiaoMultilingualNeural-V2-Female

    Returns the name with every "-Female" / "-Male" occurrence removed and
    surrounding whitespace trimmed.
    """
    for gender_marker in ("-Female", "-Male"):
        name = name.replace(gender_marker, "")
    return name.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def is_azure_v2_voice(voice_name: str):
    """Return the Azure voice name if ``voice_name`` denotes a "V2" voice.

    The gender marker is removed first via ``parse_voice_name``. If the
    remaining name ends with "-V2", that suffix is dropped and the result is
    returned; otherwise an empty string is returned, so the return value can
    be used directly as a truthiness check.

    Args:
        voice_name: display name such as
            "zh-CN-XiaoxiaoMultilingualNeural-V2-Female".

    Returns:
        The name without the gender marker and without the trailing "-V2",
        or "" when the name is not a V2 voice.
    """
    voice_name = parse_voice_name(voice_name)
    if voice_name.endswith("-V2"):
        # Only the trailing marker identifies a V2 voice, so strip just the suffix.
        return voice_name.removesuffix("-V2").strip()
    return ""
|
||||||
|
|
||||||
|
|
||||||
def tts(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
|
def tts(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
    """Synthesize ``text`` to ``voice_file`` using the backend matching the voice.

    Voices recognised by ``is_azure_v2_voice`` are handled by ``azure_tts_v2``;
    every other voice goes through ``azure_tts_v1``. The chosen backend's
    result is returned unchanged.
    """
    synthesize = azure_tts_v2 if is_azure_v2_voice(voice_name) else azure_tts_v1
    return synthesize(text, voice_name, voice_file)
|
||||||
|
|
||||||
|
|
||||||
|
def azure_tts_v1(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
|
||||||
text = text.strip()
|
text = text.strip()
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
try:
|
try:
|
||||||
@@ -1019,6 +1064,80 @@ def tts(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
    """Synthesize ``text`` with an Azure Speech "V2" voice and record word timings.

    The Azure Speech SDK writes the audio (48 kHz / 192 kbit/s mono MP3) to
    ``voice_file``. Each word-boundary event reported by the synthesizer is
    appended to a ``SubMaker`` as the word text plus an (start, end) offset pair.
    The whole synthesis is attempted up to three times.

    Args:
        text: text to speak; surrounding whitespace is stripped.
        voice_name: display name of a V2 voice, e.g.
            "zh-CN-XiaoxiaoMultilingualNeural-V2-Female".
        voice_file: path of the audio file to write.

    Returns:
        The populated ``SubMaker`` on success, or ``None`` when all three
        attempts fail or are cancelled.

    Raises:
        ValueError: if ``voice_name`` is not recognised as a V2 voice.
    """
    # Imported locally so this block does not depend on a module-level change.
    from datetime import timedelta

    v2_voice_name = is_azure_v2_voice(voice_name)
    if not v2_voice_name:
        # Report the name the caller passed in, not the (empty) parse result.
        logger.error(f"invalid voice name: {voice_name}")
        raise ValueError(f"invalid voice name: {voice_name}")
    voice_name = v2_voice_name
    text = text.strip()

    def _format_duration_to_offset(duration) -> int:
        """Convert a duration to an integer offset (milliseconds * 10000)."""
        if isinstance(duration, timedelta):
            # Whole milliseconds, matching the precision of the string path below.
            return (duration // timedelta(milliseconds=1)) * 10000

        if isinstance(duration, str):
            # str(timedelta) omits the fractional part when microseconds are zero.
            time_format = "%H:%M:%S.%f" if "." in duration else "%H:%M:%S"
            time_obj = datetime.strptime(duration, time_format)
            milliseconds = (
                (time_obj.hour * 3600000)
                + (time_obj.minute * 60000)
                + (time_obj.second * 1000)
                + (time_obj.microsecond // 1000)
            )
            return milliseconds * 10000

        if isinstance(duration, int):
            # Integers are passed through unchanged
            # (assumed to already be in the target unit -- TODO confirm against the SDK).
            return duration

        return 0

    for i in range(3):
        try:
            logger.info(f"start, voice name: {voice_name}, try: {i + 1}")

            import azure.cognitiveservices.speech as speechsdk

            sub_maker = SubMaker()

            def speech_synthesizer_word_boundary_cb(evt: speechsdk.SessionEventArgs):
                # Pass the duration object directly instead of round-tripping it
                # through str(), which fails to parse for whole-second durations.
                duration = _format_duration_to_offset(evt.duration)
                offset = _format_duration_to_offset(evt.audio_offset)
                sub_maker.subs.append(evt.text)
                sub_maker.offset.append((offset, offset + duration))

            # Creates an instance of a speech config with specified subscription key and service region.
            speech_key = config.azure.get("speech_key", "")
            service_region = config.azure.get("speech_region", "")
            audio_config = speechsdk.audio.AudioOutputConfig(filename=voice_file, use_default_speaker=True)
            speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                                   region=service_region)
            speech_config.speech_synthesis_voice_name = voice_name
            # Ask the service to emit word-boundary events so subtitles can be timed.
            speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestWordBoundary,
                                       value='true')

            speech_config.set_speech_synthesis_output_format(
                speechsdk.SpeechSynthesisOutputFormat.Audio48Khz192KBitRateMonoMp3)
            speech_synthesizer = speechsdk.SpeechSynthesizer(audio_config=audio_config,
                                                             speech_config=speech_config)
            speech_synthesizer.synthesis_word_boundary.connect(speech_synthesizer_word_boundary_cb)

            result = speech_synthesizer.speak_text_async(text).get()
            if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
                logger.success(f"azure v2 speech synthesis succeeded: {voice_file}")
                return sub_maker
            elif result.reason == speechsdk.ResultReason.Canceled:
                cancellation_details = result.cancellation_details
                logger.error(f"azure v2 speech synthesis canceled: {cancellation_details.reason}")
                if cancellation_details.reason == speechsdk.CancellationReason.Error:
                    logger.error(f"azure v2 speech synthesis error: {cancellation_details.error_details}")
            logger.info(f"completed, output file: {voice_file}")
        except Exception as e:
            # Any failure (including a missing SDK) is logged and the attempt is retried.
            logger.error(f"failed, error: {str(e)}")
    return None
|
||||||
|
|
||||||
|
|
||||||
def _format_text(text: str) -> str:
|
def _format_text(text: str) -> str:
|
||||||
# text = text.replace("\n", " ")
|
# text = text.replace("\n", " ")
|
||||||
text = text.replace("[", " ")
|
text = text.replace("[", " ")
|
||||||
@@ -1131,8 +1250,12 @@ def get_audio_duration(sub_maker: submaker.SubMaker):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
voices = get_all_voices()
|
voice_name = "zh-CN-XiaoxiaoMultilingualNeural-V2-Female"
|
||||||
print(voices)
|
voice_name = parse_voice_name(voice_name)
|
||||||
|
voice_name = is_azure_v2_voice(voice_name)
|
||||||
|
print(voice_name)
|
||||||
|
|
||||||
|
voices = get_all_azure_voices()
|
||||||
print(len(voices))
|
print(len(voices))
|
||||||
|
|
||||||
|
|
||||||
@@ -1140,6 +1263,7 @@ if __name__ == "__main__":
|
|||||||
temp_dir = utils.storage_dir("temp")
|
temp_dir = utils.storage_dir("temp")
|
||||||
|
|
||||||
voice_names = [
|
voice_names = [
|
||||||
|
"zh-CN-XiaoxiaoMultilingualNeural",
|
||||||
# 女性
|
# 女性
|
||||||
"zh-CN-XiaoxiaoNeural",
|
"zh-CN-XiaoxiaoNeural",
|
||||||
"zh-CN-XiaoyiNeural",
|
"zh-CN-XiaoyiNeural",
|
||||||
@@ -1174,6 +1298,7 @@ if __name__ == "__main__":
|
|||||||
业绩解读
|
业绩解读
|
||||||
利润方面,2023全年贵州茅台,>归母净利润增速为19%,其中营业收入正贡献18%,营业成本正贡献百分之一,管理费用正贡献百分之一点四。(注:归母净利润增速值=营业收入增速+各科目贡献,展示贡献/拖累的前四名科目,且要求贡献值/净利润增速>15%)
|
利润方面,2023全年贵州茅台,>归母净利润增速为19%,其中营业收入正贡献18%,营业成本正贡献百分之一,管理费用正贡献百分之一点四。(注:归母净利润增速值=营业收入增速+各科目贡献,展示贡献/拖累的前四名科目,且要求贡献值/净利润增速>15%)
|
||||||
"""
|
"""
|
||||||
|
text = "静夜思是唐代诗人李白创作的一首五言古诗。这首诗描绘了诗人在寂静的夜晚,看到窗前的明月,不禁想起远方的家乡和亲人"
|
||||||
|
|
||||||
text = _format_text(text)
|
text = _format_text(text)
|
||||||
lines = utils.split_string_by_punctuations(text)
|
lines = utils.split_string_by_punctuations(text)
|
||||||
@@ -1182,7 +1307,7 @@ if __name__ == "__main__":
|
|||||||
for voice_name in voice_names:
|
for voice_name in voice_names:
|
||||||
voice_file = f"{temp_dir}/tts-{voice_name}.mp3"
|
voice_file = f"{temp_dir}/tts-{voice_name}.mp3"
|
||||||
subtitle_file = f"{temp_dir}/tts.mp3.srt"
|
subtitle_file = f"{temp_dir}/tts.mp3.srt"
|
||||||
sub_maker = tts(text=text, voice_name=voice_name, voice_file=voice_file)
|
sub_maker = azure_tts_v2(text=text, voice_name=voice_name, voice_file=voice_file)
|
||||||
create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
|
create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
|
||||||
audio_duration = get_audio_duration(sub_maker)
|
audio_duration = get_audio_duration(sub_maker)
|
||||||
print(f"voice: {voice_name}, audio duration: {audio_duration}s")
|
print(f"voice: {voice_name}, audio duration: {audio_duration}s")
|
||||||
|
|||||||
@@ -188,7 +188,7 @@ def split_string_by_punctuations(s):
|
|||||||
else:
|
else:
|
||||||
result.append(txt.strip())
|
result.append(txt.strip())
|
||||||
txt = ""
|
txt = ""
|
||||||
|
result.append(txt.strip())
|
||||||
# filter empty string
|
# filter empty string
|
||||||
result = list(filter(None, result))
|
result = list(filter(None, result))
|
||||||
return result
|
return result
|
||||||
|
|||||||
@@ -161,4 +161,10 @@
|
|||||||
### Example: "http://user:pass@proxy:1234"
|
### Example: "http://user:pass@proxy:1234"
|
||||||
### Doc: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
|
### Doc: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
|
||||||
# http = "http://10.10.1.10:3128"
|
# http = "http://10.10.1.10:3128"
|
||||||
# https = "http://10.10.1.10:1080"
|
# https = "http://10.10.1.10:1080"
|
||||||
|
|
||||||
|
[azure]
|
||||||
|
# Azure Speech API Key
|
||||||
|
# Get your API key at https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices
|
||||||
|
speech_key=""
|
||||||
|
speech_region=""
|
||||||
@@ -16,4 +16,10 @@ g4f~=0.2.5.4
|
|||||||
dashscope~=1.15.0
|
dashscope~=1.15.0
|
||||||
google.generativeai~=0.4.1
|
google.generativeai~=0.4.1
|
||||||
python-multipart~=0.0.9
|
python-multipart~=0.0.9
|
||||||
redis==5.0.3
|
redis==5.0.3
|
||||||
|
# if you use pillow~=10.3.0, you will get a "module 'PIL.Image' has no attribute 'ANTIALIAS'" error when resizing video
|
||||||
|
# please install opencv-python to fix the "module 'PIL.Image' has no attribute 'ANTIALIAS'" error
|
||||||
|
opencv-python
|
||||||
|
# for azure speech
|
||||||
|
# https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/9-more-realistic-ai-voices-for-conversations-now-generally/ba-p/4099471
|
||||||
|
azure-cognitiveservices-speech~=1.37.0
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
# Add the root directory of the project to the system path to allow importing modules from the project
|
# Add the root directory of the project to the system path to allow importing modules from the project
|
||||||
root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||||
@@ -165,7 +166,6 @@ with st.expander(tr("Basic Settings"), expanded=False):
|
|||||||
code = selected_language.split(" - ")[0].strip()
|
code = selected_language.split(" - ")[0].strip()
|
||||||
st.session_state['ui_language'] = code
|
st.session_state['ui_language'] = code
|
||||||
config.ui['language'] = code
|
config.ui['language'] = code
|
||||||
config.save_config()
|
|
||||||
|
|
||||||
with middle_config_panel:
|
with middle_config_panel:
|
||||||
# openai
|
# openai
|
||||||
@@ -207,8 +207,6 @@ with st.expander(tr("Basic Settings"), expanded=False):
|
|||||||
if st_llm_account_id:
|
if st_llm_account_id:
|
||||||
config.app[f"{llm_provider}_account_id"] = st_llm_account_id
|
config.app[f"{llm_provider}_account_id"] = st_llm_account_id
|
||||||
|
|
||||||
config.save_config()
|
|
||||||
|
|
||||||
with right_config_panel:
|
with right_config_panel:
|
||||||
pexels_api_keys = config.app.get("pexels_api_keys", [])
|
pexels_api_keys = config.app.get("pexels_api_keys", [])
|
||||||
if isinstance(pexels_api_keys, str):
|
if isinstance(pexels_api_keys, str):
|
||||||
@@ -219,7 +217,6 @@ with st.expander(tr("Basic Settings"), expanded=False):
|
|||||||
pexels_api_key = pexels_api_key.replace(" ", "")
|
pexels_api_key = pexels_api_key.replace(" ", "")
|
||||||
if pexels_api_key:
|
if pexels_api_key:
|
||||||
config.app["pexels_api_keys"] = pexels_api_key.split(",")
|
config.app["pexels_api_keys"] = pexels_api_key.split(",")
|
||||||
config.save_config()
|
|
||||||
|
|
||||||
panel = st.columns(3)
|
panel = st.columns(3)
|
||||||
left_panel = panel[0]
|
left_panel = panel[0]
|
||||||
@@ -302,20 +299,20 @@ with middle_panel:
|
|||||||
index=0)
|
index=0)
|
||||||
with st.container(border=True):
|
with st.container(border=True):
|
||||||
st.write(tr("Audio Settings"))
|
st.write(tr("Audio Settings"))
|
||||||
voices = voice.get_all_voices(filter_locals=["zh-CN", "zh-HK", "zh-TW", "de-DE", "en-US"])
|
voices = voice.get_all_azure_voices(filter_locals=["zh-CN", "zh-HK", "zh-TW", "de-DE", "en-US", "fr-FR"])
|
||||||
friendly_names = {
|
friendly_names = {
|
||||||
voice: voice.
|
v: v.
|
||||||
replace("Female", tr("Female")).
|
replace("Female", tr("Female")).
|
||||||
replace("Male", tr("Male")).
|
replace("Male", tr("Male")).
|
||||||
replace("Neural", "") for
|
replace("Neural", "") for
|
||||||
voice in voices}
|
v in voices}
|
||||||
saved_voice_name = config.ui.get("voice_name", "")
|
saved_voice_name = config.ui.get("voice_name", "")
|
||||||
saved_voice_name_index = 0
|
saved_voice_name_index = 0
|
||||||
if saved_voice_name in friendly_names:
|
if saved_voice_name in friendly_names:
|
||||||
saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
|
saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
|
||||||
else:
|
else:
|
||||||
for i, voice in enumerate(voices):
|
for i, v in enumerate(voices):
|
||||||
if voice.lower().startswith(st.session_state['ui_language'].lower()):
|
if v.lower().startswith(st.session_state['ui_language'].lower()):
|
||||||
saved_voice_name_index = i
|
saved_voice_name_index = i
|
||||||
break
|
break
|
||||||
|
|
||||||
@@ -326,7 +323,13 @@ with middle_panel:
|
|||||||
voice_name = list(friendly_names.keys())[list(friendly_names.values()).index(selected_friendly_name)]
|
voice_name = list(friendly_names.keys())[list(friendly_names.values()).index(selected_friendly_name)]
|
||||||
params.voice_name = voice_name
|
params.voice_name = voice_name
|
||||||
config.ui['voice_name'] = voice_name
|
config.ui['voice_name'] = voice_name
|
||||||
config.save_config()
|
if voice.is_azure_v2_voice(voice_name):
|
||||||
|
saved_azure_speech_region = config.azure.get(f"speech_region", "")
|
||||||
|
saved_azure_speech_key = config.azure.get(f"speech_key", "")
|
||||||
|
azure_speech_region = st.text_input(tr("Speech Region"), value=saved_azure_speech_region)
|
||||||
|
azure_speech_key = st.text_input(tr("Speech Key"), value=saved_azure_speech_key, type="password")
|
||||||
|
config.azure["speech_region"] = azure_speech_region
|
||||||
|
config.azure["speech_key"] = azure_speech_key
|
||||||
|
|
||||||
params.voice_volume = st.selectbox(tr("Speech Volume"),
|
params.voice_volume = st.selectbox(tr("Speech Volume"),
|
||||||
options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0], index=2)
|
options=[0.6, 0.8, 1.0, 1.2, 1.5, 2.0, 3.0, 4.0, 5.0], index=2)
|
||||||
@@ -363,7 +366,6 @@ with right_panel:
|
|||||||
saved_font_name_index = font_names.index(saved_font_name)
|
saved_font_name_index = font_names.index(saved_font_name)
|
||||||
params.font_name = st.selectbox(tr("Font"), font_names, index=saved_font_name_index)
|
params.font_name = st.selectbox(tr("Font"), font_names, index=saved_font_name_index)
|
||||||
config.ui['font_name'] = params.font_name
|
config.ui['font_name'] = params.font_name
|
||||||
config.save_config()
|
|
||||||
|
|
||||||
subtitle_positions = [
|
subtitle_positions = [
|
||||||
(tr("Top"), "top"),
|
(tr("Top"), "top"),
|
||||||
@@ -446,3 +448,5 @@ if start_button:
|
|||||||
open_task_folder(task_id)
|
open_task_folder(task_id)
|
||||||
logger.info(tr("Video Generation Completed"))
|
logger.info(tr("Video Generation Completed"))
|
||||||
scroll_to_bottom()
|
scroll_to_bottom()
|
||||||
|
|
||||||
|
config.save_config()
|
||||||
|
|||||||
@@ -23,6 +23,8 @@
|
|||||||
"Number of Videos Generated Simultaneously": "Anzahl der parallel generierten Videos",
|
"Number of Videos Generated Simultaneously": "Anzahl der parallel generierten Videos",
|
||||||
"Audio Settings": "**Audio Einstellungen**",
|
"Audio Settings": "**Audio Einstellungen**",
|
||||||
"Speech Synthesis": "Sprachausgabe",
|
"Speech Synthesis": "Sprachausgabe",
|
||||||
|
"Speech Region": "Region(:red[Required,[Get Region](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||||
|
"Speech Key": "API Key(:red[Required,[Get API Key](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||||
"Speech Volume": "Lautstärke der Sprachausgabe",
|
"Speech Volume": "Lautstärke der Sprachausgabe",
|
||||||
"Male": "Männlich",
|
"Male": "Männlich",
|
||||||
"Female": "Weiblich",
|
"Female": "Weiblich",
|
||||||
|
|||||||
@@ -23,6 +23,8 @@
|
|||||||
"Number of Videos Generated Simultaneously": "Number of Videos Generated Simultaneously",
|
"Number of Videos Generated Simultaneously": "Number of Videos Generated Simultaneously",
|
||||||
"Audio Settings": "**Audio Settings**",
|
"Audio Settings": "**Audio Settings**",
|
||||||
"Speech Synthesis": "Speech Synthesis Voice",
|
"Speech Synthesis": "Speech Synthesis Voice",
|
||||||
|
"Speech Region": "Region(:red[Required,[Get Region](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||||
|
"Speech Key": "API Key(:red[Required,[Get API Key](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||||
"Speech Volume": "Speech Volume (1.0 represents 100%)",
|
"Speech Volume": "Speech Volume (1.0 represents 100%)",
|
||||||
"Male": "Male",
|
"Male": "Male",
|
||||||
"Female": "Female",
|
"Female": "Female",
|
||||||
|
|||||||
@@ -23,6 +23,8 @@
|
|||||||
"Number of Videos Generated Simultaneously": "同时生成视频数量",
|
"Number of Videos Generated Simultaneously": "同时生成视频数量",
|
||||||
"Audio Settings": "**音频设置**",
|
"Audio Settings": "**音频设置**",
|
||||||
"Speech Synthesis": "朗读声音(:red[尽量与文案语言保持一致])",
|
"Speech Synthesis": "朗读声音(:red[尽量与文案语言保持一致])",
|
||||||
|
"Speech Region": "服务区域(:red[必填,[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||||
|
"Speech Key": "API Key(:red[必填,[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||||
"Speech Volume": "朗读音量(1.0表示100%)",
|
"Speech Volume": "朗读音量(1.0表示100%)",
|
||||||
"Male": "男性",
|
"Male": "男性",
|
||||||
"Female": "女性",
|
"Female": "女性",
|
||||||
|
|||||||
Reference in New Issue
Block a user