1. Add azure_tts_v1 to control the speed of speech

This commit is contained in:
yyhhyyyyyy
2024-07-19 11:06:34 +08:00
parent 6853163905
commit 63fb848a17
5 changed files with 25 additions and 7 deletions

View File

@@ -108,6 +108,7 @@ class VideoParams(BaseModel):
voice_name: Optional[str] = ""
voice_volume: Optional[float] = 1.0
voice_rate: Optional[float] = 1.0
bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = ""
bgm_volume: Optional[float] = 0.2

View File

@@ -32,6 +32,7 @@ def start(task_id, params: VideoParams):
video_subject = params.video_subject
voice_name = voice.parse_voice_name(params.voice_name)
voice_rate = params.voice_rate
paragraph_number = params.paragraph_number
n_threads = params.n_threads
max_clip_duration = params.video_clip_duration
@@ -84,7 +85,7 @@ def start(task_id, params: VideoParams):
logger.info("\n\n## generating audio")
audio_file = path.join(utils.task_dir(task_id), f"audio.mp3")
sub_maker = voice.tts(text=video_script, voice_name=voice_name, voice_file=audio_file)
sub_maker = voice.tts(text=video_script, voice_name=voice_name, voice_rate=voice_rate, voice_file=audio_file)
if sub_maker is None:
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
logger.error(

View File

@@ -1028,21 +1028,32 @@ def is_azure_v2_voice(voice_name: str):
return ""
def tts(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
def tts(text: str, voice_name: str, voice_rate: float, voice_file: str) -> [SubMaker, None]:
if is_azure_v2_voice(voice_name):
return azure_tts_v2(text, voice_name, voice_file)
return azure_tts_v1(text, voice_name, voice_file)
return azure_tts_v1(text, voice_name, voice_rate, voice_file)
def azure_tts_v1(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
def convert_rate_to_percent(rate: float) -> str:
if rate == 1.0:
return "+0%"
percent = round((rate - 1.0) * 100)
if percent > 0:
return f"+{percent}%"
else:
return f"{percent}%"
def azure_tts_v1(text: str, voice_name: str, voice_rate: float, voice_file: str) -> [SubMaker, None]:
voice_name = parse_voice_name(voice_name)
text = text.strip()
rate_str = convert_rate_to_percent(voice_rate)
for i in range(3):
try:
logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
async def _do() -> SubMaker:
communicate = edge_tts.Communicate(text, voice_name)
communicate = edge_tts.Communicate(text, voice_name, rate=rate_str)
sub_maker = edge_tts.SubMaker()
with open(voice_file, "wb") as file:
async for chunk in communicate.stream():