SileroTTS preview (#1934)

This commit is contained in:
missionfloyd 2023-05-09 17:28:59 -06:00 committed by GitHub
parent 8fa5f651d6
commit fe4dfc647d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 760 additions and 1 deletions

View file

@ -1,9 +1,9 @@
import random
import time
from pathlib import Path
import gradio as gr
import torch
from extensions.silero_tts import tts_preprocessor
from modules import chat, shared
from modules.html_generator import chat_html_wrapper
@ -147,6 +147,30 @@ def setup():
model = load_model()
def random_sentence():
    """Return a random non-blank line from the Harvard sentences file.

    Reads ``extensions/silero_tts/harvard_sentences.txt`` (path relative to
    the current working directory) as UTF-8 and treats every line as one
    candidate sentence. Surrounding whitespace, including the trailing
    newline, is removed and blank lines are ignored, so the returned string
    is never empty.

    Raises:
        FileNotFoundError: if the sentences file cannot be found.
        IndexError: if the file contains no non-blank lines.
    """
    # Explicit encoding keeps decoding independent of the platform locale.
    with open("extensions/silero_tts/harvard_sentences.txt", encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        # Materialize inside the `with` block, while the file is still open.
        sentences = [sentence for sentence in stripped_lines if sentence]
    return random.choice(sentences)
def voice_preview(preview_text):
    """Synthesize a voice sample and return an HTML audio player for it.

    Args:
        preview_text: Text to speak. When it is empty, ``None`` or consists
            only of whitespace, a sentence from ``random_sentence()`` is
            spoken instead.

    Returns:
        An ``<audio>`` HTML snippet (with ``controls`` and ``autoplay``)
        whose source is the WAV file written by this call.
    """
    global model, current_params

    # Reload the model if any setting differs from the snapshot taken at the
    # previous load, then refresh the snapshot.
    if any(params[key] != current_params[key] for key in params):
        model = load_model()
        current_params = params.copy()

    # Whitespace-only input would otherwise reach the synthesizer as blank
    # text; treat it the same as an empty field.
    if preview_text and preview_text.strip():
        text = preview_text
    else:
        text = random_sentence()

    string = tts_preprocessor.preprocess(text)
    output_file = Path('extensions/silero_tts/outputs/voice_preview.wav')

    # Wrap the escaped text in SSML carrying the selected speed and pitch.
    prosody = f"<prosody rate=\"{params['voice_speed']}\" pitch=\"{params['voice_pitch']}\">"
    silero_input = f'<speak>{prosody}{xmlesc(string)}</prosody></speak>'
    model.save_wav(
        ssml_text=silero_input,
        speaker=params['speaker'],
        sample_rate=int(params['sample_rate']),
        audio_path=str(output_file),
    )

    # The timestamp query string changes the URL on each call; presumably
    # this keeps the browser from replaying a cached copy of the same file.
    return f'<audio src="file/{output_file.as_posix()}?{int(time.time())}" controls autoplay></audio>'
def ui():
# Gradio elements
with gr.Accordion("Silero TTS"):
@ -160,6 +184,11 @@ def ui():
v_pitch = gr.Dropdown(value=params['voice_pitch'], choices=voice_pitches, label='Voice pitch')
v_speed = gr.Dropdown(value=params['voice_speed'], choices=voice_speeds, label='Voice speed')
with gr.Row():
preview_text = gr.Text(show_label=False, placeholder="Preview text", elem_id="silero_preview_text")
preview_play = gr.Button("Preview")
preview_audio = gr.HTML(visible=False)
with gr.Row():
convert = gr.Button('Permanently replace audios with the message texts')
convert_cancel = gr.Button('Cancel', visible=False)
@ -184,3 +213,7 @@ def ui():
voice.change(lambda x: params.update({"speaker": x}), voice, None)
v_pitch.change(lambda x: params.update({"voice_pitch": x}), v_pitch, None)
v_speed.change(lambda x: params.update({"voice_speed": x}), v_speed, None)
# Play preview
preview_text.submit(voice_preview, preview_text, preview_audio)
preview_play.click(voice_preview, preview_text, preview_audio)