formosan-f5-tts

Paused

App Files Community

txya900619 committed on Feb 17

Commit

d992f07

1 Parent(s): ed43fae

feat: add language selection and update IPA processing to support multiple languages

Browse files

Files changed (2) hide show

app.py +17 -2
ipa/ipa.py +5 -4

app.py CHANGED Viewed

@@ -138,6 +138,12 @@ with demo:
                 label="模型",
             )
             ref_audio_input = gr.Audio(
                 type="filepath",
                 waveform_options=gr.WaveformOptions(
@@ -194,6 +200,7 @@ with demo:
     @gpu_decorator
     def basic_tts(
         model_drop_down: str,
         ref_audio_input: str,
         ref_text_input: str,
         gen_text_input: str,
@@ -227,8 +234,12 @@ with demo:
             ignore_punctuation = False
             ipa_with_ng = False
-        ref_text_input = text_to_ipa(ref_text_input, ignore_punctuation, ipa_with_ng)
-        gen_text_input = text_to_ipa(gen_text_input, ignore_punctuation, ipa_with_ng)
         audio_out, spectrogram_path = infer(
             ref_audio_input,
@@ -246,6 +257,7 @@ with demo:
         basic_tts,
         inputs=[
             model_drop_down,
             ref_audio_input,
             ref_text_input,
             gen_text_input,
@@ -259,11 +271,13 @@ with demo:
     gr.Examples(
         [
             [
                 "./ref_wav/E-PV001-0001.wav",
                 "o pakafana’ ni akong to pinangan no romi’ad.",
                 "Mafana’ kiso a misanoPangcah haw?",
             ],
             [
                 "./ref_wav/E-PV001-0010.wav",
                 "ano caay pina’on maripa’ iso cangra i, caay ka siwala^, to^to^ sa a pasayra toya kaitiraan nangra.",
                 "Pafelien cingra to misapoeneray a faloco', nanay mada'oc matilid i faloco' nira konini.",
@@ -271,6 +285,7 @@ with demo:
         ],
         label="範例",
         inputs=[
             ref_audio_input,
             ref_text_input,
             gen_text_input,

                 label="模型",
             )
+            language = gr.Radio(
+                choices=["阿美_秀姑巒", "太魯閣"],
+                label="語言",
+                value="阿美_秀姑巒",
+            )
             ref_audio_input = gr.Audio(
                 type="filepath",
                 waveform_options=gr.WaveformOptions(
     @gpu_decorator
     def basic_tts(
         model_drop_down: str,
+        language: str,
         ref_audio_input: str,
         ref_text_input: str,
         gen_text_input: str,
             ignore_punctuation = False
             ipa_with_ng = False
+        ref_text_input = text_to_ipa(
+            ref_text_input, language, ignore_punctuation, ipa_with_ng
+        )
+        gen_text_input = text_to_ipa(
+            gen_text_input, language, ignore_punctuation, ipa_with_ng
+        )
         audio_out, spectrogram_path = infer(
             ref_audio_input,
         basic_tts,
         inputs=[
             model_drop_down,
+            language,
             ref_audio_input,
             ref_text_input,
             gen_text_input,
     gr.Examples(
         [
             [
+                "阿美_秀姑巒",
                 "./ref_wav/E-PV001-0001.wav",
                 "o pakafana’ ni akong to pinangan no romi’ad.",
                 "Mafana’ kiso a misanoPangcah haw?",
             ],
             [
+                "阿美_秀姑巒",
                 "./ref_wav/E-PV001-0010.wav",
                 "ano caay pina’on maripa’ iso cangra i, caay ka siwala^, to^to^ sa a pasayra toya kaitiraan nangra.",
                 "Pafelien cingra to misapoeneray a faloco', nanay mada'oc matilid i faloco' nira konini.",
         ],
         label="範例",
         inputs=[
+            language,
             ref_audio_input,
             ref_text_input,
             gen_text_input,

ipa/ipa.py CHANGED Viewed

@@ -4,11 +4,12 @@ import gradio as gr
 from omegaconf import OmegaConf
 g2p_config = OmegaConf.load("configs/g2p.yaml")
-g2p_object = OmegaConf.to_object(g2p_config)
-XIUGULUAN_G2P = g2p_object["g2p"]["阿美_秀姑巒"]
-def text_to_ipa(text: str, ignore_punctuation=False, ipa_with_ng=False) -> str:
     text = text.lower()
     text = text.replace("'", "’")
     text = re.sub(r"\s+", " ", text)  # remove extra spaces
@@ -18,7 +19,7 @@ def text_to_ipa(text: str, ignore_punctuation=False, ipa_with_ng=False) -> str:
     ipa = []
     unknown_chars = set()
-    extended_g2p = {**XIUGULUAN_G2P, ",": ",", ".": ".", "?": "?", "!": "!"}
     extended_g2p_sorted_keys = sorted(extended_g2p.keys(), key=len, reverse=True)
     for word in words:
         unknown_char = word

 from omegaconf import OmegaConf
 g2p_config = OmegaConf.load("configs/g2p.yaml")
+g2p_object = OmegaConf.to_object(g2p_config)["g2p"]
+def text_to_ipa(
+    text: str, language: str, ignore_punctuation=False, ipa_with_ng=False
+) -> str:
     text = text.lower()
     text = text.replace("'", "’")
     text = re.sub(r"\s+", " ", text)  # remove extra spaces
     ipa = []
     unknown_chars = set()
+    extended_g2p = {**g2p_object[language], ",": ",", ".": ".", "?": "?", "!": "!"}
     extended_g2p_sorted_keys = sorted(extended_g2p.keys(), key=len, reverse=True)
     for word in words:
         unknown_char = word