formosan-f5-tts

Paused

App Files Files Community

txya900619 committed on Feb 17

Commit

443b650

1 Parent(s): 782de99

feat: change ipa parser to support ŋ

Browse files

Files changed (2) hide show

app.py +16 -8
ipa/ipa.py +33 -34

app.py CHANGED Viewed

@@ -108,7 +108,7 @@ def infer(
 def get_title():
-    with open("DEMO.md") as tong:
         return tong.readline().strip("# ")
@@ -202,6 +202,14 @@ with demo:
         nfe_slider: int,
         speed_slider: float,
     ):
         ref_audio_info = torchaudio.info(ref_audio_input)
         ref_duration = ref_audio_info.num_frames / ref_audio_info.sample_rate
         target_duration = (
@@ -212,15 +220,15 @@ with demo:
         )
         print(f"Reference duration: {ref_duration}")
         print(f"Target duration: {target_duration}")
-        if len(ref_text_input) == 0:
-            raise gr.Error("請勿輸入空字串。")
-        ref_text_input = text_to_ipa(ref_text_input)
-        if len(gen_text_input) == 0:
-            raise gr.Error("請勿輸入空字串。")
-        gen_text_input = text_to_ipa(gen_text_input)
         audio_out, spectrogram_path = infer(
             ref_audio_input,

 def get_title():
+    with open("DEMO.md", encoding="utf-8") as tong:
         return tong.readline().strip("# ")
         nfe_slider: int,
         speed_slider: float,
     ):
+        ref_text_input = ref_text_input.strip()
+        if len(ref_text_input) == 0:
+            raise gr.Error("請勿輸入空字串。")
+        gen_text_input = gen_text_input.strip()
+        if len(gen_text_input) == 0:
+            raise gr.Error("請勿輸入空字串。")
         ref_audio_info = torchaudio.info(ref_audio_input)
         ref_duration = ref_audio_info.num_frames / ref_audio_info.sample_rate
         target_duration = (
         )
         print(f"Reference duration: {ref_duration}")
         print(f"Target duration: {target_duration}")
+        ignore_punctuation = True
+        ipa_with_ng = True
+        if "with-trv" in model_drop_down:
+            ignore_punctuation = False
+            ipa_with_ng = False
+        ref_text_input = text_to_ipa(ref_text_input, ignore_punctuation, ipa_with_ng)
+        gen_text_input = text_to_ipa(gen_text_input, ignore_punctuation, ipa_with_ng)
         audio_out, spectrogram_path = infer(
             ref_audio_input,

ipa/ipa.py CHANGED Viewed

@@ -1,52 +1,43 @@
 import re
 from omegaconf import OmegaConf
-XIUGULUAN_G2P = OmegaConf.to_object(OmegaConf.load("configs/g2p.yaml"))["g2p"][
-    "阿美_秀姑巒"
-]
-def can_form_string(x, symbol_dict):
-    def helper(x, symbol_dict, matched_parts):
-        if not x:
-            return True, matched_parts
-        for key in symbol_dict.keys():
-            if x.startswith(key):
-                result, parts = helper(
-                    x[len(key) :], symbol_dict, matched_parts + [key]
-                )
-                if result:
-                    return True, parts
-        return False, []
-    return helper(x, symbol_dict, [])
-def text_to_ipa(text, ignore_comma=True):
-    ipa = []
     text = text.lower()
-    text = re.sub(r"[.?!]", "", text)
     text = text.replace("'", "’")
     words = text.split()  # change in future
-    print(f"ipa: {words}")
     for word in words:
-        ipa_parts = ""
-        extended_g2p = {**XIUGULUAN_G2P, ",": "" if ignore_comma else ","}
-        result, matched_parts = can_form_string(word, extended_g2p)
-        if result is False:
-            print(f"no match g2p : {word}")
-            return ""
-        for matched_part in matched_parts:
-            ipa_parts = ipa_parts + extended_g2p[matched_part]
-        ipa.append(ipa_parts)
     ipa = (
         " ".join(ipa)
         .replace("g", "ɡ")
@@ -55,4 +46,12 @@ def text_to_ipa(text, ignore_comma=True):
         .replace("R", "ʀ")
         .replace("ʤ", "dʒ")
     )
     return ipa

 import re
+import gradio as gr
 from omegaconf import OmegaConf
+g2p_config = OmegaConf.load("configs/g2p.yaml")
+g2p_object = OmegaConf.to_object(g2p_config)
+XIUGULUAN_G2P = g2p_object["g2p"]["阿美_秀姑巒"]
+def text_to_ipa(text: str, ignore_punctuation=False, ipa_with_ng=False) -> str:
     text = text.lower()
     text = text.replace("'", "’")
+    text = re.sub(r"\s+", " ", text)  # remove extra spaces
     words = text.split()  # change in future
+    print(f"text: {words}")
+    ipa = []
+    unknown_chars = set()
+    extended_g2p = {**XIUGULUAN_G2P, ",": ",", ".": ".", "?": "?", "!": "!"}
+    extended_g2p_sorted_keys = sorted(extended_g2p.keys(), key=len, reverse=True)
     for word in words:
+        unknown_char = word
+        converted_word = word
+        for key in extended_g2p_sorted_keys:
+            unknown_char = unknown_char.replace(key, "")
+            converted_word = converted_word.replace(key, extended_g2p[key])
+        if len(unknown_char) > 0:  # If there are unknown characters
+            unknown_chars.update(set(unknown_char))
+            continue
+        ipa.append(converted_word)
+    if len(unknown_chars) > 0:
+        raise gr.Error(
+            f"Unknown characters: {', '.join(unknown_chars)}. Please remove them and try again."
+        )
     ipa = (
         " ".join(ipa)
         .replace("g", "ɡ")
         .replace("R", "ʀ")
         .replace("ʤ", "dʒ")
     )
+    if ignore_punctuation:
+        ipa = re.sub(r"[.?!,]", "", ipa)
+    if ipa_with_ng:
+        ipa = ipa.replace("ŋ", "nɡ")
+    print(f"ipa: {ipa}")
     return ipa