Spaces:
Paused
Paused
Commit
·
443b650
1
Parent(s):
782de99
feat: change ipa parser to support ŋ
Browse files- app.py +16 -8
- ipa/ipa.py +33 -34
app.py
CHANGED
|
@@ -108,7 +108,7 @@ def infer(
|
|
| 108 |
|
| 109 |
|
| 110 |
def get_title():
|
| 111 |
-
with open("DEMO.md") as tong:
|
| 112 |
return tong.readline().strip("# ")
|
| 113 |
|
| 114 |
|
|
@@ -202,6 +202,14 @@ with demo:
|
|
| 202 |
nfe_slider: int,
|
| 203 |
speed_slider: float,
|
| 204 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
ref_audio_info = torchaudio.info(ref_audio_input)
|
| 206 |
ref_duration = ref_audio_info.num_frames / ref_audio_info.sample_rate
|
| 207 |
target_duration = (
|
|
@@ -212,15 +220,15 @@ with demo:
|
|
| 212 |
)
|
| 213 |
print(f"Reference duration: {ref_duration}")
|
| 214 |
print(f"Target duration: {target_duration}")
|
| 215 |
-
if len(ref_text_input) == 0:
|
| 216 |
-
raise gr.Error("請勿輸入空字串。")
|
| 217 |
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
if
|
| 221 |
-
|
|
|
|
| 222 |
|
| 223 |
-
|
|
|
|
| 224 |
|
| 225 |
audio_out, spectrogram_path = infer(
|
| 226 |
ref_audio_input,
|
|
|
|
| 108 |
|
| 109 |
|
| 110 |
def get_title():
    """Return the demo title read from the first line of ``DEMO.md``.

    When that line is a Markdown heading (for example ``# My demo``), the
    leading ``#`` markers and the surrounding whitespace, including the
    line's trailing newline, are removed.

    Returns:
        The heading text, or an empty string when the file is empty.

    Raises:
        OSError: If ``DEMO.md`` cannot be opened.
    """
    # Explicit UTF-8 so the title decodes the same way on every locale.
    with open("DEMO.md", encoding="utf-8") as tong:
        first_line = tong.readline()

    # ``str.strip("# ")`` stops at the trailing "\n", so the newline would be
    # kept; and on a file without a final newline it would also eat a
    # legitimate trailing "#". Trim whitespace first, then drop heading
    # markers from the left side only.
    return first_line.strip().lstrip("#").strip()
|
| 113 |
|
| 114 |
|
|
|
|
| 202 |
nfe_slider: int,
|
| 203 |
speed_slider: float,
|
| 204 |
):
|
| 205 |
+
ref_text_input = ref_text_input.strip()
|
| 206 |
+
if len(ref_text_input) == 0:
|
| 207 |
+
raise gr.Error("請勿輸入空字串。")
|
| 208 |
+
|
| 209 |
+
gen_text_input = gen_text_input.strip()
|
| 210 |
+
if len(gen_text_input) == 0:
|
| 211 |
+
raise gr.Error("請勿輸入空字串。")
|
| 212 |
+
|
| 213 |
ref_audio_info = torchaudio.info(ref_audio_input)
|
| 214 |
ref_duration = ref_audio_info.num_frames / ref_audio_info.sample_rate
|
| 215 |
target_duration = (
|
|
|
|
| 220 |
)
|
| 221 |
print(f"Reference duration: {ref_duration}")
|
| 222 |
print(f"Target duration: {target_duration}")
|
|
|
|
|
|
|
| 223 |
|
| 224 |
+
ignore_punctuation = True
|
| 225 |
+
ipa_with_ng = True
|
| 226 |
+
if "with-trv" in model_drop_down:
|
| 227 |
+
ignore_punctuation = False
|
| 228 |
+
ipa_with_ng = False
|
| 229 |
|
| 230 |
+
ref_text_input = text_to_ipa(ref_text_input, ignore_punctuation, ipa_with_ng)
|
| 231 |
+
gen_text_input = text_to_ipa(gen_text_input, ignore_punctuation, ipa_with_ng)
|
| 232 |
|
| 233 |
audio_out, spectrogram_path = infer(
|
| 234 |
ref_audio_input,
|
ipa/ipa.py
CHANGED
|
@@ -1,52 +1,43 @@
|
|
| 1 |
import re
|
| 2 |
|
|
|
|
| 3 |
from omegaconf import OmegaConf
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
]
|
| 8 |
|
| 9 |
|
| 10 |
-
def
|
| 11 |
-
def helper(x, symbol_dict, matched_parts):
|
| 12 |
-
if not x:
|
| 13 |
-
return True, matched_parts
|
| 14 |
-
|
| 15 |
-
for key in symbol_dict.keys():
|
| 16 |
-
if x.startswith(key):
|
| 17 |
-
result, parts = helper(
|
| 18 |
-
x[len(key) :], symbol_dict, matched_parts + [key]
|
| 19 |
-
)
|
| 20 |
-
if result:
|
| 21 |
-
return True, parts
|
| 22 |
-
|
| 23 |
-
return False, []
|
| 24 |
-
|
| 25 |
-
return helper(x, symbol_dict, [])
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
def text_to_ipa(text, ignore_comma=True):
|
| 29 |
-
ipa = []
|
| 30 |
text = text.lower()
|
| 31 |
-
text = re.sub(r"[.?!]", "", text)
|
| 32 |
text = text.replace("'", "’")
|
|
|
|
| 33 |
words = text.split() # change in future
|
| 34 |
|
| 35 |
-
print(f"
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
for word in words:
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
|
| 43 |
-
print(f"no match g2p : {word}")
|
| 44 |
-
return ""
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
ipa.append(ipa_parts)
|
| 50 |
ipa = (
|
| 51 |
" ".join(ipa)
|
| 52 |
.replace("g", "ɡ")
|
|
@@ -55,4 +46,12 @@ def text_to_ipa(text, ignore_comma=True):
|
|
| 55 |
.replace("R", "ʀ")
|
| 56 |
.replace("ʤ", "dʒ")
|
| 57 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
return ipa
|
|
|
|
| 1 |
import re
|
| 2 |
|
| 3 |
+
import gradio as gr
|
| 4 |
from omegaconf import OmegaConf
|
| 5 |
|
| 6 |
+
# Grapheme-to-phoneme tables are read once, when this module is imported.
# The config path is relative, so it is resolved against the working directory.
g2p_config = OmegaConf.load("configs/g2p.yaml")
# Convert the loaded config into plain Python containers.
g2p_object = OmegaConf.to_object(g2p_config)
# Table stored under the "阿美_秀姑巒" (Amis, Xiuguluan) key of the "g2p" section.
XIUGULUAN_G2P = g2p_object["g2p"]["阿美_秀姑巒"]
|
| 9 |
|
| 10 |
|
| 11 |
+
def text_to_ipa(text: str, ignore_punctuation=False, ipa_with_ng=False) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
text = text.lower()
|
|
|
|
| 13 |
text = text.replace("'", "’")
|
| 14 |
+
text = re.sub(r"\s+", " ", text) # remove extra spaces
|
| 15 |
words = text.split() # change in future
|
| 16 |
|
| 17 |
+
print(f"text: {words}")
|
| 18 |
|
| 19 |
+
ipa = []
|
| 20 |
+
unknown_chars = set()
|
| 21 |
+
extended_g2p = {**XIUGULUAN_G2P, ",": ",", ".": ".", "?": "?", "!": "!"}
|
| 22 |
+
extended_g2p_sorted_keys = sorted(extended_g2p.keys(), key=len, reverse=True)
|
| 23 |
for word in words:
|
| 24 |
+
unknown_char = word
|
| 25 |
+
converted_word = word
|
| 26 |
+
for key in extended_g2p_sorted_keys:
|
| 27 |
+
unknown_char = unknown_char.replace(key, "")
|
| 28 |
+
converted_word = converted_word.replace(key, extended_g2p[key])
|
| 29 |
+
|
| 30 |
+
if len(unknown_char) > 0: # If there are unknown characters
|
| 31 |
+
unknown_chars.update(set(unknown_char))
|
| 32 |
+
continue
|
| 33 |
|
| 34 |
+
ipa.append(converted_word)
|
|
|
|
|
|
|
| 35 |
|
| 36 |
+
if len(unknown_chars) > 0:
|
| 37 |
+
raise gr.Error(
|
| 38 |
+
f"Unknown characters: {', '.join(unknown_chars)}. Please remove them and try again."
|
| 39 |
+
)
|
| 40 |
|
|
|
|
| 41 |
ipa = (
|
| 42 |
" ".join(ipa)
|
| 43 |
.replace("g", "ɡ")
|
|
|
|
| 46 |
.replace("R", "ʀ")
|
| 47 |
.replace("ʤ", "dʒ")
|
| 48 |
)
|
| 49 |
+
|
| 50 |
+
if ignore_punctuation:
|
| 51 |
+
ipa = re.sub(r"[.?!,]", "", ipa)
|
| 52 |
+
|
| 53 |
+
if ipa_with_ng:
|
| 54 |
+
ipa = ipa.replace("ŋ", "nɡ")
|
| 55 |
+
|
| 56 |
+
print(f"ipa: {ipa}")
|
| 57 |
return ipa
|