Spaces:
Paused
Paused
Commit
·
d992f07
1
Parent(s):
ed43fae
feat: add language selection and update IPA processing to support multiple languages
Browse files
- app.py +17 -2
- ipa/ipa.py +5 -4
app.py
CHANGED
|
@@ -138,6 +138,12 @@ with demo:
|
|
| 138 |
label="模型",
|
| 139 |
)
|
| 140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
ref_audio_input = gr.Audio(
|
| 142 |
type="filepath",
|
| 143 |
waveform_options=gr.WaveformOptions(
|
|
@@ -194,6 +200,7 @@ with demo:
|
|
| 194 |
@gpu_decorator
|
| 195 |
def basic_tts(
|
| 196 |
model_drop_down: str,
|
|
|
|
| 197 |
ref_audio_input: str,
|
| 198 |
ref_text_input: str,
|
| 199 |
gen_text_input: str,
|
|
@@ -227,8 +234,12 @@ with demo:
|
|
| 227 |
ignore_punctuation = False
|
| 228 |
ipa_with_ng = False
|
| 229 |
|
| 230 |
-
ref_text_input = text_to_ipa(
|
| 231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
audio_out, spectrogram_path = infer(
|
| 234 |
ref_audio_input,
|
|
@@ -246,6 +257,7 @@ with demo:
|
|
| 246 |
basic_tts,
|
| 247 |
inputs=[
|
| 248 |
model_drop_down,
|
|
|
|
| 249 |
ref_audio_input,
|
| 250 |
ref_text_input,
|
| 251 |
gen_text_input,
|
|
@@ -259,11 +271,13 @@ with demo:
|
|
| 259 |
gr.Examples(
|
| 260 |
[
|
| 261 |
[
|
|
|
|
| 262 |
"./ref_wav/E-PV001-0001.wav",
|
| 263 |
"o pakafana’ ni akong to pinangan no romi’ad.",
|
| 264 |
"Mafana’ kiso a misanoPangcah haw?",
|
| 265 |
],
|
| 266 |
[
|
|
|
|
| 267 |
"./ref_wav/E-PV001-0010.wav",
|
| 268 |
"ano caay pina’on maripa’ iso cangra i, caay ka siwala^, to^to^ sa a pasayra toya kaitiraan nangra.",
|
| 269 |
"Pafelien cingra to misapoeneray a faloco', nanay mada'oc matilid i faloco' nira konini.",
|
|
@@ -271,6 +285,7 @@ with demo:
|
|
| 271 |
],
|
| 272 |
label="範例",
|
| 273 |
inputs=[
|
|
|
|
| 274 |
ref_audio_input,
|
| 275 |
ref_text_input,
|
| 276 |
gen_text_input,
|
|
|
|
| 138 |
label="模型",
|
| 139 |
)
|
| 140 |
|
| 141 |
+
language = gr.Radio(
|
| 142 |
+
choices=["阿美_秀姑巒", "太魯閣"],
|
| 143 |
+
label="語言",
|
| 144 |
+
value="阿美_秀姑巒",
|
| 145 |
+
)
|
| 146 |
+
|
| 147 |
ref_audio_input = gr.Audio(
|
| 148 |
type="filepath",
|
| 149 |
waveform_options=gr.WaveformOptions(
|
|
|
|
| 200 |
@gpu_decorator
|
| 201 |
def basic_tts(
|
| 202 |
model_drop_down: str,
|
| 203 |
+
language: str,
|
| 204 |
ref_audio_input: str,
|
| 205 |
ref_text_input: str,
|
| 206 |
gen_text_input: str,
|
|
|
|
| 234 |
ignore_punctuation = False
|
| 235 |
ipa_with_ng = False
|
| 236 |
|
| 237 |
+
ref_text_input = text_to_ipa(
|
| 238 |
+
ref_text_input, language, ignore_punctuation, ipa_with_ng
|
| 239 |
+
)
|
| 240 |
+
gen_text_input = text_to_ipa(
|
| 241 |
+
gen_text_input, language, ignore_punctuation, ipa_with_ng
|
| 242 |
+
)
|
| 243 |
|
| 244 |
audio_out, spectrogram_path = infer(
|
| 245 |
ref_audio_input,
|
|
|
|
| 257 |
basic_tts,
|
| 258 |
inputs=[
|
| 259 |
model_drop_down,
|
| 260 |
+
language,
|
| 261 |
ref_audio_input,
|
| 262 |
ref_text_input,
|
| 263 |
gen_text_input,
|
|
|
|
| 271 |
gr.Examples(
|
| 272 |
[
|
| 273 |
[
|
| 274 |
+
"阿美_秀姑巒",
|
| 275 |
"./ref_wav/E-PV001-0001.wav",
|
| 276 |
"o pakafana’ ni akong to pinangan no romi’ad.",
|
| 277 |
"Mafana’ kiso a misanoPangcah haw?",
|
| 278 |
],
|
| 279 |
[
|
| 280 |
+
"阿美_秀姑巒",
|
| 281 |
"./ref_wav/E-PV001-0010.wav",
|
| 282 |
"ano caay pina’on maripa’ iso cangra i, caay ka siwala^, to^to^ sa a pasayra toya kaitiraan nangra.",
|
| 283 |
"Pafelien cingra to misapoeneray a faloco', nanay mada'oc matilid i faloco' nira konini.",
|
|
|
|
| 285 |
],
|
| 286 |
label="範例",
|
| 287 |
inputs=[
|
| 288 |
+
language,
|
| 289 |
ref_audio_input,
|
| 290 |
ref_text_input,
|
| 291 |
gen_text_input,
|
ipa/ipa.py
CHANGED
|
@@ -4,11 +4,12 @@ import gradio as gr
|
|
| 4 |
from omegaconf import OmegaConf
|
| 5 |
|
| 6 |
g2p_config = OmegaConf.load("configs/g2p.yaml")
|
| 7 |
-
g2p_object = OmegaConf.to_object(g2p_config)
|
| 8 |
-
XIUGULUAN_G2P = g2p_object["g2p"]["阿美_秀姑巒"]
|
| 9 |
|
| 10 |
|
| 11 |
-
def text_to_ipa(text: str, ignore_punctuation=False, ipa_with_ng=False) -> str:
|
|
|
|
|
|
|
| 12 |
text = text.lower()
|
| 13 |
text = text.replace("'", "’")
|
| 14 |
text = re.sub(r"\s+", " ", text) # remove extra spaces
|
|
@@ -18,7 +19,7 @@ def text_to_ipa(text: str, ignore_punctuation=False, ipa_with_ng=False) -> str:
|
|
| 18 |
|
| 19 |
ipa = []
|
| 20 |
unknown_chars = set()
|
| 21 |
-
extended_g2p = {**
|
| 22 |
extended_g2p_sorted_keys = sorted(extended_g2p.keys(), key=len, reverse=True)
|
| 23 |
for word in words:
|
| 24 |
unknown_char = word
|
|
|
|
| 4 |
from omegaconf import OmegaConf
|
| 5 |
|
| 6 |
g2p_config = OmegaConf.load("configs/g2p.yaml")
|
| 7 |
+
g2p_object = OmegaConf.to_object(g2p_config)["g2p"]
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
+
def text_to_ipa(
|
| 11 |
+
text: str, language: str, ignore_punctuation=False, ipa_with_ng=False
|
| 12 |
+
) -> str:
|
| 13 |
text = text.lower()
|
| 14 |
text = text.replace("'", "’")
|
| 15 |
text = re.sub(r"\s+", " ", text) # remove extra spaces
|
|
|
|
| 19 |
|
| 20 |
ipa = []
|
| 21 |
unknown_chars = set()
|
| 22 |
+
extended_g2p = {**g2p_object[language], ",": ",", ".": ".", "?": "?", "!": "!"}
|
| 23 |
extended_g2p_sorted_keys = sorted(extended_g2p.keys(), key=len, reverse=True)
|
| 24 |
for word in words:
|
| 25 |
unknown_char = word
|