File size: 1,647 Bytes
0c074b9
 
443b650
0c074b9
 
443b650
 
 
0c074b9
 
443b650
0c074b9
 
443b650
0c074b9
 
443b650
0c074b9
443b650
 
 
 
0c074b9
443b650
 
 
 
 
 
 
 
 
0c074b9
443b650
0c074b9
443b650
 
 
 
0c074b9
 
 
 
 
 
 
 
 
443b650
 
 
 
 
 
 
 
0c074b9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import re

import gradio as gr
from omegaconf import OmegaConf

# Load the grapheme-to-phoneme (G2P) configuration and convert it to plain
# Python containers. NOTE: this runs at import time and the relative path is
# resolved against the current working directory.
g2p_config = OmegaConf.load("configs/g2p.yaml")
g2p_object = OmegaConf.to_object(g2p_config)
# Table stored under the "阿美_秀姑巒" key (Amis, Xiuguluan). text_to_ipa uses it as
# a mapping from orthographic substrings to their replacement strings.
XIUGULUAN_G2P = g2p_object["g2p"]["阿美_秀姑巒"]


# Punctuation marks accepted in the input and passed through unchanged.
_PASSTHROUGH_PUNCTUATION = {",": ",", ".": ".", "?": "?", "!": "!"}

# Single-character clean-ups applied to the joined IPA string. None of the
# replacement values contains another source character, so one translate pass
# is equivalent to replacing them one after another.
_IPA_NORMALIZATION = str.maketrans(
    {
        "g": "ɡ",
        "ʦ": "t͡s",
        "ʨ": "t͡ɕ",
        "R": "ʀ",
        "ʤ": "dʒ",
    }
)


def text_to_ipa(
    text: str, ignore_punctuation: bool = False, ipa_with_ng: bool = False
) -> str:
    """Convert orthographic text to IPA using the ``XIUGULUAN_G2P`` table.

    The text is lower-cased, ASCII apostrophes are replaced by "’", and the
    result is split on whitespace. Each word is then converted in a single
    left-to-right pass that always prefers the longest matching key, so a
    symbol produced by one replacement is never converted a second time.

    Args:
        text: Input text. The punctuation marks ``, . ? !`` are accepted.
        ignore_punctuation: If true, remove ``. ? ! ,`` from the result and
            collapse the whitespace left behind.
        ipa_with_ng: If true, write "ŋ" as "nɡ" in the result.

    Returns:
        The IPA words joined by single spaces; an empty string for empty or
        whitespace-only input.

    Raises:
        gr.Error: If any word contains characters that are neither keys of the
            G2P table nor accepted punctuation. All offending characters are
            listed, in sorted order.
    """
    # str.split() already collapses runs of whitespace.
    words = text.lower().replace("'", "’").split()  # change in future

    print(f"text: {words}")

    table = {**XIUGULUAN_G2P, **_PASSTHROUGH_PUNCTUATION}
    # Longest keys first so multi-character graphemes win over their prefixes.
    # Empty keys are dropped: they would match everywhere.
    keys = sorted((key for key in table if key), key=len, reverse=True)
    grapheme = re.compile("|".join(map(re.escape, keys)))

    ipa_words = []
    unknown_chars = set()
    for word in words:
        # Whatever survives removal of every known grapheme is unknown.
        leftover = grapheme.sub("", word)
        if leftover:
            unknown_chars.update(leftover)
            continue

        ipa_words.append(grapheme.sub(lambda match: table[match.group(0)], word))

    if unknown_chars:
        # Sorted so the message is deterministic (set order is arbitrary).
        raise gr.Error(
            f"Unknown characters: {', '.join(sorted(unknown_chars))}. Please remove them and try again."
        )

    ipa = " ".join(ipa_words).translate(_IPA_NORMALIZATION)

    if ignore_punctuation:
        ipa = re.sub(r"[.?!,]", "", ipa)
        # A punctuation mark that stood alone as a token leaves a doubled or
        # trailing space behind; normalise back to single separators.
        ipa = " ".join(ipa.split())

    if ipa_with_ng:
        ipa = ipa.replace("ŋ", "nɡ")

    print(f"ipa: {ipa}")
    return ipa