Spaces:
Paused
Paused
| import re | |
| import gradio as gr | |
| from omegaconf import OmegaConf | |
# Load the G2P configuration once at import time; the path is relative to the
# process working directory.
g2p_config = OmegaConf.load("configs/g2p.yaml")
# Convert the OmegaConf node so it can be indexed like a regular mapping below.
g2p_object = OmegaConf.to_object(g2p_config)
# Table used by text_to_ipa to map orthographic keys to IPA strings.
# NOTE(review): the key "阿美_秀姑巒" presumably selects the Xiuguluan Amis
# entry of the config -- confirm against configs/g2p.yaml.
XIUGULUAN_G2P = g2p_object["g2p"]["阿美_秀姑巒"]
def text_to_ipa(
    text: str, ignore_punctuation: bool = False, ipa_with_ng: bool = False
) -> str:
    """Convert orthographic text to an IPA transcription.

    The text is lower-cased, ASCII apostrophes are normalised to "’", and each
    whitespace-separated word is transcribed with the ``XIUGULUAN_G2P`` table
    (extended so that ``, . ? !`` pass through unchanged).

    Args:
        text: Input text to transcribe.
        ignore_punctuation: If true, strip ``. ? ! ,`` from the result.
        ipa_with_ng: If true, write "ŋ" as "nɡ" in the result.

    Returns:
        The transcribed words joined by single spaces.

    Raises:
        gr.Error: If any word contains characters that are not covered by the
            table; the message lists every offending character.
    """
    text = text.lower().replace("'", "’")
    # str.split() with no argument already collapses runs of whitespace.
    words = text.split()  # change in future
    print(f"text: {words}")

    extended_g2p = {**XIUGULUAN_G2P, ",": ",", ".": ".", "?": "?", "!": "!"}
    # Longest keys first so multi-character graphemes win over their prefixes.
    # Empty keys are skipped: they would match at every position.
    graphemes = sorted((key for key in extended_g2p if key), key=len, reverse=True)
    grapheme_pattern = re.compile("|".join(re.escape(key) for key in graphemes))

    ipa_words = []
    unknown_chars = set()
    for word in words:
        # Whatever the table cannot match is, by definition, unknown.
        leftover = grapheme_pattern.sub("", word)
        if leftover:
            unknown_chars.update(leftover)
            continue
        # Single left-to-right pass: already-converted IPA output is never
        # re-scanned, so one mapping cannot corrupt the result of another.
        ipa_words.append(
            grapheme_pattern.sub(lambda match: extended_g2p[match.group()], word)
        )

    if unknown_chars:
        # Sorted so the message is deterministic across runs.
        raise gr.Error(
            f"Unknown characters: {', '.join(sorted(unknown_chars))}. Please remove them and try again."
        )

    ipa = " ".join(ipa_words)
    # Normalise table output to the symbols expected downstream; applied in
    # this fixed order, exactly as individual replacements.
    for source, target in (
        ("g", "ɡ"),
        ("ʦ", "t͡s"),
        ("ʨ", "t͡ɕ"),
        ("R", "ʀ"),
        ("ʤ", "dʒ"),
    ):
        ipa = ipa.replace(source, target)

    if ignore_punctuation:
        ipa = re.sub(r"[.?!,]", "", ipa)
        # Removing a stand-alone punctuation token would otherwise leave a
        # doubled or trailing space behind.
        ipa = " ".join(ipa.split())
    if ipa_with_ng:
        ipa = ipa.replace("ŋ", "nɡ")

    print(f"ipa: {ipa}")
    return ipa