Spaces:
Paused
Paused
Commit
·
2b16965
1
Parent(s):
41d1b2b
fix: normalize some punctuation and special characters in text_to_ipa function
Browse files
- ipa/ipa.py +3 -0
ipa/ipa.py
CHANGED
|
@@ -125,6 +125,9 @@ def text_to_ipa(
|
|
| 125 |
text = lower_formosan_text(text, language)
|
| 126 |
# text = text.replace("'", "’")
|
| 127 |
text = re.sub(r"\s+", " ", text) # remove extra spaces
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
ipa, unknown_chars = convert_to_ipa(text, g2p_object[language])
|
| 130 |
|
|
|
|
| 125 |
text = lower_formosan_text(text, language)
|
| 126 |
# text = text.replace("'", "’")
|
| 127 |
text = re.sub(r"\s+", " ", text) # remove extra spaces
|
| 128 |
+
text = re.sub(r"[\"\-\“\”]", "", text) # remove punctuation
|
| 129 |
+
text = re.sub(r"[\ʼ\’\']", "ʼ", text) # normalize ʼ
|
| 130 |
+
text = text.replace("^", "⌃") # normalize ⌃
|
| 131 |
|
| 132 |
ipa, unknown_chars = convert_to_ipa(text, g2p_object[language])
|
| 133 |
|