txya900619 committed on
Commit
2b16965
·
1 Parent(s): 41d1b2b

fix: normalize some punctuation and special characters in text_to_ipa function

Browse files
Files changed (1) hide show
  1. ipa/ipa.py +3 -0
ipa/ipa.py CHANGED
@@ -125,6 +125,9 @@ def text_to_ipa(
125
  text = lower_formosan_text(text, language)
126
  # text = text.replace("'", "’")
127
  text = re.sub(r"\s+", " ", text) # remove extra spaces
 
 
 
128
 
129
  ipa, unknown_chars = convert_to_ipa(text, g2p_object[language])
130
 
 
125
  text = lower_formosan_text(text, language)
126
  # text = text.replace("'", "’")
127
  text = re.sub(r"\s+", " ", text) # remove extra spaces
128
+ text = re.sub(r"[\"\-\“\”]", "", text) # remove punctuation
129
+ text = re.sub(r"[\ʼ\’\']", "ʼ", text) # normalize ʼ
130
+ text = text.replace("^", "⌃") # normalize ⌃
131
 
132
  ipa, unknown_chars = convert_to_ipa(text, g2p_object[language])
133