Skip to content

[cherry-pick][r1.2]Fix mix front (#2493) #2501

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 8, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 39 additions & 177 deletions paddlespeech/t2s/frontend/mix_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from typing import Dict
from typing import List

Expand All @@ -30,7 +29,6 @@ def __init__(self,
self.zh_frontend = Frontend(
phone_vocab_path=phone_vocab_path, tone_vocab_path=tone_vocab_path)
self.en_frontend = English(phone_vocab_path=phone_vocab_path)
self.SENTENCE_SPLITOR = re.compile(r'([:、,;。?!,;?!][”’]?)')
self.sp_id = self.zh_frontend.vocab_phones["sp"]
self.sp_id_tensor = paddle.to_tensor([self.sp_id])

Expand All @@ -47,188 +45,56 @@ def is_alphabet(self, char):
else:
return False

def is_number(self, char):
    """Tell whether ``char`` is an ASCII digit.

    Args:
        char: The character to classify.

    Returns:
        bool: True when ``char`` compares within U+0030 ('0') and
        U+0039 ('9') inclusive, False otherwise.
    """
    return '\u0030' <= char <= '\u0039'

def is_other(self, char):
if not (self.is_chinese(char) or self.is_number(char) or
self.is_alphabet(char)):
if not (self.is_chinese(char) or self.is_alphabet(char)):
return True
else:
return False

def is_end(self, before_char, after_char) -> bool:
    """Decide whether the characters around a "." mark an English sentence end.

    Args:
        before_char: The character immediately before the period.
        after_char: The character immediately after the period.

    Returns:
        bool: True only when both characters are either accepted by
        ``self.is_alphabet`` or are a single space.
    """
    # Evaluate both neighbours up front (no short-circuit between them),
    # then require that every one of them qualified.
    neighbour_ok = [
        self.is_alphabet(ch) or ch == " " for ch in (before_char, after_char)
    ]
    return all(neighbour_ok)

def _replace(self, text: str) -> str:
    """Rewrite English sentence-ending periods as the Chinese full stop "。".

    A "." is replaced only when it has a character on both sides and
    ``self.is_end`` accepts that pair of neighbours. Every other character,
    including a "." at the very start or very end of ``text``, is copied
    through unchanged, so no text is ever dropped. Neighbours are always
    read from the original ``text``, not from the partially built result.

    Args:
        text: The input text to scan.

    Returns:
        str: ``text`` with each qualifying "." replaced by "。".
    """
    last_index = len(text) - 1
    pieces = []
    for idx, ch in enumerate(text):
        # A period on either edge of the text has only one neighbour, so
        # it cannot be tested with is_end and is kept as is.
        if (ch == "." and 0 < idx < last_index and
                self.is_end(text[idx - 1], text[idx + 1])):
            pieces.append("。")
        else:
            pieces.append(ch)
    return "".join(pieces)

def _split(self, text: str) -> List[str]:
    """Clean ``text`` and cut it into a list of sentences.

    Steps, in order: delete the bracket/symbol characters listed in the
    first pattern, pass the result through ``self._replace``, insert a
    newline after every match of ``self.SENTENCE_SPLITOR``, strip the
    whole string, then split on runs of newlines.

    Args:
        text: The raw input text.

    Returns:
        List[str]: The sentences, each stripped of surrounding whitespace.
    """
    cleaned = re.sub(r'[《》【】<=>{}()()#&@“”^_|…\\]', '', text)
    # Turn English sentence-ending "." into "。" so the splitter can see it.
    cleaned = self._replace(cleaned)
    # Keep each matched delimiter and follow it with a line break.
    marked = self.SENTENCE_SPLITOR.sub(r'\1\n', cleaned).strip()
    return [piece.strip() for piece in re.split(r'\n+', marked)]

def _distinguish(self, text: str) -> List[str]:
def get_segment(self, text: str) -> List[str]:
# sentence --> [ch_part, en_part, ch_part, ...]

segments = []
types = []

flag = 0
temp_seg = ""
temp_lang = ""

# Determine the type of each character. type: blank, chinese, alphabet, number, unk and point.
for ch in text:
if ch == ".":
types.append("point")
elif self.is_chinese(ch):
if self.is_chinese(ch):
types.append("zh")
elif self.is_alphabet(ch):
types.append("en")
elif ch == " ":
types.append("blank")
elif self.is_number(ch):
types.append("num")
else:
types.append("unk")
types.append("other")

assert len(types) == len(text)

for i in range(len(types)):

# find the first char of the seg
if flag == 0:
# 首个字符是中文,英文或者数字
if types[i] == "zh" or types[i] == "en" or types[i] == "num":
temp_seg += text[i]
temp_lang = types[i]
flag = 1
temp_seg += text[i]
temp_lang = types[i]
flag = 1

else:
# 数字和小数点均与前面的字符合并,类型属于前面一个字符的类型
if types[i] == temp_lang or types[i] == "num" or types[
i] == "point":
temp_seg += text[i]

# 数字与后面的任意字符都拼接
elif temp_lang == "num":
temp_seg += text[i]
if types[i] == "zh" or types[i] == "en":
if temp_lang == "other":
if types[i] == temp_lang:
temp_seg += text[i]
else:
temp_seg += text[i]
temp_lang = types[i]

# 如果是空格则与前面字符拼接
elif types[i] == "blank":
temp_seg += text[i]

elif types[i] == "unk":
pass

else:
segments.append((temp_seg, temp_lang))

if types[i] == "zh" or types[i] == "en":
if types[i] == temp_lang:
temp_seg += text[i]
elif types[i] == "other":
temp_seg += text[i]
else:
segments.append((temp_seg, temp_lang))
temp_seg = text[i]
temp_lang = types[i]
flag = 1
else:
flag = 0
temp_seg = ""
temp_lang = ""

segments.append((temp_seg, temp_lang))

Expand All @@ -241,34 +107,30 @@ def get_input_ids(self,
add_sp: bool=True,
to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]:

sentences = self._split(sentence)
segments = self.get_segment(sentence)

phones_list = []
result = {}
for text in sentences:
phones_seg = []
segments = self._distinguish(text)
for seg in segments:
content = seg[0]
lang = seg[1]
if content != '':
if lang == "en":
input_ids = self.en_frontend.get_input_ids(
content, merge_sentences=True, to_tensor=to_tensor)
else:
input_ids = self.zh_frontend.get_input_ids(
content,
merge_sentences=True,
get_tone_ids=get_tone_ids,
to_tensor=to_tensor)

phones_seg.append(input_ids["phone_ids"][0])
if add_sp:
phones_seg.append(self.sp_id_tensor)

if phones_seg == []:
phones_seg.append(self.sp_id_tensor)
phones = paddle.concat(phones_seg)
phones_list.append(phones)
for seg in segments:
content = seg[0]
lang = seg[1]
if content != '':
if lang == "en":
input_ids = self.en_frontend.get_input_ids(
content, merge_sentences=False, to_tensor=to_tensor)
else:
input_ids = self.zh_frontend.get_input_ids(
content,
merge_sentences=False,
get_tone_ids=get_tone_ids,
to_tensor=to_tensor)
if add_sp:
input_ids["phone_ids"][-1] = paddle.concat(
[input_ids["phone_ids"][-1], self.sp_id_tensor])

for phones in input_ids["phone_ids"]:
phones_list.append(phones)

if merge_sentences:
merge_list = paddle.concat(phones_list)
Expand Down