optimize segmentation
This commit is contained in:
@@ -163,12 +163,27 @@ def str_contains_punctuation(word):
|
||||
def split_string_by_punctuations(s, punctuations=None):
    """Split *s* into whitespace-stripped segments at punctuation characters.

    A "." that sits directly between two digits is treated as a decimal
    point (for example the "2.5" in "a 2.5% fee") and is kept inside the
    current segment instead of ending it.

    Args:
        s: The text to split.
        punctuations: Container of characters that end a segment. When
            ``None`` (the default), ``const.PUNCTUATIONS`` is used.

    Returns:
        A list of segments in order of appearance. Each punctuation
        character closes the current segment, so consecutive punctuation
        produces empty strings. Text remaining after the last punctuation
        character is appended as a final segment when it is non-empty.
    """
    if punctuations is None:
        punctuations = const.PUNCTUATIONS

    result = []
    chunk = []
    last_index = len(s) - 1

    for i, char in enumerate(s):
        if char == ".":
            # Look at the immediate neighbours of this position only, so no
            # state leaks from earlier iterations.
            previous_char = s[i - 1] if i > 0 else ""
            next_char = s[i + 1] if i < last_index else ""
            if previous_char.isdigit() and next_char.isdigit():
                # Decimal point inside a number: not a segment break.
                chunk.append(char)
                continue

        if char in punctuations:
            result.append("".join(chunk).strip())
            chunk = []
        else:
            chunk.append(char)

    # Keep text that follows the final punctuation mark; skipping an empty
    # tail leaves the output unchanged for input that ends in punctuation.
    tail = "".join(chunk).strip()
    if tail:
        result.append(tail)

    return result
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user