Fixed a problem with escaped Unicode characters found in the Spanish sample data

This commit is contained in:
2019-07-18 17:12:48 +02:00
parent 5693f91a20
commit 640710541e
7 changed files with 9486 additions and 12 deletions

View File

@@ -24,19 +24,19 @@ def is_a_proper_noun(phrase):
def get_punc_before(word):
    """Return the punctuation characters that lead ``word``.

    Characters are scanned from the start of ``word``. Each character found
    in ``PUNCTUATION`` is collected; scanning stops at the first alphabetic
    character or at the first character that is not in ``PUNCTUATION``.

    Args:
        word: The string to inspect.

    Returns:
        A list of the leading punctuation characters in their original
        order. The list is empty when ``word`` is empty or does not start
        with punctuation.
    """
    punc = []
    for char in word:
        # Stop at the first letter or at anything that is not punctuation.
        if char.isalpha() or char not in PUNCTUATION:
            break
        punc.append(char)
    # Always hand back the list, including for empty or all-punctuation
    # input, so callers never receive an implicit None.
    return punc
def get_punc_after(word):
    """Return the punctuation characters that trail ``word``.

    Characters are scanned from the end of ``word`` towards the start. Each
    character found in ``PUNCTUATION`` is collected; scanning stops at the
    first alphabetic character or at the first character that is not in
    ``PUNCTUATION``.

    Args:
        word: The string to inspect.

    Returns:
        A list of the trailing punctuation characters in the order they
        appear in ``word``. The list is empty when ``word`` is empty or does
        not end with punctuation.
    """
    punc = []
    for char in reversed(word):
        # Stop at the first letter or at anything that is not punctuation.
        if char.isalpha() or char not in PUNCTUATION:
            break
        punc.append(char)
    # Characters were gathered back-to-front; restore reading order once
    # instead of inserting at the head of the list on every step.
    punc.reverse()
    # Always hand back the list, including for empty or all-punctuation
    # input, so callers never receive an implicit None.
    return punc
def is_path(string):