fixed problem with escaped unicode chars, from Spanish sample data

2019-07-18 17:12:48 +02:00
parent 5693f91a20
commit 640710541e
7 changed files with 9486 additions and 12 deletions
--- a/transcript_processing/helpers.py
+++ b/transcript_processing/helpers.py
@@ -24,19 +24,19 @@ def is_a_proper_noun(phrase):
 def get_punc_before(word):
    punc = []
    for char in word:
-        if char.isalpha():
-            return punc
        if char in PUNCTUATION:
            punc.append(char)
+        else:
+            return punc


 def get_punc_after(word):
    punc = []
    for char in reversed(word):
-        if char.isalpha():
-            return punc
        if char in PUNCTUATION:
            punc.insert(0, char)
+        else:
+            return punc


 def is_path(string):