fixed problem with escaped Unicode chars, from Spanish sample data

2019-07-18 17:12:48 +02:00
parent 5693f91a20
commit 640710541e
7 changed files with 9486 additions and 12 deletions
--- a/transcript_processing/converter.py
+++ b/transcript_processing/converter.py
@@ -87,14 +87,14 @@ class TranscriptConverter:
        else:
            if word.upper() == 'I':
                return True
-            word_category = tagged_words[index][1] 
+            word_category = tagged_words[index][1]
            return word_category in helpers.PROPER_NOUN_TAGS

    def get_word_object(
-            self, 
-            word_object, 
-            index, 
-            tagged_words, 
+            self,
+            word_object,
+            index,
+            tagged_words,
            word_objects,
            speaker_segments=None,
            ):
@@ -107,7 +107,7 @@ class TranscriptConverter:
            self.check_if_always_capitalized(word, index, tagged_words),
            self.get_next_word(word_objects, index),
            self.get_speaker_id(word_object, speaker_segments),
-                ) 
+                )

    def get_next_word(self, word_objects, index):
        if index < len(word_objects) - 1: