updated tests to current format of converters, updated static/not-static decorators on base class to match its children, added GoogleConverter, moved one or two methods to the base class because they all work the same.

This commit is contained in:
2019-03-07 02:30:48 -05:00
parent 6333f55c87
commit d30ecad583
16 changed files with 346 additions and 46 deletions

View File

@@ -0,0 +1,97 @@
import abc
from collections import namedtuple
import os
from . import helpers
from . import converters
Word = namedtuple('Word', 'start end confidence word always_capitalized next_word')
class TranscriptConverter:
__metaclass__ = abc.ABCMeta
def __init__(self, json_data: dict):
self.json_data = json_data
def convert(self):
tagged_words = None
word_objects = self.get_word_objects(self.json_data)
words = self.get_words(word_objects)
tagged_words = helpers.tag_words(words)
self.converted_words = self.convert_words(
word_objects,
words,
tagged_words
)
@abc.abstractmethod
def get_word_objects(self, json_data):
pass
def get_words(self, word_objects):
return [self.get_word_word(w)
for w in word_objects]
@abc.abstractmethod
def convert_words(self, word_objects, words, tagged_words=None):
pass
@staticmethod
@abc.abstractmethod
def get_word_start(word_object):
pass
@staticmethod
@abc.abstractmethod
def get_word_end(word_object):
pass
@staticmethod
@abc.abstractmethod
def get_word_confidence(word_object):
pass
@staticmethod
@abc.abstractmethod
def get_word_word(word_object):
pass
@staticmethod
def check_if_always_capitalized(word, index, tagged_words):
if word.upper() == 'I':
return True
word_category = tagged_words[index][1]
return word_category in helpers.PROPER_NOUN_TAGS
def get_word_object(self, word_object, index, tagged_words, word_objects):
word = self.get_word_word(word_object)
return Word(
self.get_word_start(word_object),
self.get_word_end(word_object),
self.get_word_confidence(word_object),
word,
self.check_if_always_capitalized(word, index, tagged_words),
self.get_next_word(word_objects, index)
)
def get_next_word(self, word_objects, index):
if index < len(word_objects) - 1:
return word_objects[index + 1]
def save(self, path, output_target):
with open(path, 'w') as fout:
fout.write(getattr(self, output_target)())
return path
from . import outputs
for name, val in outputs.__dict__.items():
if callable(val):
setattr(TranscriptConverter, name, val)