Created converter for Speechmatics timing

This commit is contained in:
2018-11-22 03:56:03 -05:00
parent 4da2317db8
commit d5a37df5a8
12 changed files with 120525 additions and 16 deletions

View File

@@ -6,6 +6,8 @@ st = StanfordNERTagger('/usr/local/bin/english.all.3class.distsim.crf.ser.gz',
# NER labels that mark a tagged word as (part of) a proper noun.
PROPER_NOUN_TAGS = [
    'ORGANIZATION',
    'PERSON',
    'LOCATION',
]

# Characters treated as punctuation when inspecting the edges of a word.
PUNCTUATION = [
    '.',
    '?',
    ',',
    ':',
    '"',
    '!',
]
def tag_words(words):
    """Run the module-level NER tagger ``st`` over *words*.

    :param words: sequence of word strings to tag.
    :return: whatever ``st.tag`` returns for *words* (presumably a list of
        ``(word, tag)`` pairs -- confirm against the tagger's documentation).
    """
    tagged_words = st.tag(words)
    return tagged_words
@@ -15,3 +17,21 @@ def is_a_proper_noun(phrase):
tagged_words = tag_words(phrase.split())
return any(tagged_word[1] in PROPER_NOUN_TAGS
for tagged_word in tagged_words)
def get_punc_before(word):
    """Return the punctuation characters that precede the first letter of *word*.

    Scans *word* from the left and stops at the first alphabetic character.
    Characters found in ``PUNCTUATION`` along the way are collected in order;
    any other non-alphabetic character (digits, symbols, ...) is skipped.

    :param word: the string to inspect.
    :return: list of single-character strings; empty when nothing matches.
        A list is always returned, including for an empty *word* or one that
        contains no alphabetic character at all.
    """
    punc = []
    for char in word:
        if char.isalpha():
            break
        if char in PUNCTUATION:
            punc.append(char)
    # Returning after the loop (not only from inside it) guarantees a list
    # even when the scan runs off the end without meeting a letter.
    return punc
def get_punc_after(word):
    """Return the punctuation characters that follow the last letter of *word*.

    Scans *word* from the right and stops at the first alphabetic character
    met. Characters found in ``PUNCTUATION`` along the way are collected;
    any other non-alphabetic character (digits, symbols, ...) is skipped.

    :param word: the string to inspect.
    :return: list of single-character strings in their original left-to-right
        order; empty when nothing matches. A list is always returned,
        including for an empty *word* or one that contains no alphabetic
        character at all.
    """
    punc = []
    for char in reversed(word):
        if char.isalpha():
            break
        if char in PUNCTUATION:
            punc.append(char)
    # Characters were gathered right-to-left; flip once to restore the order
    # in which they appear in the word.
    punc.reverse()
    return punc