created converter for speechmatics timing
This commit is contained in:
20
helpers.py
20
helpers.py
@@ -6,6 +6,8 @@ st = StanfordNERTagger('/usr/local/bin/english.all.3class.distsim.crf.ser.gz',
|
||||
|
||||
# Tag labels that is_a_proper_noun() treats as marking a proper noun; a phrase
# qualifies when at least one of its tagged words carries one of these labels.
PROPER_NOUN_TAGS = ['ORGANIZATION', 'PERSON', 'LOCATION']
|
||||
|
||||
# Characters that get_punc_before() and get_punc_after() collect from the
# edges of a word; any character not listed here is ignored by both.
PUNCTUATION = ['.', '?', ',', ':', '"', '!']
|
||||
|
||||
|
||||
def tag_words(words):
    """Tag ``words`` with the module-level Stanford NER tagger ``st``.

    ``words`` is handed to ``st.tag`` unchanged and the tagger's result is
    returned as-is. ``is_a_proper_noun`` reads index 1 of each returned item
    as that word's tag label.
    """
    tagged = st.tag(words)
    return tagged
|
||||
@@ -15,3 +17,21 @@ def is_a_proper_noun(phrase):
|
||||
tagged_words = tag_words(phrase.split())
|
||||
return any(tagged_word[1] in PROPER_NOUN_TAGS
|
||||
for tagged_word in tagged_words)
|
||||
|
||||
|
||||
def get_punc_before(word):
    """Collect the punctuation that precedes the first letter of ``word``.

    Characters are scanned left to right. Each character found in
    ``PUNCTUATION`` is collected, in order, until the first alphabetic
    character is reached. Any other non-alphabetic character (a digit, for
    example) is skipped without ending the scan.

    Args:
        word: The string to inspect.

    Returns:
        A list of the punctuation characters found, in their original order.
        The list is empty when there are none. When ``word`` contains no
        alphabetic character at all (``""``, ``"..."``), every punctuation
        character in it is returned.
    """
    punc = []
    for char in word:
        if char.isalpha():
            # First letter reached: everything after it is not "before".
            break
        if char in PUNCTUATION:
            punc.append(char)
    # Single exit point so that a word without any letter still yields a
    # list; previously the loop fell through and the function returned None.
    return punc
|
||||
|
||||
|
||||
def get_punc_after(word):
    """Collect the punctuation that follows the last letter of ``word``.

    Characters are scanned right to left. Each character found in
    ``PUNCTUATION`` is collected until the first alphabetic character is
    reached. Any other non-alphabetic character (a digit, for example) is
    skipped without ending the scan.

    Args:
        word: The string to inspect.

    Returns:
        A list of the punctuation characters found, in the order they appear
        in ``word``. The list is empty when there are none. When ``word``
        contains no alphabetic character at all (``""``, ``"..."``), every
        punctuation character in it is returned.
    """
    punc = []
    for char in reversed(word):
        if char.isalpha():
            # Last letter reached: everything before it is not "after".
            break
        if char in PUNCTUATION:
            punc.append(char)
    # Characters were gathered back to front; restore left-to-right order.
    punc.reverse()
    # Single exit point so that a word without any letter still yields a
    # list; previously the loop fell through and the function returned None.
    return punc
|
||||
Reference in New Issue
Block a user