Updated tests to the current converter format, updated the static/non-static decorators on the base class to match its children, added GoogleConverter, and moved one or two methods to the base class because they all work the same way.

This commit is contained in:
2019-03-07 02:30:48 -05:00
parent 6333f55c87
commit d30ecad583
16 changed files with 346 additions and 46 deletions

View File

@@ -0,0 +1,46 @@
from pathlib import Path
from nltk.tag.stanford import StanfordNERTagger
# Shared tagger instance, created when this module is imported. The model and
# jar locations are hard-coded absolute paths.
# NOTE(review): presumably both files must exist at these paths for the import
# to succeed -- confirm against the nltk StanfordNERTagger constructor.
st = StanfordNERTagger('/usr/local/bin/english.all.3class.distsim.crf.ser.gz',
'/usr/local/bin/stanford-ner.jar')
# Entity labels that is_a_proper_noun treats as marking a proper noun.
PROPER_NOUN_TAGS = ['ORGANIZATION', 'PERSON', 'LOCATION']
# Characters that get_punc_before / get_punc_after pick out of a word.
PUNCTUATION = ['.', '?', ',', ':', '"', '!']
def tag_words(words):
    """Tag *words* with the module-level Stanford NER tagger.

    Args:
        words: the tokens to tag; passed unchanged to ``st.tag``.

    Returns:
        Whatever ``st.tag`` returns for those tokens.
    """
    tagged = st.tag(words)
    return tagged
def is_a_proper_noun(phrase):
    """Report whether any word of *phrase* is tagged as a proper noun.

    The phrase is split on whitespace and the pieces are run through
    ``tag_words``; the second element of each tagged item is compared
    against ``PROPER_NOUN_TAGS``.

    Args:
        phrase: the text to inspect.

    Returns:
        True as soon as one tagged word carries a tag listed in
        ``PROPER_NOUN_TAGS``; False if none does.
    """
    for tagged in tag_words(phrase.split()):
        # Index 1 holds the tag assigned to the word.
        if tagged[1] in PROPER_NOUN_TAGS:
            return True
    return False
def get_punc_before(word):
    """Return the punctuation marks found before the first letter of *word*.

    Characters are scanned left to right until the first alphabetic
    character. Every scanned character listed in ``PUNCTUATION`` is kept, in
    order; any other non-letter character is skipped.

    Args:
        word: the string to inspect.

    Returns:
        A list of single-character strings. The result is always a list,
        including when *word* is empty or contains no letter at all
        (previously those cases fell off the end and returned ``None``).
    """
    punc = []
    for char in word:
        if char.isalpha():
            # The first letter ends the leading run.
            break
        if char in PUNCTUATION:
            punc.append(char)
    return punc
def get_punc_after(word):
    """Return the punctuation marks found after the last letter of *word*.

    Characters are scanned right to left until the first alphabetic
    character. Every scanned character listed in ``PUNCTUATION`` is kept;
    any other non-letter character is skipped. The result is given in the
    order the marks appear in *word*.

    Args:
        word: the string to inspect.

    Returns:
        A list of single-character strings. The result is always a list,
        including when *word* is empty or contains no letter at all
        (previously those cases fell off the end and returned ``None``).
    """
    punc = []
    for char in reversed(word):
        if char.isalpha():
            # The last letter of the word ends the trailing run.
            break
        if char in PUNCTUATION:
            punc.append(char)
    # Marks were gathered back to front; restore their original order.
    punc.reverse()
    return punc
def is_path(string):
    """Tell whether *string* names something that exists on the filesystem.

    Args:
        string: the candidate path.

    Returns:
        The result of ``Path(string).exists()``, or False when that check
        raises ``OSError``.
    """
    try:
        found = Path(string).exists()
    except OSError:
        # An OS-level failure while checking is treated as "not a path".
        found = False
    return found