Finished refactoring to a single repo and to OOP, so new ASR APIs are straightforward to add. Added a Gentle converter, viral_overlay JSON output, and tests.
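The extension point this refactor is after, as a sketch: each ASR service becomes one subclass of TranscriptConverter that maps the service's JSON onto shared accessors (converters/gentle.py below is the real example added by this commit; the class and field names here are hypothetical, and a per-service convert_words would still handle punctuation quirks).

    from converter import TranscriptConverter

    # Hypothetical new backend illustrating the pattern; only the JSON
    # field names differ from service to service.
    class MyASRConverter(TranscriptConverter):

        def get_word_objects(self, json_data):
            # whatever key the service nests its word list under
            return json_data['words']

        def get_words(self, word_objects):
            return [self.get_word_word(w) for w in word_objects]

        @staticmethod
        def get_word_start(word_object):
            return float(word_object['start'])

        @staticmethod
        def get_word_end(word_object):
            return float(word_object['end'])

        @staticmethod
        def get_word_confidence(word_object):
            return float(word_object['confidence'])

        @staticmethod
        def get_word_word(word_object):
            return word_object['text']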
Pipfile (new file)
@@ -0,0 +1,13 @@
+[[source]]
+name = "pypi"
+url = "https://pypi.org/simple"
+verify_ssl = true
+
+[dev-packages]
+
+[packages]
+nltk = "*"
+pytest = "*"
+
+[requires]
+python_version = "3.7"
Pipfile.lock (new file, generated)
@@ -0,0 +1,86 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "1b05879d48694b4c7fe1234da3a3744660dd38272835812cc05b270d6f9b06de"
+        },
+        "pipfile-spec": 6,
+        "requires": {
+            "python_version": "3.7"
+        },
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://pypi.org/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "atomicwrites": {
+            "hashes": [
+                "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4",
+                "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6"
+            ],
+            "version": "==1.3.0"
+        },
+        "attrs": {
+            "hashes": [
+                "sha256:10cbf6e27dbce8c30807caf056c8eb50917e0eaafe86347671b57254006c3e69",
+                "sha256:ca4be454458f9dec299268d472aaa5a11f67a4ff70093396e1ceae9c76cf4bbb"
+            ],
+            "version": "==18.2.0"
+        },
+        "more-itertools": {
+            "hashes": [
+                "sha256:38a936c0a6d98a38bcc2d03fdaaedaba9f412879461dd2ceff8d37564d6522e4",
+                "sha256:c0a5785b1109a6bd7fac76d6837fd1feca158e54e521ccd2ae8bfe393cc9d4fc",
+                "sha256:fe7a7cae1ccb57d33952113ff4fa1bc5f879963600ed74918f1236e212ee50b9"
+            ],
+            "version": "==5.0.0"
+        },
+        "nltk": {
+            "hashes": [
+                "sha256:286f6797204ffdb52525a1d21ec0a221ec68b8e3fa4f2d25f412ac8e63c70e8d"
+            ],
+            "index": "pypi",
+            "version": "==3.4"
+        },
+        "pluggy": {
+            "hashes": [
+                "sha256:8ddc32f03971bfdf900a81961a48ccf2fb677cf7715108f85295c67405798616",
+                "sha256:980710797ff6a041e9a73a5787804f848996ecaa6f8a1b1e08224a5894f2074a"
+            ],
+            "version": "==0.8.1"
+        },
+        "py": {
+            "hashes": [
+                "sha256:bf92637198836372b520efcba9e020c330123be8ce527e535d185ed4b6f45694",
+                "sha256:e76826342cefe3c3d5f7e8ee4316b80d1dd8a300781612ddbc765c17ba25a6c6"
+            ],
+            "version": "==1.7.0"
+        },
+        "pytest": {
+            "hashes": [
+                "sha256:65aeaa77ae87c7fc95de56285282546cfa9c886dc8e5dc78313db1c25e21bc07",
+                "sha256:6ac6d467d9f053e95aaacd79f831dbecfe730f419c6c7022cb316b365cd9199d"
+            ],
+            "index": "pypi",
+            "version": "==4.2.0"
+        },
+        "singledispatch": {
+            "hashes": [
+                "sha256:5b06af87df13818d14f08a028e42f566640aef80805c3b50c5056b086e3c2b9c",
+                "sha256:833b46966687b3de7f438c761ac475213e53b306740f1abfaa86e1d1aae56aa8"
+            ],
+            "version": "==3.4.0.3"
+        },
+        "six": {
+            "hashes": [
+                "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c",
+                "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73"
+            ],
+            "version": "==1.12.0"
+        }
+    },
+    "develop": {}
+}
README.md (new file)
@@ -0,0 +1,6 @@
+# Non-pip Requirement: Stanford NER JAR
+
+- download the .jar [here](https://nlp.stanford.edu/software/CRF-NER.shtml#Download)
+- put these files in /usr/local/bin/:
+    - stanford-ner.jar
+    - english.all.3class.distsim.crf.ser.gz
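Presumably these paths feed nltk's Stanford NER wrapper for the proper-noun detection in helpers; a minimal sketch of that wiring, assuming the install locations above (the actual helpers module is not shown in this commit):

    from nltk.tag import StanfordNERTagger

    # Paths follow the README's install location; the wrapper shells out
    # to Java, so a JRE must also be on the PATH.
    tagger = StanfordNERTagger(
        '/usr/local/bin/english.all.3class.distsim.crf.ser.gz',
        '/usr/local/bin/stanford-ner.jar')

    print(tagger.tag('Barack Obama visited Paris'.split()))
    # e.g. [('Barack', 'PERSON'), ('Obama', 'PERSON'),
    #       ('visited', 'O'), ('Paris', 'LOCATION')]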
(deleted file)
@@ -1 +0,0 @@
-from transcript_processing.converter import TranscriptConverter
converter.py
@@ -4,7 +4,7 @@ from collections import namedtuple
 import os
 
 import helpers
-from transcript_processing.converters import converters
+import converters
 
 
 
@@ -27,8 +27,7 @@ class TranscriptConverter:
         word_objects = self.get_word_objects(data)
         words = self.get_words(word_objects)
 
-        if self.output_target == 'interactive_transcript':
-            tagged_words = helpers.tag_words(words)
+        tagged_words = helpers.tag_words(words)
 
         self.converted_words = self.convert_words(
             word_objects,
@@ -89,10 +88,19 @@
         if index < len(word_objects) - 1:
             return word_objects[index + 1]
 
-    def to_json(self):
+    def interactive_transcript(self):
         return json.dumps(self.converted_words, indent=4)
 
+    def viral_overlay(self):
+        return json.dumps(
+            [{'start': word['start'],
+              'stop': word['end'],
+              'word': word['word']}
+             for word in self.converted_words],
+            indent=4
+        )
+
     def save(self, path):
         with open(path, 'w') as fout:
-            fout.write(self.to_json())
+            fout.write(getattr(self, self.output_target)())
         return path
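save() now dispatches on output_target by method name via getattr, so each output format is just another serializer method on the base class. A minimal usage sketch, matching how the tests drive it (the input path is hypothetical):

    from converters.gentle import GentleConverter

    # 'viral_overlay' names the serializer method save() looks up with
    # getattr; 'interactive_transcript' is the other option in this commit.
    converter = GentleConverter('gentle_alignment.json', 'viral_overlay')
    converter.convert()
    converter.save('overlay.json')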
(deleted file)
@@ -1,24 +0,0 @@
-"""
-
-fields for converted transcript:
-
-start
-end
-word
-confidence
-index
-always_capitalized
-punc_before
-punc_after
-
-"""
-
-from transcript_processing.converters.amazon import amazon_converter
-from transcript_processing.converters.speechmatics import speechmatics_aligned_text_converter, speechmatics_converter
-
-
-converters = {
-    'speechmatics': speechmatics_converter,
-    'speechmatics_align': speechmatics_aligned_text_converter,
-    'amazon': amazon_converter,
-}
converters/amazon.py
@@ -1,6 +1,7 @@
 import json
 
-from transcript_processing import helpers
+from converter import TranscriptConverter
+import helpers
 
 
 
@@ -10,11 +11,11 @@ class AmazonConverter(TranscriptConverter):
         super().__init__(path, output_target)
 
     def get_word_objects(self, json_data):
-        return data['results']['items']
+        return json_data['results']['items']
 
     def get_words(self, word_objects):
         return [self.get_word_word(w)
-                for w in word_objects])
+                for w in word_objects]
 
     @staticmethod
     def get_word_start(word_object):
@@ -30,7 +31,7 @@ class AmazonConverter(TranscriptConverter):
 
     @staticmethod
     def get_word_word(word_object):
-        word_word = w['alternatives'][0]['content']
+        word_word = word_object['alternatives'][0]['content']
         if word_word == 'i':
             # weird Amazon quirk
             word_word = 'I'
@@ -44,11 +45,11 @@ class AmazonConverter(TranscriptConverter):
         num_words = len(words)
         index = 0
 
-        for i, w in enumerate(words):
+        for i, w in enumerate(word_objects):
             if w['type'] == 'punctuation':
                 continue
             next_word_punc_after = None
-            word_obj = self.get_word_object(w, i, tagged_words, words)
+            word_obj = self.get_word_object(w, i, tagged_words, word_objects)
 
             if word_obj.next_word:
                 next_word = self.get_word_word(word_obj.next_word)
@@ -60,7 +61,7 @@ class AmazonConverter(TranscriptConverter):
                 next_word_punc_after = None
 
             if word_obj.word.lower() == 'you' and next_word == 'know':
-                prev_word = words[i - 1]
+                prev_word = word_objects[i - 1]
                 if prev_word['type'] != 'punctuation':
                     converted_words[-1]['punc_after'] = ','
                 if next_word_type != 'punctuation':
@@ -83,64 +84,3 @@ class AmazonConverter(TranscriptConverter):
             punc_after = False
 
         return converted_words
-
-
-def amazon_converter(data: dict):
-    data = json.load(data)
-    converted_words = []
-    words = data['results']['items']
-    tagged_words = helpers.tag_words(
-        [w['alternatives'][0]['content'] for w in words])
-    punc_before = False
-    punc_after = False
-    num_words = len(words)
-    index = 0
-
-    for i, w in enumerate(words):
-        if w['type'] == 'punctuation':
-            continue
-        next_word_punc_after = None
-        word_start = float(w['start_time'])
-        word_end = float(w['end_time'])
-        confidence = float(w['alternatives'][0]['confidence'])
-        word = w['alternatives'][0]['content']
-        is_proper_noun = tagged_words[i][1] in helpers.PROPER_NOUN_TAGS
-
-        next_word = None
-        if i < num_words - 1:
-            next_word = words[i + 1]['alternatives'][0]['content']
-            next_word_type = words[i + 1]['type']
-            if next_word == '.':
-                punc_after = '.'
-            elif next_word == ',':
-                punc_after = ','
-            elif next_word_punc_after:
-                punc_after = next_word_punc_after
-                next_word_punc_after = None
-
-        if word == 'i':
-            # weird Amazon quirk
-            word = 'I'
-
-        if word.lower() == 'you' and next_word == 'know':
-            prev_word = words[i - 1]
-            if prev_word['type'] != 'punctuation':
-                converted_words[-1]['punc_after'] = ','
-            if next_word_type != 'punctuation':
-                next_word_punc_after = ','
-
-        converted_words.append({
-            'start': word_start,
-            'end': word_end,
-            'confidence': confidence,
-            'word': word,
-            'always_capitalized': is_proper_noun or word == 'I',
-            'index': index,
-            'punc_after': punc_after,
-            'punc_before': punc_before,
-        })
-
-        index += 1
-        punc_after = False
-
-    return converted_words
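For reference, the Amazon Transcribe payload this converter reads has roughly this shape; values mirror the test fixture's first word, and times and confidences arrive as strings:

    # Illustrative shape of json_data['results']['items'].
    amazon_data = {
        'jobName': 'Lelandmp3',
        'results': {
            'items': [
                {'type': 'pronunciation',
                 'start_time': '5.49',
                 'end_time': '5.97',
                 'alternatives': [{'confidence': '1.0', 'content': 'So'}]},
                # punctuation items carry no timestamps, which is why the
                # converter skips them when indexing words
                {'type': 'punctuation',
                 'alternatives': [{'confidence': '0.0', 'content': '.'}]},
            ]
        }
    }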
converters/gentle.py (new file)
@@ -0,0 +1,60 @@
+from converter import TranscriptConverter
+
+
+
+class GentleConverter(TranscriptConverter):
+
+    def __init__(self, path, output_target):
+        super().__init__(path, output_target)
+
+    def get_word_objects(self, json_data):
+        return json_data['words']
+
+    def get_words(self, word_objects):
+        return [self.get_word_word(w)
+                for w in word_objects]
+
+    @staticmethod
+    def get_word_start(word_object):
+        return word_object['start']
+
+    @staticmethod
+    def get_word_end(word_object):
+        return word_object['end']
+
+    @staticmethod
+    def get_word_confidence(word_object):
+        return 1
+
+    @staticmethod
+    def get_word_word(word_object):
+        return word_object['alignedWord']
+
+    def convert_words(self, word_objects, words, tagged_words=None):
+        converted_words = []
+        punc_before = False
+        punc_after = False
+        num_words = len(words)
+        index = 0
+
+        for i, w in enumerate(word_objects):
+            word_obj = self.get_word_object(w, i, tagged_words, word_objects)
+
+            converted_words.append({
+                'start': word_obj.start,
+                'end': word_obj.end,
+                'confidence': word_obj.confidence,
+                'word': word_obj.word,
+                'always_capitalized': (
+                    word_obj.is_proper_noun
+                    or word_obj.word == 'I'),
+                'index': index,
+                'punc_after': punc_after,
+                'punc_before': punc_before,
+            })
+
+            index += 1
+            punc_after = False
+
+        return converted_words
+
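Gentle is a forced aligner rather than a recognizer, which is why get_word_confidence hard-codes 1; its alignment JSON looks roughly like this (the first entry mirrors the test fixture, the 'word' value is hypothetical):

    # Illustrative shape of json_data['words'] from Gentle; a real response
    # also carries the full 'transcript' text and phoneme-level detail.
    gentle_data = {
        'words': [
            {'alignedWord': '[noise]',
             'word': 'um',          # original transcript token, hypothetical
             'start': 0.35,
             'end': 1.58,
             'case': 'success'},
        ]
    }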
converters/speechmatics.py
@@ -1,10 +1,74 @@
 from collections import namedtuple
 import json
 
-from transcript_processing import helpers
+from converter import TranscriptConverter
+import helpers
 
 
-Word = namedtuple('Word', 'start end word')
+class SpeechmaticsConverter(TranscriptConverter):
+
+    def __init__(self, path, output_target):
+        super().__init__(path, output_target)
+
+    def get_word_objects(self, json_data):
+        return json_data['words']
+
+    def get_words(self, word_objects):
+        return [self.get_word_word(w)
+                for w in word_objects]
+
+    @staticmethod
+    def get_word_start(word_object):
+        return float(word_object['time'])
+
+    @staticmethod
+    def get_word_end(word_object):
+        return (SpeechmaticsConverter.get_word_start(word_object)
+                + float(word_object['duration']))
+
+    @staticmethod
+    def get_word_confidence(word_object):
+        return float(word_object['confidence'])
+
+    @staticmethod
+    def get_word_word(word_object):
+        return word_object['name']
+
+    def convert_words(self, word_objects, words, tagged_words=None):
+        converted_words = []
+        punc_before = False
+        punc_after = False
+        num_words = len(words)
+        index = 0
+
+        for i, w in enumerate(word_objects):
+            word_obj = self.get_word_object(w, i, tagged_words, word_objects)
+            if word_obj.word == '.':
+                continue
+
+            if word_obj.next_word:
+                next_word = self.get_word_word(word_obj.next_word)
+                if next_word == '.':
+                    punc_after = '.'
+
+            converted_words.append({
+                'start': word_obj.start,
+                'end': word_obj.end,
+                'confidence': word_obj.confidence,
+                'word': word_obj.word,
+                'always_capitalized': (
+                    word_obj.is_proper_noun
+                    or word_obj.word == 'I'),
+                'index': index,
+                'punc_after': punc_after,
+                'punc_before': punc_before,
+            })
+
+            index += 1
+            punc_after = False
+
+        return converted_words
+
+
 def speechmatics_converter(data):
@@ -55,6 +119,8 @@ def speechmatics_aligned_text_converter(data):
     class Exhausted(Exception):
         pass
 
+    Word = namedtuple('Word', 'start end word')
+
     def get_time(transcript, index):
         time_index = transcript.find('time=', index)
         if time_index == -1:
@@ -108,6 +174,3 @@ def speechmatics_aligned_text_converter(data):
         })
 
     return converted_words
-
-
-def gentle_converter
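The Speechmatics word list the new class consumes derives start from 'time' and end from 'time' + 'duration'; roughly (values mirror the test fixture's first word, 5.98 + 0.13 = 6.11):

    # Illustrative shape of json_data['words'] from Speechmatics; the
    # numeric fields arrive as strings, hence the float() calls above.
    speechmatics_data = {
        'words': [
            {'name': 'For', 'time': '5.98',
             'duration': '0.13', 'confidence': '0.67'},
            {'name': '.', 'time': '6.11',
             'duration': '0.0', 'confidence': '1.0'},
        ]
    }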
tests/test_conversion.py (new file)
@@ -0,0 +1,70 @@
+import json
+import os
+
+import pytest
+
+from converters.amazon import AmazonConverter
+from converters.speechmatics import SpeechmaticsConverter
+from converters.gentle import GentleConverter
+
+
+@pytest.fixture
+def json_transcript():
+    with open(os.getenv('AMAZON_TRANSCRIPT_TEST_FILE')) as fin:
+        transcript = json.load(fin)
+    yield transcript
+
+
+def test_json_transcript(json_transcript):
+    assert json_transcript["jobName"] == "Lelandmp3"
+
+
+def test_amazon():
+    a = AmazonConverter(
+        os.getenv('AMAZON_TRANSCRIPT_TEST_FILE'),
+        'interactive_transcript')
+    a.convert()
+    assert a.converted_words[0] == {
+        'start': 5.49,
+        'end': 5.97,
+        'confidence': 1.0,
+        'word': 'So',
+        'always_capitalized': False,
+        'index': 0,
+        'punc_after': False,
+        'punc_before': False
+    }
+
+
+def test_speechmatics():
+    a = SpeechmaticsConverter(
+        os.getenv('SPEECHMATICS_TRANSCRIPT_TEST_FILE'),
+        'interactive_transcript')
+    a.convert()
+    assert a.converted_words[0] == {
+        'start': 5.98,
+        'end': 6.11,
+        'confidence': 0.67,
+        'word': 'For',
+        'always_capitalized': False,
+        'index': 0,
+        'punc_after': False,
+        'punc_before': False,
+    }
+
+
+def test_gentle():
+    a = GentleConverter(
+        os.getenv('GENTLE_TRANSCRIPT_TEST_FILE'),
+        'interactive_transcript')
+    a.convert()
+    assert a.converted_words[0] == {
+        'start': 0.35,
+        'end': 1.58,
+        'confidence': 1,
+        'word': '[noise]',
+        'always_capitalized': False,
+        'index': 0,
+        'punc_after': False,
+        'punc_before': False
+    }
tests/test_convert_viraloverlay.py (new file)
@@ -0,0 +1,23 @@
+
+
+import json
+import os
+
+import pytest
+
+from converters.amazon import AmazonConverter
+from converters.speechmatics import SpeechmaticsConverter
+from converters.gentle import GentleConverter
+
+
+
+def test_gentle():
+    a = GentleConverter(
+        os.getenv('GENTLE_TRANSCRIPT_TEST_FILE'),
+        'viral_overlay')
+    a.convert()
+    assert json.loads(a.viral_overlay())[0] == {
+        'start': 0.35,
+        'stop': 1.58,
+        'word': '[noise]',
+    }
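Both test modules resolve their fixture transcripts from environment variables rather than checked-in paths; a minimal sketch of a local run, with hypothetical fixture locations (the sample transcripts themselves are not part of this commit):

    import os
    import pytest

    # Point each converter's test at a sample output file from that service.
    os.environ['AMAZON_TRANSCRIPT_TEST_FILE'] = 'tests/fixtures/amazon.json'
    os.environ['SPEECHMATICS_TRANSCRIPT_TEST_FILE'] = 'tests/fixtures/speechmatics.json'
    os.environ['GENTLE_TRANSCRIPT_TEST_FILE'] = 'tests/fixtures/gentle.json'

    pytest.main(['tests'])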