Simplified to no longer produce pipe-able output; made the Google and Amazon converters work with and without speaker IDs

This commit is contained in:
2019-03-08 14:26:16 -05:00
parent 0301b3be23
commit 8f63320be4
5 changed files with 80 additions and 42 deletions

View File

@@ -12,27 +12,46 @@ def transcript_data():
return json.load(fin)
@pytest.fixture
def transcript_data_no_speaker_id():
    """Load the Amazon "no speaker ID" transcript as parsed JSON.

    The file path comes from the AMAZON_TRANSCRIPT_TEST_FILE_NO_SPEAKER_ID
    environment variable.
    """
    fixture_path = os.getenv('AMAZON_TRANSCRIPT_TEST_FILE_NO_SPEAKER_ID')
    with open(fixture_path, 'r') as transcript_file:
        parsed = json.load(transcript_file)
    return parsed
@pytest.fixture
def converter(transcript_data):
    """Build an AmazonConverter from the ``transcript_data`` fixture."""
    amazon_converter = AmazonConverter(transcript_data)
    return amazon_converter
@pytest.fixture
def converter_no_speaker_id(transcript_data_no_speaker_id):
    """Build an AmazonConverter from the no-speaker-ID transcript fixture."""
    amazon_converter = AmazonConverter(transcript_data_no_speaker_id)
    return amazon_converter
def test_get_word_objects(converter, converter_no_speaker_id):
    """get_word_objects returns a truthy value for both converters."""
    # Each converter is fed its own json_data, first the one with speaker
    # IDs, then the one without.
    for conv in (converter, converter_no_speaker_id):
        assert conv.get_word_objects(conv.json_data)
def test_get_speaker_segments(converter, converter_no_speaker_id):
    """Speaker segments are truthy with speaker IDs and None without."""
    assert converter.get_speaker_segments()
    assert converter_no_speaker_id.get_speaker_segments() is None
def test_get_speaker_id(converter):
    """Check the speaker value stored under two known segment keys."""
    segments = converter.get_speaker_segments()
    # (segment key, expected speaker) pairs taken from the test transcript.
    expected_speakers = ((54.58, 0), (32.36, 1))
    for segment_key, speaker in expected_speakers:
        assert segments[segment_key] == speaker
def test_convert(converter, converter_no_speaker_id):
    """Run convert() on both converters and print each one's converted words.

    The test passes as long as neither convert() call raises; the printed
    words are for manual inspection only.
    """
    converter.convert()
    print(converter.converted_words)

    converter_no_speaker_id.convert()
    # Show the no-speaker-ID conversion rather than repeating the first
    # converter's words.
    print(converter_no_speaker_id.converted_words)

View File

@@ -15,25 +15,42 @@ def transcript_data():
return fin.read()
@pytest.fixture
def transcript_data_no_speaker_id():
    """Return the raw text of the Google "no speaker ID" transcript file.

    The file path comes from the GOOGLE_TRANSCRIPT_TEST_FILE_NO_SPEAKER_ID
    environment variable; the contents are returned unparsed.
    """
    source_path = os.getenv('GOOGLE_TRANSCRIPT_TEST_FILE_NO_SPEAKER_ID')
    with open(source_path, 'r') as handle:
        raw_text = handle.read()
    return raw_text
@pytest.fixture
def converter(transcript_data):
    """Build a GoogleConverter from the ``transcript_data`` fixture."""
    google_converter = GoogleConverter(transcript_data)
    return google_converter
def test_get_word_objects(converter):
@pytest.fixture
def converter_no_speaker_id(transcript_data_no_speaker_id):
    """Build a GoogleConverter from the no-speaker-ID transcript fixture."""
    google_converter = GoogleConverter(transcript_data_no_speaker_id)
    return google_converter
def test_get_word_objects(converter, converter_no_speaker_id):
    """get_word_objects returns a truthy value for both converters."""
    # Each converter is fed its own json_data, first the one with speaker
    # IDs, then the one without.
    for conv in (converter, converter_no_speaker_id):
        assert conv.get_word_objects(conv.json_data)
def test_convert(converter, converter_no_speaker_id):
    """Smoke test: convert() completes without raising on both converters."""
    for conv in (converter, converter_no_speaker_id):
        conv.convert()
def test_make_json_friendly(transcript_data):
    """make_json_friendly output parses as JSON to a truthy value."""
    json_text = make_json_friendly(transcript_data)
    parsed = json.loads(json_text)
    assert parsed
def test_pre_process(converter, converter_no_speaker_id):
    """Both converters expose truthy json_data once constructed."""
    for conv in (converter, converter_no_speaker_id):
        assert conv.json_data