added speaker_id conversions to Amazon and Google

This commit is contained in:
2019-03-08 10:23:01 -05:00
parent 3fc6dacfde
commit 0301b3be23
6 changed files with 130 additions and 55 deletions

38
tests/test_amazon.py Normal file
View File

@@ -0,0 +1,38 @@
import json
import os
import pytest
from transcript_processing.converters.amazon import AmazonConverter
@pytest.fixture
def transcript_data():
    """Load the Amazon transcript used as input for the tests in this module.

    The file path is taken from the ``AMAZON_TRANSCRIPT_TEST_FILE``
    environment variable and the file contents are parsed as JSON.

    Returns:
        The object produced by ``json.load`` for the transcript file.

    Raises:
        RuntimeError: if ``AMAZON_TRANSCRIPT_TEST_FILE`` is unset or empty.
    """
    path = os.getenv('AMAZON_TRANSCRIPT_TEST_FILE')
    if not path:
        # os.getenv returns None for a missing variable; passing that to
        # open() fails with a TypeError that never mentions the variable.
        raise RuntimeError(
            'AMAZON_TRANSCRIPT_TEST_FILE must be set to the path of an '
            'Amazon transcript JSON file'
        )
    # Pin the encoding so the result does not depend on the platform locale.
    with open(path, 'r', encoding='utf-8') as fin:
        return json.load(fin)
@pytest.fixture
def converter(transcript_data):
    """Build an ``AmazonConverter`` from the ``transcript_data`` fixture."""
    amazon_converter = AmazonConverter(transcript_data)
    return amazon_converter
def test_get_word_objects(converter):
    """``get_word_objects`` returns a truthy result for the converter's own JSON."""
    assert converter.get_word_objects(converter.json_data)
def test_get_speaker_segments(converter):
    """``get_speaker_segments`` returns a truthy result for the test transcript."""
    assert converter.get_speaker_segments()
def test_get_speaker_id(converter):
    """Spot-check two entries of the mapping from ``get_speaker_segments``.

    The expected values are tied to the specific transcript file used by
    the test run.
    """
    segments = converter.get_speaker_segments()
    # NOTE(review): keys look like timestamps in seconds and values like
    # speaker ids -- confirm against AmazonConverter.get_speaker_segments.
    expected_by_key = {54.58: 0, 32.36: 1}
    for key, expected in expected_by_key.items():
        assert segments[key] == expected
def test_convert(converter):
    """Smoke test: run ``convert`` and print ``converted_words``.

    There is no assertion here; the test only fails if ``convert`` or the
    attribute access raises.
    """
    converter.convert()
    words = converter.converted_words
    print(words)

View File

@@ -1,33 +0,0 @@
import json
import os
import pytest
from transcript_processing.converters.google import (
make_json_friendly,
GoogleConverter,
)
from transcript_processing.config import GOOGLE_TRANSCRIPT_TEST_FILE
@pytest.fixture
def transcript():
    """Return the raw text of the Google transcript test file.

    The path comes from the ``GOOGLE_TRANSCRIPT_TEST_FILE`` constant
    imported from ``transcript_processing.config``.
    """
    # Pin the encoding so the text read does not depend on the platform locale.
    with open(GOOGLE_TRANSCRIPT_TEST_FILE, 'r', encoding='utf-8') as fin:
        return fin.read()
def test_make_json_friendly(transcript):
    """``make_json_friendly`` output parses as JSON and is truthy."""
    parsed = json.loads(make_json_friendly(transcript))
    assert parsed
def test_pre_process(transcript):
    """A ``GoogleConverter`` built from the raw transcript has truthy ``json_data``.

    The transcript is read from the path in the
    ``GOOGLE_TRANSCRIPT_TEST_FILE`` environment variable. The ``transcript``
    fixture is requested but not used by the body; it is kept so the test's
    signature is unchanged.

    Raises:
        RuntimeError: if ``GOOGLE_TRANSCRIPT_TEST_FILE`` is unset or empty.
    """
    path = os.getenv('GOOGLE_TRANSCRIPT_TEST_FILE')
    if not path:
        # os.getenv returns None for a missing variable; passing that to
        # open() fails with a TypeError that never mentions the variable.
        raise RuntimeError(
            'GOOGLE_TRANSCRIPT_TEST_FILE must be set to the path of a '
            'Google transcript file'
        )
    # Pin the encoding so the text read does not depend on the platform locale.
    with open(path, 'r', encoding='utf-8') as fin:
        transcript_data = fin.read()
    g = GoogleConverter(transcript_data)
    assert g.json_data
    print(g.json_data)

39
tests/test_google.py Normal file
View File

@@ -0,0 +1,39 @@
import json
import os
import pytest
from transcript_processing.converters.google import (
make_json_friendly,
GoogleConverter,
)
@pytest.fixture
def transcript_data():
    """Return the raw text of the Google transcript used by this module's tests.

    The file path is taken from the ``GOOGLE_TRANSCRIPT_TEST_FILE``
    environment variable. The contents are returned unparsed.

    Raises:
        RuntimeError: if ``GOOGLE_TRANSCRIPT_TEST_FILE`` is unset or empty.
    """
    path = os.getenv('GOOGLE_TRANSCRIPT_TEST_FILE')
    if not path:
        # os.getenv returns None for a missing variable; passing that to
        # open() fails with a TypeError that never mentions the variable.
        raise RuntimeError(
            'GOOGLE_TRANSCRIPT_TEST_FILE must be set to the path of a '
            'Google transcript file'
        )
    # Pin the encoding so the text read does not depend on the platform locale.
    with open(path, 'r', encoding='utf-8') as fin:
        return fin.read()
@pytest.fixture
def converter(transcript_data):
    """Build a ``GoogleConverter`` from the ``transcript_data`` fixture."""
    google_converter = GoogleConverter(transcript_data)
    return google_converter
def test_get_word_objects(converter):
    """``get_word_objects`` returns a truthy result for the converter's own JSON."""
    assert converter.get_word_objects(converter.json_data)
def test_make_json_friendly(transcript_data):
    """``make_json_friendly`` output parses as JSON and is truthy."""
    parsed = json.loads(make_json_friendly(transcript_data))
    assert parsed
def test_pre_process(converter):
    """A freshly constructed converter already exposes truthy ``json_data``."""
    parsed = converter.json_data
    assert parsed
def test_convert(converter):
    """Smoke test: run ``convert`` and print ``converted_words``.

    There is no assertion here; the test only fails if ``convert`` or the
    attribute access raises.
    """
    converter.convert()
    words = converter.converted_words
    print(words)