# 92 lines | 2.4 KiB | Python
import json
|
|
import os
|
|
|
|
import pytest
|
|
|
|
from transcript_processing.converters.amazon import AmazonConverter
|
|
from transcript_processing.converters.speechmatics import SpeechmaticsConverter
|
|
from transcript_processing.converters.gentle import GentleConverter
|
|
from transcript_processing.converters.google import GoogleConverter
|
|
|
|
|
|
@pytest.fixture
def json_transcript():
    """Load the Amazon sample transcript as parsed JSON.

    The file path is read from the ``AMAZON_TRANSCRIPT_TEST_FILE``
    environment variable.

    Returns:
        The object produced by ``json.load`` for that file.

    Raises:
        KeyError: if ``AMAZON_TRANSCRIPT_TEST_FILE`` is not set.
    """
    # os.environ[...] fails with the variable's name when it is unset;
    # os.getenv would hand None to open() and raise an opaque TypeError.
    path = os.environ['AMAZON_TRANSCRIPT_TEST_FILE']
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(path, encoding='utf-8') as fin:
        # No teardown is needed, so a plain return replaces the yield.
        return json.load(fin)
|
|
|
|
|
|
def test_json_transcript(json_transcript):
    """Sanity-check the document supplied by the ``json_transcript`` fixture.

    The top-level ``jobName`` entry must equal "Lelandmp3".
    """
    actual_job_name = json_transcript["jobName"]
    expected_job_name = "Lelandmp3"
    assert actual_job_name == expected_job_name
|
|
|
|
|
|
def test_amazon():
    """Check the first word AmazonConverter yields for the sample file.

    Loads the JSON file named by ``AMAZON_TRANSCRIPT_TEST_FILE``, runs
    ``AmazonConverter.convert()`` on it and compares
    ``converted_words[0]`` with the expected word record.

    Raises:
        KeyError: if ``AMAZON_TRANSCRIPT_TEST_FILE`` is not set.
    """
    # Fail with the variable's name instead of open(None)'s TypeError.
    path = os.environ['AMAZON_TRANSCRIPT_TEST_FILE']
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(path, 'r', encoding='utf-8') as fin:
        json_data = json.load(fin)

    converter = AmazonConverter(json_data)
    converter.convert()

    assert converter.converted_words[0] == {
        'start': 5.49,
        'end': 5.97,
        'confidence': 1.0,
        'word': 'So',
        'always_capitalized': False,
        'punc_after': False,
        'punc_before': False,
    }
|
|
|
|
|
|
def test_speechmatics():
    """Check the first word SpeechmaticsConverter yields for the sample file.

    Loads the JSON file named by ``SPEECHMATICS_TRANSCRIPT_TEST_FILE``,
    runs ``SpeechmaticsConverter.convert()`` on it and compares
    ``converted_words[0]`` with the expected word record.

    Raises:
        KeyError: if ``SPEECHMATICS_TRANSCRIPT_TEST_FILE`` is not set.
    """
    # Fail with the variable's name instead of open(None)'s TypeError.
    path = os.environ['SPEECHMATICS_TRANSCRIPT_TEST_FILE']
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(path, 'r', encoding='utf-8') as fin:
        json_data = json.load(fin)

    converter = SpeechmaticsConverter(json_data)
    converter.convert()

    assert converter.converted_words[0] == {
        'start': 5.98,
        'end': 6.11,
        'confidence': 0.67,
        'word': 'For',
        'always_capitalized': False,
        'punc_after': False,
        'punc_before': False,
    }
|
|
|
|
|
|
def test_gentle():
    """Check the first word GentleConverter yields for the sample file.

    Loads the JSON file named by ``GENTLE_TRANSCRIPT_TEST_FILE``, runs
    ``GentleConverter.convert()`` on it and compares
    ``converted_words[0]`` with the expected word record.

    Raises:
        KeyError: if ``GENTLE_TRANSCRIPT_TEST_FILE`` is not set.
    """
    # Fail with the variable's name instead of open(None)'s TypeError.
    path = os.environ['GENTLE_TRANSCRIPT_TEST_FILE']
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(path, 'r', encoding='utf-8') as fin:
        json_data = json.load(fin)

    converter = GentleConverter(json_data)
    converter.convert()

    assert converter.converted_words[0] == {
        'start': 0.35,
        'end': 1.58,
        'confidence': 1,
        'word': '[noise]',
        'always_capitalized': False,
        'punc_after': False,
        'punc_before': False,
    }
|
|
|
|
|
|
def test_google():
    """Check the first word GoogleConverter yields for the sample file.

    Reads the file named by ``GOOGLE_TRANSCRIPT_TEST_FILE`` as text,
    passes that text unparsed to ``GoogleConverter``, runs ``convert()``
    and compares ``converted_words[0]`` with the expected word record.

    Raises:
        KeyError: if ``GOOGLE_TRANSCRIPT_TEST_FILE`` is not set.
    """
    # Fail with the variable's name instead of open(None)'s TypeError.
    path = os.environ['GOOGLE_TRANSCRIPT_TEST_FILE']
    # NOTE(review): assumes the sample file is UTF-8 text - confirm; this
    # removes the dependency on the platform's locale encoding.
    with open(path, 'r', encoding='utf-8') as fin:
        transcript_data = fin.read()

    converter = GoogleConverter(transcript_data)
    converter.convert()

    assert converter.converted_words[0] == {
        'start': 4,
        'end': 5.5,
        'confidence': 0.88,
        'word': 'Testing',
        'always_capitalized': False,
        'punc_after': [','],
        'punc_before': False,
    }
|
|
|