6186
GO_EMC_100914_Interview_bit_goog.mp3.txt
Normal file
6186
GO_EMC_100914_Interview_bit_goog.mp3.txt
Normal file
File diff suppressed because it is too large
Load Diff
6180
GO_EMC_100914_Interview_first_minute.mp3.txt
Normal file
6180
GO_EMC_100914_Interview_first_minute.mp3.txt
Normal file
File diff suppressed because it is too large
Load Diff
6293
GO_EMC_100914_Interview_first_minute_phone_call_model.mp3.txt
Normal file
6293
GO_EMC_100914_Interview_first_minute_phone_call_model.mp3.txt
Normal file
File diff suppressed because it is too large
Load Diff
@@ -8,14 +8,6 @@ BUCKET_NAME_FMTR_TRANSCRIPT = 'tatt-transcript-{}'
|
||||
BUCKET_NAME_FMTR_TRANSCRIPT_GOOGLE = 'tatt_transcript_{}'
|
||||
|
||||
|
||||
def GOOGLE_SPEECH_USE_ENHANCED():
|
||||
enhanced = os.getenv('GOOGLE_SPEECH_USE_ENHANCED')
|
||||
if enhanced and enhanced == 'true':
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
if os.getenv('AWS_CONFIG_FILEPATH'):
|
||||
AWS_CONFIG_FILEPATH = Path(os.getenv('AWS_CONFIG_FILEPATH'))
|
||||
else:
|
||||
|
||||
@@ -124,10 +124,18 @@ def get_transcription_jobs_dict():
|
||||
|
||||
|
||||
def get_num_audio_channels(filepath):
|
||||
return get_media_info(filepath).channels
|
||||
|
||||
|
||||
def get_sample_rate(filepath):
|
||||
return get_media_info(filepath).samplerate
|
||||
|
||||
|
||||
def get_media_info(filepath):
|
||||
if isinstance(filepath, pathlib.PurePosixPath):
|
||||
filepath = str(filepath)
|
||||
with audioread.audio_open(filepath) as f:
|
||||
return f.channels
|
||||
return f
|
||||
|
||||
|
||||
def shell_call(command):
|
||||
|
||||
@@ -88,15 +88,33 @@ def status(job_name):
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.option('--punctuation', is_flag=True, default=True,
|
||||
help='only for Google Speech, defaults to True')
|
||||
@click.option('--speaker-id', is_flag=True, default=True,
|
||||
help='only for Google Speech, defaults to True')
|
||||
@click.option('--model', default='phone_call',
|
||||
help='only for Google Speech, defaults to "phone_call"')
|
||||
@click.option('--use-enhanced', is_flag=True, default=True,
|
||||
help='only for Google Speech, defaults to True')
|
||||
@click.argument('media_filepath', type=str)
|
||||
@click.argument('service_name', type=str)
|
||||
def this(media_filepath, service_name):
|
||||
def this(media_filepath, service_name, punctuation, speaker_id, model,
|
||||
use_enhanced):
|
||||
"""Sends a media file to be transcribed."""
|
||||
if service_name == 'google':
|
||||
transcribe_kwargs = dict(
|
||||
enable_automatic_punctuation=punctuation,
|
||||
enable_speaker_diarization=speaker_id,
|
||||
model=model,
|
||||
use_enhanced=use_enhanced,
|
||||
)
|
||||
else:
|
||||
transcribe_kwargs = {}
|
||||
try:
|
||||
service = get_service(service_name)
|
||||
except KeyError as e:
|
||||
raise click.ClickException(
|
||||
f'No such service! {print_all_services(print_=False)}')
|
||||
f'No such service! {helpers.make_string_all_services()}')
|
||||
|
||||
try:
|
||||
s = service(media_filepath)
|
||||
@@ -107,7 +125,7 @@ def this(media_filepath, service_name):
|
||||
f'Okay, transcribing {media_filepath} using {service_name}...')
|
||||
|
||||
try:
|
||||
job_num = s.transcribe()
|
||||
job_num = s.transcribe(**transcribe_kwargs)
|
||||
except exceptions.AlreadyExistsError as e:
|
||||
raise click.ClickException(str(e))
|
||||
click.echo(f'Okay, job {job_num} is being transcribed. Use "get" '
|
||||
|
||||
31
tatt/vendors/google.py
vendored
31
tatt/vendors/google.py
vendored
@@ -30,7 +30,7 @@ def _check_for_config():
|
||||
class Transcriber(TranscriberBaseClass):
|
||||
|
||||
SUPPORTED_FORMATS = ['flac']
|
||||
cost_per_15_seconds = .009
|
||||
cost_per_15_seconds = [.004, .006, .009]
|
||||
no_config_error_message = (
|
||||
'Please sign up for the Google Speech-to-Text API '
|
||||
'and put the path to your credentials in an '
|
||||
@@ -80,9 +80,14 @@ class Transcriber(TranscriberBaseClass):
|
||||
def check_for_config() -> bool:
|
||||
return _check_for_config()
|
||||
|
||||
def transcribe(self) -> str:
|
||||
def upload_file_if_too_big(self):
|
||||
"""10MB limit as of Mar 7, 2019"""
|
||||
pass
|
||||
|
||||
def transcribe(self, **kwargs) -> str:
|
||||
self.convert_file_format_if_needed()
|
||||
self._request_transcription()
|
||||
self.upload_file_if_too_big()
|
||||
self._request_transcription(**kwargs)
|
||||
|
||||
def _check_if_transcript_exists(self, transcript_name=None):
|
||||
return storage.Blob(
|
||||
@@ -93,16 +98,17 @@ class Transcriber(TranscriberBaseClass):
|
||||
def _request_transcription(
|
||||
self,
|
||||
language_code='en-US',
|
||||
# model='video',
|
||||
enable_automatic_punctuation=True,
|
||||
enable_speaker_diarization=True,
|
||||
model='phone_call',
|
||||
use_enhanced=True,
|
||||
) -> str:
|
||||
"""Returns the job_name"""
|
||||
if self._check_if_transcript_exists():
|
||||
raise exceptions.AlreadyExistsError(
|
||||
f'{self.basename} already exists on {NAME}')
|
||||
num_audio_channels = helpers.get_num_audio_channels(self.filepath)
|
||||
|
||||
use_enhanced = config_mod.GOOGLE_SPEECH_USE_ENHANCED()
|
||||
print(use_enhanced)
|
||||
sample_rate = helpers.get_sample_rate(self.filepath)
|
||||
|
||||
with io.open(self.filepath, 'rb') as audio_file:
|
||||
content = audio_file.read()
|
||||
@@ -110,19 +116,16 @@ class Transcriber(TranscriberBaseClass):
|
||||
|
||||
config = speech.types.RecognitionConfig(
|
||||
encoding=speech.enums.RecognitionConfig.AudioEncoding.FLAC,
|
||||
sample_rate_hertz=44100,
|
||||
sample_rate_hertz=sample_rate,
|
||||
audio_channel_count=num_audio_channels,
|
||||
enable_separate_recognition_per_channel=True,
|
||||
enable_word_confidence=True,
|
||||
enable_word_time_offsets=True,
|
||||
language_code=language_code,
|
||||
enable_automatic_punctuation=True,
|
||||
enable_speaker_diarization=True,
|
||||
# not clear whether this has to be 'phone_call' in order to
|
||||
# use_enhanced
|
||||
model='video',
|
||||
enable_automatic_punctuation=enable_automatic_punctuation,
|
||||
enable_speaker_diarization=enable_speaker_diarization,
|
||||
model=model,
|
||||
use_enhanced=use_enhanced,
|
||||
# model=model,
|
||||
)
|
||||
|
||||
self.operation = self.speech_client.long_running_recognize(config,
|
||||
|
||||
Reference in New Issue
Block a user