6186
GO_EMC_100914_Interview_bit_goog.mp3.txt
Normal file
6186
GO_EMC_100914_Interview_bit_goog.mp3.txt
Normal file
File diff suppressed because it is too large
Load Diff
6180
GO_EMC_100914_Interview_first_minute.mp3.txt
Normal file
6180
GO_EMC_100914_Interview_first_minute.mp3.txt
Normal file
File diff suppressed because it is too large
Load Diff
6293
GO_EMC_100914_Interview_first_minute_phone_call_model.mp3.txt
Normal file
6293
GO_EMC_100914_Interview_first_minute_phone_call_model.mp3.txt
Normal file
File diff suppressed because it is too large
Load Diff
@@ -8,14 +8,6 @@ BUCKET_NAME_FMTR_TRANSCRIPT = 'tatt-transcript-{}'
|
|||||||
BUCKET_NAME_FMTR_TRANSCRIPT_GOOGLE = 'tatt_transcript_{}'
|
BUCKET_NAME_FMTR_TRANSCRIPT_GOOGLE = 'tatt_transcript_{}'
|
||||||
|
|
||||||
|
|
||||||
def GOOGLE_SPEECH_USE_ENHANCED():
|
|
||||||
enhanced = os.getenv('GOOGLE_SPEECH_USE_ENHANCED')
|
|
||||||
if enhanced and enhanced == 'true':
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
if os.getenv('AWS_CONFIG_FILEPATH'):
|
if os.getenv('AWS_CONFIG_FILEPATH'):
|
||||||
AWS_CONFIG_FILEPATH = Path(os.getenv('AWS_CONFIG_FILEPATH'))
|
AWS_CONFIG_FILEPATH = Path(os.getenv('AWS_CONFIG_FILEPATH'))
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -124,10 +124,18 @@ def get_transcription_jobs_dict():
|
|||||||
|
|
||||||
|
|
||||||
def get_num_audio_channels(filepath):
|
def get_num_audio_channels(filepath):
|
||||||
|
return get_media_info(filepath).channels
|
||||||
|
|
||||||
|
|
||||||
|
def get_sample_rate(filepath):
|
||||||
|
return get_media_info(filepath).samplerate
|
||||||
|
|
||||||
|
|
||||||
|
def get_media_info(filepath):
|
||||||
if isinstance(filepath, pathlib.PurePosixPath):
|
if isinstance(filepath, pathlib.PurePosixPath):
|
||||||
filepath = str(filepath)
|
filepath = str(filepath)
|
||||||
with audioread.audio_open(filepath) as f:
|
with audioread.audio_open(filepath) as f:
|
||||||
return f.channels
|
return f
|
||||||
|
|
||||||
|
|
||||||
def shell_call(command):
|
def shell_call(command):
|
||||||
|
|||||||
@@ -88,15 +88,33 @@ def status(job_name):
|
|||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
|
@click.option('--punctuation', is_flag=True, default=True,
|
||||||
|
help='only for Google Speech, defaults to True')
|
||||||
|
@click.option('--speaker-id', is_flag=True, default=True,
|
||||||
|
help='only for Google Speech, defaults to True')
|
||||||
|
@click.option('--model', default='phone_call',
|
||||||
|
help='only for Google Speech, defaults to "phone_call"')
|
||||||
|
@click.option('--use-enhanced', is_flag=True, default=True,
|
||||||
|
help='only for Google Speech, defaults to True')
|
||||||
@click.argument('media_filepath', type=str)
|
@click.argument('media_filepath', type=str)
|
||||||
@click.argument('service_name', type=str)
|
@click.argument('service_name', type=str)
|
||||||
def this(media_filepath, service_name):
|
def this(media_filepath, service_name, punctuation, speaker_id, model,
|
||||||
|
use_enhanced):
|
||||||
"""Sends a media file to be transcribed."""
|
"""Sends a media file to be transcribed."""
|
||||||
|
if service_name == 'google':
|
||||||
|
transcribe_kwargs = dict(
|
||||||
|
enable_automatic_punctuation=punctuation,
|
||||||
|
enable_speaker_diarization=speaker_id,
|
||||||
|
model=model,
|
||||||
|
use_enhanced=use_enhanced,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
transcribe_kwargs = {}
|
||||||
try:
|
try:
|
||||||
service = get_service(service_name)
|
service = get_service(service_name)
|
||||||
except KeyError as e:
|
except KeyError as e:
|
||||||
raise click.ClickException(
|
raise click.ClickException(
|
||||||
f'No such service! {print_all_services(print_=False)}')
|
f'No such service! {helpers.make_string_all_services()}')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
s = service(media_filepath)
|
s = service(media_filepath)
|
||||||
@@ -107,7 +125,7 @@ def this(media_filepath, service_name):
|
|||||||
f'Okay, transcribing {media_filepath} using {service_name}...')
|
f'Okay, transcribing {media_filepath} using {service_name}...')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
job_num = s.transcribe()
|
job_num = s.transcribe(**transcribe_kwargs)
|
||||||
except exceptions.AlreadyExistsError as e:
|
except exceptions.AlreadyExistsError as e:
|
||||||
raise click.ClickException(str(e))
|
raise click.ClickException(str(e))
|
||||||
click.echo(f'Okay, job {job_num} is being transcribed. Use "get" '
|
click.echo(f'Okay, job {job_num} is being transcribed. Use "get" '
|
||||||
|
|||||||
31
tatt/vendors/google.py
vendored
31
tatt/vendors/google.py
vendored
@@ -30,7 +30,7 @@ def _check_for_config():
|
|||||||
class Transcriber(TranscriberBaseClass):
|
class Transcriber(TranscriberBaseClass):
|
||||||
|
|
||||||
SUPPORTED_FORMATS = ['flac']
|
SUPPORTED_FORMATS = ['flac']
|
||||||
cost_per_15_seconds = .009
|
cost_per_15_seconds = [.004, .006, .009]
|
||||||
no_config_error_message = (
|
no_config_error_message = (
|
||||||
'Please sign up for the Google Speech-to-Text API '
|
'Please sign up for the Google Speech-to-Text API '
|
||||||
'and put the path to your credentials in an '
|
'and put the path to your credentials in an '
|
||||||
@@ -80,9 +80,14 @@ class Transcriber(TranscriberBaseClass):
|
|||||||
def check_for_config() -> bool:
|
def check_for_config() -> bool:
|
||||||
return _check_for_config()
|
return _check_for_config()
|
||||||
|
|
||||||
def transcribe(self) -> str:
|
def upload_file_if_too_big(self):
|
||||||
|
"""10MB limit as of Mar 7, 2019"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def transcribe(self, **kwargs) -> str:
|
||||||
self.convert_file_format_if_needed()
|
self.convert_file_format_if_needed()
|
||||||
self._request_transcription()
|
self.upload_file_if_too_big()
|
||||||
|
self._request_transcription(**kwargs)
|
||||||
|
|
||||||
def _check_if_transcript_exists(self, transcript_name=None):
|
def _check_if_transcript_exists(self, transcript_name=None):
|
||||||
return storage.Blob(
|
return storage.Blob(
|
||||||
@@ -93,16 +98,17 @@ class Transcriber(TranscriberBaseClass):
|
|||||||
def _request_transcription(
|
def _request_transcription(
|
||||||
self,
|
self,
|
||||||
language_code='en-US',
|
language_code='en-US',
|
||||||
# model='video',
|
enable_automatic_punctuation=True,
|
||||||
|
enable_speaker_diarization=True,
|
||||||
|
model='phone_call',
|
||||||
|
use_enhanced=True,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Returns the job_name"""
|
"""Returns the job_name"""
|
||||||
if self._check_if_transcript_exists():
|
if self._check_if_transcript_exists():
|
||||||
raise exceptions.AlreadyExistsError(
|
raise exceptions.AlreadyExistsError(
|
||||||
f'{self.basename} already exists on {NAME}')
|
f'{self.basename} already exists on {NAME}')
|
||||||
num_audio_channels = helpers.get_num_audio_channels(self.filepath)
|
num_audio_channels = helpers.get_num_audio_channels(self.filepath)
|
||||||
|
sample_rate = helpers.get_sample_rate(self.filepath)
|
||||||
use_enhanced = config_mod.GOOGLE_SPEECH_USE_ENHANCED()
|
|
||||||
print(use_enhanced)
|
|
||||||
|
|
||||||
with io.open(self.filepath, 'rb') as audio_file:
|
with io.open(self.filepath, 'rb') as audio_file:
|
||||||
content = audio_file.read()
|
content = audio_file.read()
|
||||||
@@ -110,19 +116,16 @@ class Transcriber(TranscriberBaseClass):
|
|||||||
|
|
||||||
config = speech.types.RecognitionConfig(
|
config = speech.types.RecognitionConfig(
|
||||||
encoding=speech.enums.RecognitionConfig.AudioEncoding.FLAC,
|
encoding=speech.enums.RecognitionConfig.AudioEncoding.FLAC,
|
||||||
sample_rate_hertz=44100,
|
sample_rate_hertz=sample_rate,
|
||||||
audio_channel_count=num_audio_channels,
|
audio_channel_count=num_audio_channels,
|
||||||
enable_separate_recognition_per_channel=True,
|
enable_separate_recognition_per_channel=True,
|
||||||
enable_word_confidence=True,
|
enable_word_confidence=True,
|
||||||
enable_word_time_offsets=True,
|
enable_word_time_offsets=True,
|
||||||
language_code=language_code,
|
language_code=language_code,
|
||||||
enable_automatic_punctuation=True,
|
enable_automatic_punctuation=enable_automatic_punctuation,
|
||||||
enable_speaker_diarization=True,
|
enable_speaker_diarization=enable_speaker_diarization,
|
||||||
# not clear whether this has to be 'phone_call' in order to
|
model=model,
|
||||||
# use_enhanced
|
|
||||||
model='video',
|
|
||||||
use_enhanced=use_enhanced,
|
use_enhanced=use_enhanced,
|
||||||
# model=model,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self.operation = self.speech_client.long_running_recognize(config,
|
self.operation = self.speech_client.long_running_recognize(config,
|
||||||
|
|||||||
Reference in New Issue
Block a user