fixed #42, fixed #43, got sample rate programmatically

This commit is contained in:
2019-03-07 17:58:27 -05:00
parent 2c93d4724f
commit 5b7a05379f
7 changed files with 18706 additions and 26 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -8,14 +8,6 @@ BUCKET_NAME_FMTR_TRANSCRIPT = 'tatt-transcript-{}'
BUCKET_NAME_FMTR_TRANSCRIPT_GOOGLE = 'tatt_transcript_{}'
def GOOGLE_SPEECH_USE_ENHANCED():
    """Report whether the GOOGLE_SPEECH_USE_ENHANCED environment flag is on.

    The variable is read on every call, so changes to the environment made
    after import are picked up.

    Returns:
        bool: True when the variable is set to ``true`` (compared
        case-insensitively, ignoring surrounding whitespace); False when it
        is unset, empty, or holds any other value.
    """
    # Default to '' so an unset variable goes through the same comparison
    # path instead of needing a separate None check.
    flag = os.getenv('GOOGLE_SPEECH_USE_ENHANCED', '')
    # Accept "True"/"TRUE"/" true " as well: shells and .env files are not
    # consistent about casing, and a silent False is hard to notice.
    return flag.strip().lower() == 'true'
if os.getenv('AWS_CONFIG_FILEPATH'):
AWS_CONFIG_FILEPATH = Path(os.getenv('AWS_CONFIG_FILEPATH'))
else:

View File

@@ -124,10 +124,18 @@ def get_transcription_jobs_dict():
def get_num_audio_channels(filepath):
    """Return the ``channels`` attribute of the media info for *filepath*."""
    media_info = get_media_info(filepath)
    return media_info.channels
def get_sample_rate(filepath):
    """Return the ``samplerate`` attribute of the media info for *filepath*."""
    media_info = get_media_info(filepath)
    return media_info.samplerate
def get_media_info(filepath):
    """Open *filepath* with audioread and return the opened audio handle.

    Callers read attributes such as ``channels`` and ``samplerate`` from the
    returned object, so the handle itself is returned rather than any single
    attribute.

    Args:
        filepath: path to the media file, as a ``str`` or a ``pathlib`` path.

    Returns:
        The object yielded by ``audioread.audio_open``.
    """
    # Normalise every pathlib flavour (not only POSIX paths) to str before
    # handing the path to audioread.
    if isinstance(filepath, pathlib.PurePath):
        filepath = str(filepath)
    with audioread.audio_open(filepath) as media:
        # NOTE(review): the handle is closed as soon as the ``with`` block
        # exits, so callers receive a closed object. Presumably the metadata
        # attributes stay readable after close for the backend in use --
        # confirm, or snapshot the needed fields here instead.
        return media
def shell_call(command):

View File

@@ -88,15 +88,33 @@ def status(job_name):
@cli.command()
@click.option('--punctuation', is_flag=True, default=True,
help='only for Google Speech, defaults to True')
@click.option('--speaker-id', is_flag=True, default=True,
help='only for Google Speech, defaults to True')
@click.option('--model', default='phone_call',
help='only for Google Speech, defaults to "phone_call"')
@click.option('--use-enhanced', is_flag=True, default=True,
help='only for Google Speech, defaults to True')
@click.argument('media_filepath', type=str)
@click.argument('service_name', type=str)
def this(media_filepath, service_name):
def this(media_filepath, service_name, punctuation, speaker_id, model,
use_enhanced):
"""Sends a media file to be transcribed."""
if service_name == 'google':
transcribe_kwargs = dict(
enable_automatic_punctuation=punctuation,
enable_speaker_diarization=speaker_id,
model=model,
use_enhanced=use_enhanced,
)
else:
transcribe_kwargs = {}
try:
service = get_service(service_name)
except KeyError as e:
raise click.ClickException(
f'No such service! {print_all_services(print_=False)}')
f'No such service! {helpers.make_string_all_services()}')
try:
s = service(media_filepath)
@@ -107,7 +125,7 @@ def this(media_filepath, service_name):
f'Okay, transcribing {media_filepath} using {service_name}...')
try:
job_num = s.transcribe()
job_num = s.transcribe(**transcribe_kwargs)
except exceptions.AlreadyExistsError as e:
raise click.ClickException(str(e))
click.echo(f'Okay, job {job_num} is being transcribed. Use "get" '

View File

@@ -30,7 +30,7 @@ def _check_for_config():
class Transcriber(TranscriberBaseClass):
SUPPORTED_FORMATS = ['flac']
cost_per_15_seconds = .009
cost_per_15_seconds = [.004, .006, .009]
no_config_error_message = (
'Please sign up for the Google Speech-to-Text API '
'and put the path to your credentials in an '
@@ -80,9 +80,14 @@ class Transcriber(TranscriberBaseClass):
def check_for_config() -> bool:
return _check_for_config()
def transcribe(self) -> str:
def upload_file_if_too_big(self):
    """Placeholder hook that currently does nothing.

    Original note: 10MB limit as of Mar 7, 2019.
    (Presumably the service's size limit for inline audio -- confirm.)
    """
    # TODO: not implemented yet; large files are not uploaded anywhere.
    return None
def transcribe(self, **kwargs) -> str:
    """Prepare the media file and submit it for transcription.

    Steps, in order: convert the file format if needed, run the
    upload-if-too-big hook, then request the transcription exactly once.

    Args:
        **kwargs: forwarded unchanged to ``_request_transcription``
            (e.g. ``model``, ``use_enhanced``).

    Returns:
        Whatever ``_request_transcription`` returns -- its docstring says
        this is the job name, which the CLI prints to the user.
    """
    self.convert_file_format_if_needed()
    self.upload_file_if_too_big()
    # Submit once, with the caller's options, and hand the result back:
    # the previous body dropped the return value despite the ``-> str``
    # annotation, and a leftover no-argument call submitted the job twice.
    return self._request_transcription(**kwargs)
def _check_if_transcript_exists(self, transcript_name=None):
return storage.Blob(
@@ -93,16 +98,17 @@ class Transcriber(TranscriberBaseClass):
def _request_transcription(
self,
language_code='en-US',
# model='video',
enable_automatic_punctuation=True,
enable_speaker_diarization=True,
model='phone_call',
use_enhanced=True,
) -> str:
"""Returns the job_name"""
if self._check_if_transcript_exists():
raise exceptions.AlreadyExistsError(
f'{self.basename} already exists on {NAME}')
num_audio_channels = helpers.get_num_audio_channels(self.filepath)
use_enhanced = config_mod.GOOGLE_SPEECH_USE_ENHANCED()
print(use_enhanced)
sample_rate = helpers.get_sample_rate(self.filepath)
with io.open(self.filepath, 'rb') as audio_file:
content = audio_file.read()
@@ -110,19 +116,16 @@ class Transcriber(TranscriberBaseClass):
config = speech.types.RecognitionConfig(
encoding=speech.enums.RecognitionConfig.AudioEncoding.FLAC,
sample_rate_hertz=44100,
sample_rate_hertz=sample_rate,
audio_channel_count=num_audio_channels,
enable_separate_recognition_per_channel=True,
enable_word_confidence=True,
enable_word_time_offsets=True,
language_code=language_code,
enable_automatic_punctuation=True,
enable_speaker_diarization=True,
# not clear whether this has to be 'phone_call' in order to
# use_enhanced
model='video',
enable_automatic_punctuation=enable_automatic_punctuation,
enable_speaker_diarization=enable_speaker_diarization,
model=model,
use_enhanced=use_enhanced,
# model=model,
)
self.operation = self.speech_client.long_running_recognize(config,