fixed #42, fixed #43, got sample rate programmatically

2019-03-07 17:58:27 -05:00
parent 2c93d4724f
commit 5b7a05379f
7 changed files with 18706 additions and 26 deletions
--- a/GO_EMC_100914_Interview_bit_goog.mp3.txt
+++ b/GO_EMC_100914_Interview_bit_goog.mp3.txt
--- a/GO_EMC_100914_Interview_first_minute.mp3.txt
+++ b/GO_EMC_100914_Interview_first_minute.mp3.txt
--- a/GO_EMC_100914_Interview_first_minute_phone_call_model.mp3.txt
+++ b/GO_EMC_100914_Interview_first_minute_phone_call_model.mp3.txt
--- a/tatt/config.py
+++ b/tatt/config.py
@@ -8,14 +8,6 @@ BUCKET_NAME_FMTR_TRANSCRIPT = 'tatt-transcript-{}'
 BUCKET_NAME_FMTR_TRANSCRIPT_GOOGLE = 'tatt_transcript_{}'


-def GOOGLE_SPEECH_USE_ENHANCED():
-    enhanced = os.getenv('GOOGLE_SPEECH_USE_ENHANCED')
-    if enhanced and enhanced == 'true':
-        return True
-    else:
-        return False
-
-
 if os.getenv('AWS_CONFIG_FILEPATH'):
    AWS_CONFIG_FILEPATH = Path(os.getenv('AWS_CONFIG_FILEPATH'))
 else:
--- a/tatt/helpers.py
+++ b/tatt/helpers.py
@@ -124,10 +124,18 @@ def get_transcription_jobs_dict():


 def get_num_audio_channels(filepath):
+    return get_media_info(filepath).channels
+
+
+def get_sample_rate(filepath):
+    return get_media_info(filepath).samplerate
+
+
+def get_media_info(filepath):
    if isinstance(filepath, pathlib.PurePosixPath):
        filepath = str(filepath)
    with audioread.audio_open(filepath) as f:
-        return f.channels
+        return f


 def shell_call(command):
--- a/tatt/transcribe.py
+++ b/tatt/transcribe.py
@@ -88,15 +88,33 @@ def status(job_name):


@cli.command()
+@click.option('--punctuation', is_flag=True, default=True, 
+              help='only for Google Speech, defaults to True')
+@click.option('--speaker-id', is_flag=True, default=True, 
+              help='only for Google Speech, defaults to True')
+@click.option('--model', default='phone_call', 
+              help='only for Google Speech, defaults to "phone_call"')
+@click.option('--use-enhanced', is_flag=True, default=True,
+              help='only for Google Speech, defaults to True')
@click.argument('media_filepath', type=str)
@click.argument('service_name', type=str)
-def this(media_filepath, service_name):
+def this(media_filepath, service_name, punctuation, speaker_id, model,
+         use_enhanced):
    """Sends a media file to be transcribed."""
+    if service_name == 'google':
+        transcribe_kwargs = dict(
+            enable_automatic_punctuation=punctuation,
+            enable_speaker_diarization=speaker_id,
+            model=model,
+            use_enhanced=use_enhanced,
+            )
+    else:
+        transcribe_kwargs = {}
    try:
        service = get_service(service_name)
    except KeyError as e:
        raise click.ClickException(
-            f'No such service! {print_all_services(print_=False)}')
+            f'No such service! {helpers.make_string_all_services()}')

    try:
        s = service(media_filepath)
@@ -107,7 +125,7 @@ def this(media_filepath, service_name):
      f'Okay, transcribing {media_filepath} using {service_name}...')

    try:
-        job_num = s.transcribe()
+        job_num = s.transcribe(**transcribe_kwargs)
    except exceptions.AlreadyExistsError as e:
        raise click.ClickException(str(e))
    click.echo(f'Okay, job {job_num} is being transcribed.  Use "get" '
--- a/tatt/vendors/google.py
+++ b/tatt/vendors/google.py
@@ -30,7 +30,7 @@ def _check_for_config():
 class Transcriber(TranscriberBaseClass):

    SUPPORTED_FORMATS = ['flac']
-    cost_per_15_seconds = .009
+    cost_per_15_seconds = [.004, .006, .009]
    no_config_error_message = (
            'Please sign up for the Google Speech-to-Text API '
            'and put the path to your credentials in an '
@@ -80,9 +80,14 @@ class Transcriber(TranscriberBaseClass):
    def check_for_config() -> bool:
        return _check_for_config()

-    def transcribe(self) -> str:
+    def upload_file_if_too_big(self):
+        """10MB limit as of Mar 7, 2019"""
+        pass
+
+    def transcribe(self, **kwargs) -> str:
        self.convert_file_format_if_needed()
-        self._request_transcription()
+        self.upload_file_if_too_big()
+        self._request_transcription(**kwargs)

    def _check_if_transcript_exists(self, transcript_name=None):
        return storage.Blob(
@@ -93,16 +98,17 @@ class Transcriber(TranscriberBaseClass):
    def _request_transcription(
            self, 
            language_code='en-US',
-            # model='video',
+            enable_automatic_punctuation=True,
+            enable_speaker_diarization=True,
+            model='phone_call',
+            use_enhanced=True,
            ) -> str:
        """Returns the job_name"""
        if self._check_if_transcript_exists():
            raise exceptions.AlreadyExistsError(
                f'{self.basename} already exists on {NAME}')
        num_audio_channels = helpers.get_num_audio_channels(self.filepath)
-
-        use_enhanced = config_mod.GOOGLE_SPEECH_USE_ENHANCED()
-        print(use_enhanced)
+        sample_rate = helpers.get_sample_rate(self.filepath)

        with io.open(self.filepath, 'rb') as audio_file:
            content = audio_file.read()
@@ -110,19 +116,16 @@ class Transcriber(TranscriberBaseClass):

        config = speech.types.RecognitionConfig(
            encoding=speech.enums.RecognitionConfig.AudioEncoding.FLAC,
-            sample_rate_hertz=44100,
+            sample_rate_hertz=sample_rate,
            audio_channel_count=num_audio_channels,
            enable_separate_recognition_per_channel=True,
            enable_word_confidence=True,
            enable_word_time_offsets=True,
            language_code=language_code,
-            enable_automatic_punctuation=True,
-            enable_speaker_diarization=True,
-            # not clear whether this has to be 'phone_call' in order to
-            # use_enhanced
-            model='video',
+            enable_automatic_punctuation=enable_automatic_punctuation,
+            enable_speaker_diarization=enable_speaker_diarization,
+            model=model,
            use_enhanced=use_enhanced,
-            # model=model,
            )

        self.operation = self.speech_client.long_running_recognize(config,