From d8f90cd98aaaf7c99ead8b7bc6a67a74b9269f3c Mon Sep 17 00:00:00 2001 From: zevav Date: Mon, 25 Mar 2019 07:50:39 +0100 Subject: [PATCH] Add support for other languages. Add CLI options for specifying language as well as for getting a list of supported languages. Add abstract method on vendor.py for `language_list`. Manually add list of language codes (`_language_list` and `language_list`) to Amazon. Manually add list of language codes to Google. Add 'name' attribute to each Transcriber --- tatt/transcribe.py | 18 +++++++++++++++++- tatt/vendors/amazon.py | 4 ++++ tatt/vendors/google.py | 23 +++++++++++++++++++++++ tatt/vendors/vendor.py | 16 +++++++++++++--- 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/tatt/transcribe.py b/tatt/transcribe.py index 0a04d7d..60ab4ed 100644 --- a/tatt/transcribe.py +++ b/tatt/transcribe.py @@ -87,6 +87,15 @@ def status(job_name): break +@cli.command() +@click.argument('service_name', type=click.Choice(vendors.SERVICES)) +def languages(service_name): + service = get_service(service_name) + languages_string = "\n" + "\n".join(service.language_list()) + click.echo( + f'{service.name} supports {languages_string}') + + @cli.command() @click.option('--punctuation', is_flag=True, default=True, help='only for Google Speech, defaults to True') @@ -98,6 +107,8 @@ def status(job_name): help='only for Google Speech, defaults to "phone_call"') @click.option('--use-enhanced', is_flag=True, default=True, help='only for Google Speech, defaults to True') +@click.option('--language-code', default='en-US', + help='only for google and amazon, defaults to en-US') @click.argument('media_filepath', type=str) @click.argument('service_name', type=str) def this(media_filepath, @@ -106,7 +117,9 @@ def this(media_filepath, speaker_id, num_speakers, model, - use_enhanced): + use_enhanced, + language_code, + ): """Sends a media file to be transcribed.""" if service_name == 'google': transcribe_kwargs = dict( @@ -115,14 +128,17 @@ def this(media_filepath, model=model, use_enhanced=use_enhanced, num_speakers=num_speakers, + language_code=language_code, ) elif service_name == 'amazon': transcribe_kwargs = dict( enable_speaker_diarization=speaker_id, num_speakers=num_speakers, + language_code=language_code, ) else: transcribe_kwargs = {} + try: service = get_service(service_name) except KeyError as e: diff --git a/tatt/vendors/amazon.py b/tatt/vendors/amazon.py index 6ed9509..2d67dc2 100644 --- a/tatt/vendors/amazon.py +++ b/tatt/vendors/amazon.py @@ -27,12 +27,16 @@ def _check_for_config() -> bool: class Transcriber(TranscriberBaseClass): + name = NAME cost_per_15_seconds = .024 / 4 bucket_names = {'media': BUCKET_NAME_MEDIA, 'transcript': BUCKET_NAME_TRANSCRIPT} no_config_error_message = 'please run "aws configure" first' transcript_type = TRANSCRIPT_TYPE + # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/transcribe.html + _language_list = ['en-US', 'es-US', 'en-AU', 'fr-CA', 'en-GB', 'de-DE', + 'pt-BR', 'fr-FR', 'it-IT', 'ko-KR'] if _check_for_config(): tr = boto3.client('transcribe') diff --git a/tatt/vendors/google.py b/tatt/vendors/google.py index 789f63a..ddb8396 100644 --- a/tatt/vendors/google.py +++ b/tatt/vendors/google.py @@ -29,6 +29,7 @@ def _check_for_config(): class Transcriber(TranscriberBaseClass): + name = NAME SUPPORTED_FORMATS = ['flac'] cost_per_15_seconds = [.004, .006, .009] no_config_error_message = ( @@ -37,6 +38,25 @@ class Transcriber(TranscriberBaseClass): 'environment variable "GOOGLE_APPLICATION_CREDENTIALS"' ) transcript_type = TRANSCRIPT_TYPE + # https://cloud.google.com/speech-to-text/docs/languages + # Array.from(document.querySelector('.devsite-table-wrapper').querySelectorAll('table tr')).slice(1).map(row => row.children[1].innerText) + _language_list = [ + 'af-ZA', 'am-ET', 'hy-AM', 'az-AZ', 'id-ID', 'ms-MY', + 'bn-BD', 'bn-IN', 'ca-ES', 'cs-CZ', 'da-DK', 'de-DE', 'en-AU', 'en-CA', + 'en-GH', 'en-GB', 'en-IN', 'en-IE', 'en-KE', 'en-NZ', 'en-NG', 'en-PH', + 'en-SG', 'en-ZA', 'en-TZ', 'en-US', 'es-AR', 'es-BO', 'es-CL', 'es-CO', + 'es-CR', 'es-EC', 'es-SV', 'es-ES', 'es-US', 'es-GT', 'es-HN', 'es-MX', + 'es-NI', 'es-PA', 'es-PY', 'es-PE', 'es-PR', 'es-DO', 'es-UY', 'es-VE', + 'eu-ES', 'fil-PH', 'fr-CA', 'fr-FR', 'gl-ES', 'ka-GE', 'gu-IN', 'hr-HR', + 'zu-ZA', 'is-IS', 'it-IT', 'jv-ID', 'kn-IN', 'km-KH', 'lo-LA', 'lv-LV', + 'lt-LT', 'hu-HU', 'ml-IN', 'mr-IN', 'nl-NL', 'ne-NP', 'nb-NO', 'pl-PL', + 'pt-BR', 'pt-PT', 'ro-RO', 'si-LK', 'sk-SK', 'sl-SI', 'su-ID', 'sw-TZ', + 'sw-KE', 'fi-FI', 'sv-SE', 'ta-IN', 'ta-SG', 'ta-LK', 'ta-MY', 'te-IN', + 'vi-VN', 'tr-TR', 'ur-PK', 'ur-IN', 'el-GR', 'bg-BG', 'ru-RU', 'sr-RS', + 'uk-UA', 'he-IL', 'ar-IL', 'ar-JO', 'ar-AE', 'ar-BH', 'ar-DZ', 'ar-SA', + 'ar-IQ', 'ar-KW', 'ar-MA', 'ar-TN', 'ar-OM', 'ar-PS', 'ar-QA', 'ar-LB', + 'ar-EG', 'fa-IR', 'hi-IN', 'th-TH', 'ko-KR', 'zh-TW', 'yue-Hant-HK', + 'ja-JP', 'zh-HK', 'zh'] if _check_for_config(): speech_client = speech.SpeechClient() @@ -115,6 +135,9 @@ class Transcriber(TranscriberBaseClass): content = audio_file.read() audio = speech.types.RecognitionAudio(content=content) + if language_code != 'en-US': + model = None + config = speech.types.RecognitionConfig( encoding=speech.enums.RecognitionConfig.AudioEncoding.FLAC, sample_rate_hertz=sample_rate, diff --git a/tatt/vendors/vendor.py b/tatt/vendors/vendor.py index 82c392b..badc239 100644 --- a/tatt/vendors/vendor.py +++ b/tatt/vendors/vendor.py @@ -31,6 +31,11 @@ class TranscriberBaseClass: def transcript_type(self): pass + @property + @abc.abstractmethod + def _language_list(self): + pass + @property @abc.abstractmethod def cost_per_15_seconds(self): @@ -42,9 +47,9 @@ class TranscriberBaseClass: if not cls.check_for_config(): raise exceptions.ConfigError(cls.no_config_error_message) - @staticmethod + @classmethod @abc.abstractmethod - def check_for_config() -> bool: + def check_for_config(cls) -> bool: pass @abc.abstractmethod @@ -61,9 +66,14 @@ class TranscriberBaseClass: """Returns the job_name""" pass + @classmethod + def language_list(cls) -> List[str]: + return sorted(cls._language_list) + @classmethod @abc.abstractmethod - def retrieve_transcript(transcription_job_name: str) -> Union[str, dict]: + def retrieve_transcript(cls, transcription_job_name: str + ) -> Union[str, dict]: pass @classmethod