Add support for other languages.

Add CLI options for specifying language as well as for getting a list of
supported languages.
Add an abstract `_language_list` property and a `language_list` class method to the base class in vendor.py.
Manually add list of language codes (`_language_list` and `language_list`) to
Amazon.
Manually add list of language codes to Google.
Add a `name` attribute to each Transcriber.
This commit is contained in:
2019-03-25 07:50:39 +01:00
parent c97ec79ece
commit d8f90cd98a
4 changed files with 57 additions and 4 deletions

View File

@@ -87,6 +87,15 @@ def status(job_name):
break
@cli.command()
@click.argument('service_name', type=click.Choice(vendors.SERVICES))
def languages(service_name):
    """Lists the language codes supported by a transcription service."""
    # service_name is restricted to vendors.SERVICES by click.Choice above.
    service = get_service(service_name)
    # Leading newline puts the codes below the header line, one per line.
    languages_string = "\n" + "\n".join(service.language_list())
    click.echo(f'{service.name} supports {languages_string}')
@cli.command()
@click.option('--punctuation', is_flag=True, default=True,
help='only for Google Speech, defaults to True')
@@ -98,6 +107,8 @@ def status(job_name):
help='only for Google Speech, defaults to "phone_call"')
@click.option('--use-enhanced', is_flag=True, default=True,
help='only for Google Speech, defaults to True')
@click.option('--language-code', default='en-US',
help='only for google and amazon, defaults to en-US')
@click.argument('media_filepath', type=str)
@click.argument('service_name', type=str)
def this(media_filepath,
@@ -106,7 +117,9 @@ def this(media_filepath,
speaker_id,
num_speakers,
model,
use_enhanced):
use_enhanced,
language_code,
):
"""Sends a media file to be transcribed."""
if service_name == 'google':
transcribe_kwargs = dict(
@@ -115,14 +128,17 @@ def this(media_filepath,
model=model,
use_enhanced=use_enhanced,
num_speakers=num_speakers,
language_code=language_code,
)
elif service_name == 'amazon':
transcribe_kwargs = dict(
enable_speaker_diarization=speaker_id,
num_speakers=num_speakers,
language_code=language_code,
)
else:
transcribe_kwargs = {}
try:
service = get_service(service_name)
except KeyError as e:

View File

@@ -27,12 +27,16 @@ def _check_for_config() -> bool:
class Transcriber(TranscriberBaseClass):
name = NAME
cost_per_15_seconds = .024 / 4
bucket_names = {'media': BUCKET_NAME_MEDIA,
'transcript': BUCKET_NAME_TRANSCRIPT}
no_config_error_message = 'please run "aws configure" first'
transcript_type = TRANSCRIPT_TYPE
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/transcribe.html
_language_list = ['en-US', 'es-US', 'en-AU', 'fr-CA', 'en-GB', 'de-DE',
'pt-BR', 'fr-FR', 'it-IT', 'ko-KR']
if _check_for_config():
tr = boto3.client('transcribe')

View File

@@ -29,6 +29,7 @@ def _check_for_config():
class Transcriber(TranscriberBaseClass):
name = NAME
SUPPORTED_FORMATS = ['flac']
cost_per_15_seconds = [.004, .006, .009]
no_config_error_message = (
@@ -37,6 +38,25 @@ class Transcriber(TranscriberBaseClass):
'environment variable "GOOGLE_APPLICATION_CREDENTIALS"'
)
transcript_type = TRANSCRIPT_TYPE
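# Language codes taken from https://cloud.google.com/speech-to-text/docs/languages
# The list below can be regenerated by running this JavaScript snippet on that
# page (e.g. in the browser console):
# Array.from(document.querySelector('.devsite-table-wrapper').querySelectorAll('table tr')).slice(1).map(row => row.children[1].innerText)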
_language_list = [
'af-ZA', 'am-ET', 'hy-AM', 'az-AZ', 'id-ID', 'ms-MY',
'bn-BD', 'bn-IN', 'ca-ES', 'cs-CZ', 'da-DK', 'de-DE', 'en-AU', 'en-CA',
'en-GH', 'en-GB', 'en-IN', 'en-IE', 'en-KE', 'en-NZ', 'en-NG', 'en-PH',
'en-SG', 'en-ZA', 'en-TZ', 'en-US', 'es-AR', 'es-BO', 'es-CL', 'es-CO',
'es-CR', 'es-EC', 'es-SV', 'es-ES', 'es-US', 'es-GT', 'es-HN', 'es-MX',
'es-NI', 'es-PA', 'es-PY', 'es-PE', 'es-PR', 'es-DO', 'es-UY', 'es-VE',
'eu-ES', 'fil-PH', 'fr-CA', 'fr-FR', 'gl-ES', 'ka-GE', 'gu-IN', 'hr-HR',
'zu-ZA', 'is-IS', 'it-IT', 'jv-ID', 'kn-IN', 'km-KH', 'lo-LA', 'lv-LV',
'lt-LT', 'hu-HU', 'ml-IN', 'mr-IN', 'nl-NL', 'ne-NP', 'nb-NO', 'pl-PL',
'pt-BR', 'pt-PT', 'ro-RO', 'si-LK', 'sk-SK', 'sl-SI', 'su-ID', 'sw-TZ',
'sw-KE', 'fi-FI', 'sv-SE', 'ta-IN', 'ta-SG', 'ta-LK', 'ta-MY', 'te-IN',
'vi-VN', 'tr-TR', 'ur-PK', 'ur-IN', 'el-GR', 'bg-BG', 'ru-RU', 'sr-RS',
'uk-UA', 'he-IL', 'ar-IL', 'ar-JO', 'ar-AE', 'ar-BH', 'ar-DZ', 'ar-SA',
'ar-IQ', 'ar-KW', 'ar-MA', 'ar-TN', 'ar-OM', 'ar-PS', 'ar-QA', 'ar-LB',
'ar-EG', 'fa-IR', 'hi-IN', 'th-TH', 'ko-KR', 'zh-TW', 'yue-Hant-HK',
'ja-JP', 'zh-HK', 'zh']
if _check_for_config():
speech_client = speech.SpeechClient()
@@ -115,6 +135,9 @@ class Transcriber(TranscriberBaseClass):
content = audio_file.read()
audio = speech.types.RecognitionAudio(content=content)
if language_code != 'en-US':
model = None
config = speech.types.RecognitionConfig(
encoding=speech.enums.RecognitionConfig.AudioEncoding.FLAC,
sample_rate_hertz=sample_rate,

View File

@@ -31,6 +31,11 @@ class TranscriberBaseClass:
def transcript_type(self):
pass
@property
@abc.abstractmethod
def _language_list(self):
    """Language codes supported by this transcriber.

    Abstract: each concrete transcriber must supply its own value. It is
    read by ``language_list``, which returns the codes in sorted order.
    """
@property
@abc.abstractmethod
def cost_per_15_seconds(self):
@@ -42,9 +47,9 @@ class TranscriberBaseClass:
if not cls.check_for_config():
raise exceptions.ConfigError(cls.no_config_error_message)
@staticmethod
@classmethod
@abc.abstractmethod
def check_for_config() -> bool:
def check_for_config(cls) -> bool:
pass
@abc.abstractmethod
@@ -61,9 +66,14 @@ class TranscriberBaseClass:
"""Returns the job_name"""
pass
@classmethod
def language_list(cls) -> List[str]:
    """Return the entries of ``cls._language_list`` in sorted order.

    A new list is built on every call; ``cls._language_list`` itself is
    not modified.
    """
    codes = list(cls._language_list)
    codes.sort()
    return codes
@classmethod
@abc.abstractmethod
def retrieve_transcript(transcription_job_name: str) -> Union[str, dict]:
def retrieve_transcript(cls, transcription_job_name: str
) -> Union[str, dict]:
pass
@classmethod