updated README, setup.py, and fixed #27

This commit is contained in:
2019-03-06 23:22:36 -05:00
parent e19128f46c
commit 2767f2ff55
5 changed files with 34 additions and 10 deletions

View File

@@ -10,7 +10,9 @@ tatt is a CLI for creating and managing speech-to-text transcripts.
## Dependencies
An AWS account (the only supported STT provider as of Feb 12, 2019), and a recording to transcribe!
1. A recording to transcribe.
2. Either (a) an AWS account or (b) a Google Cloud account with the Speech-to-Text API and
Cloud Storage enabled.
## Usage
@@ -27,6 +29,7 @@ An AWS account (the only supported STT provider as of Feb 12, 2019), and a recor
get Downloads and/or saves completed transcript.
list Lists available STT services.
services Lists available speech-to-text services.
status Check the status of a transcription job.
this Sends a media file to be transcribed.
### List All STT Services
@@ -34,7 +37,8 @@ An AWS account (the only supported STT provider as of Feb 12, 2019), and a recor
Here are all the available speech-to-text services:
amazon (60 minutes per month for the first 12 months)
amazon $0.006 per 15 seconds
google $0.009 per 15 seconds
### Get Something Transcribed
$ transcribe this <path_to_media_file> <service_name>
@@ -51,6 +55,7 @@ An AWS account (the only supported STT provider as of Feb 12, 2019), and a recor
amazon messed_up.mp4 FAILED
amazon done_test.mp3 COMPLETED
amazon also_done.MP3 COMPLETED
google hey_there.mp3 COMPLETED
$ transcribe list <job_name>
@@ -85,10 +90,10 @@ An AWS account (the only supported STT provider as of Feb 12, 2019), and a recor
## Services Supported
- [Amazon Transcribe](https://aws.amazon.com/transcribe/)
- [Google Speech](https://cloud.google.com/speech-to-text/)
### Planned
- [Watson](https://www.ibm.com/watson/services/speech-to-text/)
- [Google Speech](https://cloud.google.com/speech-to-text/)
- [Kaldi](https://github.com/kaldi-asr/kaldi) [ and/or things built on it ](https://github.com/lowerquality/gentle)
- [Speechmatics](https://www.speechmatics.com/)
- [Mozilla's new open-source STT thing](https://github.com/mozilla/DeepSpeech)

View File

@@ -6,7 +6,7 @@ with open('README.md') as file:
setup(
name="tatt",
version="0.972",
version="0.973",
py_modules=['tatt'],
url='https://github.com/zevaverbach/tatt',
install_requires=[
@@ -14,6 +14,9 @@ setup(
'awscli',
'boto3',
'requests',
'google-cloud-speech',
'audioread',
'google-cloud-storage',
],
include_package_data=True,
packages=find_packages(),

View File

@@ -87,6 +87,15 @@ def get_transcription_jobs(
jobs = service.get_transcription_jobs(job_name_query=name,
status=status)
if jobs:
if name:
exact_match_jobs = []
for job in jobs:
if job['name'] == name:
exact_match_jobs.append(job)
if not exact_match_jobs:
continue
else:
jobs = exact_match_jobs
all_jobs[stt_name] = jobs
if name and len(all_jobs) == 0:

View File

@@ -79,8 +79,9 @@ def status(job_name):
raise click.ClickException('no job by that name')
for job_list in jobs.values():
for job in job_list:
click.echo(job['status'])
click.echo(f'{job["name"]}\t{job["status"]}')
break
break
@cli.command()

View File

@@ -165,8 +165,14 @@ class Transcriber(TranscriberBaseClass):
blob.upload_from_filename(path)
@classmethod
def get_transcription_jobs(cls, job_name_query, status) -> List[dict]:
return [
{'name': t.name, 'status': 'COMPLETED'}
for t in cls.transcript_bucket.list_blobs()
]
def get_transcription_jobs(cls, job_name_query=None, status=None) -> List[dict]:
    """Report the blobs in ``cls.transcript_bucket`` as transcription jobs.

    Each blob yielded by ``cls.transcript_bucket.list_blobs()`` becomes one
    job record whose status is always ``'COMPLETED'``.

    Args:
        job_name_query: When not ``None``, only a blob whose name equals
            this value exactly is returned.
        status: Optional status filter. This method only ever reports
            ``'COMPLETED'`` jobs, so any other requested status (compared
            case-insensitively) yields an empty list. ``None`` or an empty
            value means "no filter".

    Returns:
        A list of ``{'name': <blob name>, 'status': 'COMPLETED'}`` dicts.
    """
    # Honour the status filter instead of discarding it: a caller asking for
    # e.g. FAILED jobs must not be handed jobs labelled COMPLETED.
    if status and str(status).upper() != 'COMPLETED':
        return []
    return [
        {'name': blob.name, 'status': 'COMPLETED'}
        for blob in cls.transcript_bucket.list_blobs()
        if job_name_query is None or blob.name == job_name_query
    ]