updated README, setup.py, and fixed #27
This commit is contained in:
11
README.md
11
README.md
@@ -10,7 +10,9 @@ tatt is a CLI for creating and managing speech-to-text transcripts.
|
||||
|
||||
## Dependencies
|
||||
|
||||
An AWS account (the only supported STT provider as of Feb 12, 2019), and a recording to transcribe!
|
||||
1. A recording to transcribe.
|
||||
2. Either (a) an AWS account or (b) a Google Cloud account with the Speech-to-Text API and
|
||||
Cloud Storage enabled.
|
||||
|
||||
|
||||
## Usage
|
||||
@@ -27,6 +29,7 @@ An AWS account (the only supported STT provider as of Feb 12, 2019), and a recor
|
||||
get Downloads and/or saves completed transcript.
|
||||
list Lists available STT services.
|
||||
services Lists available speech-to-text services.
|
||||
status Check the status of a transcription job.
|
||||
this Sends a media file to be transcribed.
|
||||
|
||||
### List All STT Services
|
||||
@@ -34,7 +37,8 @@ An AWS account (the only supported STT provider as of Feb 12, 2019), and a recor
|
||||
|
||||
Here are all the available speech-to-text services:
|
||||
|
||||
amazon (60 minutes per month for the first 12 months)
|
||||
amazon $0.006 per 15 seconds
|
||||
google $0.009 per 15 seconds
|
||||
|
||||
### Get Something Transcribed
|
||||
$ transcribe this <path_to_media_file> <service_name>
|
||||
@@ -51,6 +55,7 @@ An AWS account (the only supported STT provider as of Feb 12, 2019), and a recor
|
||||
amazon messed_up.mp4 FAILED
|
||||
amazon done_test.mp3 COMPLETED
|
||||
amazon also_done.MP3 COMPLETED
|
||||
google hey_there.mp3 COMPLETED
|
||||
|
||||
|
||||
$ transcribe list <job_name>
|
||||
@@ -85,10 +90,10 @@ An AWS account (the only supported STT provider as of Feb 12, 2019), and a recor
|
||||
## Services Supported
|
||||
|
||||
- [Amazon Transcribe](https://aws.amazon.com/transcribe/)
|
||||
- [Google Speech](https://cloud.google.com/speech-to-text/)
|
||||
|
||||
### Planned
|
||||
- [Watson](https://www.ibm.com/watson/services/speech-to-text/)
|
||||
- [Google Speech](https://cloud.google.com/speech-to-text/)
|
||||
- [Kaldi](https://github.com/kaldi-asr/kaldi) [ and/or things built on it ](https://github.com/lowerquality/gentle)
|
||||
- [Speechmatics](https://www.speechmatics.com/)
|
||||
- [Mozilla's new open-source STT thing](https://github.com/mozilla/DeepSpeech)
|
||||
|
||||
5
setup.py
5
setup.py
@@ -6,7 +6,7 @@ with open('README.md') as file:
|
||||
|
||||
setup(
|
||||
name="tatt",
|
||||
version="0.972",
|
||||
version="0.973",
|
||||
py_modules=['tatt'],
|
||||
url='https://github.com/zevaverbach/tatt',
|
||||
install_requires=[
|
||||
@@ -14,6 +14,9 @@ setup(
|
||||
'awscli',
|
||||
'boto3',
|
||||
'requests',
|
||||
'google-cloud-speech',
|
||||
'audioread',
|
||||
'google-cloud-storage',
|
||||
],
|
||||
include_package_data=True,
|
||||
packages=find_packages(),
|
||||
|
||||
@@ -87,6 +87,15 @@ def get_transcription_jobs(
|
||||
jobs = service.get_transcription_jobs(job_name_query=name,
|
||||
status=status)
|
||||
if jobs:
|
||||
if name:
|
||||
exact_match_jobs = []
|
||||
for job in jobs:
|
||||
if job['name'] == name:
|
||||
exact_match_jobs.append(job)
|
||||
if not exact_match_jobs:
|
||||
continue
|
||||
else:
|
||||
jobs = exact_match_jobs
|
||||
all_jobs[stt_name] = jobs
|
||||
|
||||
if name and len(all_jobs) == 0:
|
||||
|
||||
@@ -79,8 +79,9 @@ def status(job_name):
|
||||
raise click.ClickException('no job by that name')
|
||||
for job_list in jobs.values():
|
||||
for job in job_list:
|
||||
click.echo(job['status'])
|
||||
click.echo(f'{job["name"]}\t{job["status"]}')
|
||||
break
|
||||
break
|
||||
|
||||
|
||||
@cli.command()
|
||||
|
||||
16
tatt/vendors/google.py
vendored
16
tatt/vendors/google.py
vendored
@@ -165,8 +165,14 @@ class Transcriber(TranscriberBaseClass):
|
||||
blob.upload_from_filename(path)
|
||||
|
||||
@classmethod
|
||||
def get_transcription_jobs(cls, job_name_query, status) -> List[dict]:
|
||||
return [
|
||||
{'name': t.name, 'status': 'COMPLETED'}
|
||||
for t in cls.transcript_bucket.list_blobs()
|
||||
]
|
||||
def get_transcription_jobs(cls, job_name_query=None, status=None) -> List[dict]:
|
||||
del status
|
||||
|
||||
jobs = []
|
||||
|
||||
for t in cls.transcript_bucket.list_blobs():
|
||||
if job_name_query is not None and t.name != job_name_query:
|
||||
continue
|
||||
jobs.append({'name': t.name, 'status': 'COMPLETED'})
|
||||
|
||||
return jobs
|
||||
|
||||
Reference in New Issue
Block a user