updated README, setup.py, and fixed #27

2019-03-06 23:22:36 -05:00
parent e19128f46c
commit 2767f2ff55
5 changed files with 34 additions and 10 deletions
--- a/README.md
+++ b/README.md
@@ -10,7 +10,9 @@ tatt is a CLI for creating and managing speech-to-text transcripts.

 ## Dependencies

-An AWS account (the only supported STT provider as of Feb 12, 2019), and a recording to transcribe!
+1. A recording to transcribe.
+2. Either (a) an AWS account, or (b) a Google Cloud account with the Speech-to-Text API and
+   Cloud Storage enabled.


 ## Usage
@@ -27,6 +29,7 @@ An AWS account (the only supported STT provider as of Feb 12, 2019), and a recor
      get       Downloads and/or saves completed transcript.
      list      Lists available STT services.
      services  Lists available speech-to-text services.
+      status    Check the status of a transcription job.
      this      Sends a media file to be transcribed.

 ### List All STT Services
@@ -34,7 +37,8 @@ An AWS account (the only supported STT provider as of Feb 12, 2019), and a recor

    Here are all the available speech-to-text services:

-      amazon		(60 minutes per month for the first 12 months)
+      amazon		$0.006 per 15 seconds
+      google		$0.009 per 15 seconds

 ### Get Something Transcribed
    $ transcribe this <path_to_media_file> <service_name>
@@ -51,6 +55,7 @@ An AWS account (the only supported STT provider as of Feb 12, 2019), and a recor
    amazon  messed_up.mp4                           FAILED
    amazon  done_test.mp3                           COMPLETED
    amazon  also_done.MP3                           COMPLETED
+    google  hey_there.mp3                           COMPLETED


    $ transcribe list <job_name>
@@ -85,10 +90,10 @@ An AWS account (the only supported STT provider as of Feb 12, 2019), and a recor
 ## Services Supported

  - [Amazon Transcribe](https://aws.amazon.com/transcribe/)
+  - [Google Speech](https://cloud.google.com/speech-to-text/)

 ### Planned
  - [Watson](https://www.ibm.com/watson/services/speech-to-text/) 
-  - [Google Speech](https://cloud.google.com/speech-to-text/)
  - [Kaldi](https://github.com/kaldi-asr/kaldi) [ and/or things built on it ](https://github.com/lowerquality/gentle)
  - [Speechmatics](https://www.speechmatics.com/)
  - [Mozilla's new open-source STT thing](https://github.com/mozilla/DeepSpeech)
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@ with open('README.md') as file:

 setup(
    name="tatt",
-    version="0.972",
+    version="0.973",
    py_modules=['tatt'],
    url='https://github.com/zevaverbach/tatt',
    install_requires=[
@@ -14,6 +14,9 @@ setup(
        'awscli',
        'boto3',
        'requests',
+        'google-cloud-speech',
+        'audioread',
+        'google-cloud-storage',
        ],
    include_package_data=True,
    packages=find_packages(),
--- a/tatt/helpers.py
+++ b/tatt/helpers.py
@@ -87,6 +87,15 @@ def get_transcription_jobs(
            jobs = service.get_transcription_jobs(job_name_query=name,
                                                  status=status)
            if jobs:
+                if name:
+                    exact_match_jobs = []
+                    for job in jobs:
+                        if job['name'] == name:
+                            exact_match_jobs.append(job)
+                    if not exact_match_jobs:
+                        continue
+                    else:
+                        jobs = exact_match_jobs
                all_jobs[stt_name] = jobs

    if name and len(all_jobs) == 0:
--- a/tatt/transcribe.py
+++ b/tatt/transcribe.py
@@ -79,8 +79,9 @@ def status(job_name):
        raise click.ClickException('no job by that name')
    for job_list in jobs.values():
        for job in job_list:
-            click.echo(job['status'])
+            click.echo(f'{job["name"]}\t{job["status"]}')
            break
+        break


@cli.command()
--- a/tatt/vendors/google.py
+++ b/tatt/vendors/google.py
@@ -165,8 +165,14 @@ class Transcriber(TranscriberBaseClass):
        blob.upload_from_filename(path)

    @classmethod
-    def get_transcription_jobs(cls, job_name_query, status) -> List[dict]:
-        return [
-                    {'name': t.name, 'status': 'COMPLETED'}
-                    for t in cls.transcript_bucket.list_blobs()
-            ]
+    def get_transcription_jobs(cls, job_name_query=None, status=None) -> List[dict]:
+        del status
+
+        jobs = []
+
+        for t in cls.transcript_bucket.list_blobs():
+            if job_name_query is not None and t.name != job_name_query:
+                continue
+            jobs.append({'name': t.name, 'status': 'COMPLETED'})
+
+        return jobs