CLI is working for submitting jobs to Amazon. started retrieval and listing functionality
This commit is contained in:
5
Pipfile
5
Pipfile
@@ -6,11 +6,12 @@ verify_ssl = true
|
|||||||
[dev-packages]
|
[dev-packages]
|
||||||
|
|
||||||
[packages]
|
[packages]
|
||||||
requests = "*"
|
|
||||||
awscli = "*"
|
|
||||||
boto3 = "*"
|
boto3 = "*"
|
||||||
|
awscli = "*"
|
||||||
|
pytest = "*"
|
||||||
click = "*"
|
click = "*"
|
||||||
tatt = {editable = true,path = "."}
|
tatt = {editable = true,path = "."}
|
||||||
|
ipython = "*"
|
||||||
|
|
||||||
[requires]
|
[requires]
|
||||||
python_version = "3.7"
|
python_version = "3.7"
|
||||||
|
|||||||
168
Pipfile.lock
generated
168
Pipfile.lock
generated
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"_meta": {
|
"_meta": {
|
||||||
"hash": {
|
"hash": {
|
||||||
"sha256": "80cec15bc1119ce4635c01c8595743c2dd3c78c667fe051cc55e5420e7ee83f4"
|
"sha256": "006d8177b930549d4028114a64abd22e8f5ba739d3d61751813138e3f0922854"
|
||||||
},
|
},
|
||||||
"pipfile-spec": 6,
|
"pipfile-spec": 6,
|
||||||
"requires": {
|
"requires": {
|
||||||
@@ -16,28 +16,57 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"default": {
|
"default": {
|
||||||
|
"appnope": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:5b26757dc6f79a3b7dc9fab95359328d5747fcb2409d331ea66d0272b90ab2a0",
|
||||||
|
"sha256:8b995ffe925347a2138d7ac0fe77155e4311a0ea6d6da4f5128fe4b3cbe5ed71"
|
||||||
|
],
|
||||||
|
"markers": "sys_platform == 'darwin'",
|
||||||
|
"version": "==0.1.0"
|
||||||
|
},
|
||||||
|
"atomicwrites": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4",
|
||||||
|
"sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6"
|
||||||
|
],
|
||||||
|
"version": "==1.3.0"
|
||||||
|
},
|
||||||
|
"attrs": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:10cbf6e27dbce8c30807caf056c8eb50917e0eaafe86347671b57254006c3e69",
|
||||||
|
"sha256:ca4be454458f9dec299268d472aaa5a11f67a4ff70093396e1ceae9c76cf4bbb"
|
||||||
|
],
|
||||||
|
"version": "==18.2.0"
|
||||||
|
},
|
||||||
"awscli": {
|
"awscli": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:92d8637f1c65252d586f6e88a521f1b809d2e6895b92a072a95fb9ccf32d22a3",
|
"sha256:165ebffb2ff10d0a40fdc985f08bc7a93e08ef7a8f8f68d6f76211935806d43f",
|
||||||
"sha256:a9dd44db98f70c449bdd2ba27098e9c8023bdfdf93bba1183294be52c6156a69"
|
"sha256:28d457973c97bbe154574ba8902e1c47335a80cfb74836bd49d1e3632104fbda"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==1.16.99"
|
"version": "==1.16.102"
|
||||||
|
},
|
||||||
|
"backcall": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4",
|
||||||
|
"sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2"
|
||||||
|
],
|
||||||
|
"version": "==0.1.0"
|
||||||
},
|
},
|
||||||
"boto3": {
|
"boto3": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:465b4da5d292373f9ec5bb8834f26251a5f464f2ce9da1756988c16bb5e49cff",
|
"sha256:2bcda6aa7cbc51a30fc49f9129500c4df8b92fee3b4a44562c9d595bf32c4dcd",
|
||||||
"sha256:6ca40ef1893eacb37a3696bb2a5739a9b33a7d978658b451f4d87729cb5ec576"
|
"sha256:609900ca26f379123911b51ced68e437322ff3c347deaac7d84a53710d612c2c"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
"index": "pypi",
|
||||||
"version": "==1.9.89"
|
"version": "==1.9.92"
|
||||||
},
|
},
|
||||||
"botocore": {
|
"botocore": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:2257dc1c012f535ef364b6b60fc9fdc822605fafd6765c3095385528669260aa",
|
"sha256:19a48491bb0f22ea95f26ed3bd9ca9e0cd35aadf04027774995817d6403abec9",
|
||||||
"sha256:b0b9f204cbba3ad7a523f7b274e2d0ca252384e0c114fdfe94c00eb205fb2537"
|
"sha256:97a43a70876dae5ebe4334db8ea846181467b80adc45f681720c9bb859491bf5"
|
||||||
],
|
],
|
||||||
"version": "==1.12.89"
|
"version": "==1.12.92"
|
||||||
},
|
},
|
||||||
"certifi": {
|
"certifi": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
@@ -68,6 +97,13 @@
|
|||||||
],
|
],
|
||||||
"version": "==0.3.9"
|
"version": "==0.3.9"
|
||||||
},
|
},
|
||||||
|
"decorator": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:33cd704aea07b4c28b3eb2c97d288a06918275dac0ecebdaf1bc8a48d98adb9e",
|
||||||
|
"sha256:cabb249f4710888a2fc0e13e9a16c343d932033718ff62e1e9bc93a9d3a9122b"
|
||||||
|
],
|
||||||
|
"version": "==4.3.2"
|
||||||
|
},
|
||||||
"docutils": {
|
"docutils": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
|
"sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
|
||||||
@@ -83,6 +119,28 @@
|
|||||||
],
|
],
|
||||||
"version": "==2.8"
|
"version": "==2.8"
|
||||||
},
|
},
|
||||||
|
"ipython": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:6a9496209b76463f1dec126ab928919aaf1f55b38beb9219af3fe202f6bbdd12",
|
||||||
|
"sha256:f69932b1e806b38a7818d9a1e918e5821b685715040b48e59c657b3c7961b742"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==7.2.0"
|
||||||
|
},
|
||||||
|
"ipython-genutils": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
|
||||||
|
"sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
|
||||||
|
],
|
||||||
|
"version": "==0.2.0"
|
||||||
|
},
|
||||||
|
"jedi": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:571702b5bd167911fe9036e5039ba67f820d6502832285cde8c881ab2b2149fd",
|
||||||
|
"sha256:c8481b5e59d34a5c7c42e98f6625e633f6ef59353abea6437472c7ec2093f191"
|
||||||
|
],
|
||||||
|
"version": "==0.13.2"
|
||||||
|
},
|
||||||
"jmespath": {
|
"jmespath": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:6a81d4c9aa62caf061cb517b4d9ad1dd300374cd4706997aff9cd6aedd61fc64",
|
"sha256:6a81d4c9aa62caf061cb517b4d9ad1dd300374cd4706997aff9cd6aedd61fc64",
|
||||||
@@ -90,6 +148,64 @@
|
|||||||
],
|
],
|
||||||
"version": "==0.9.3"
|
"version": "==0.9.3"
|
||||||
},
|
},
|
||||||
|
"more-itertools": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:0125e8f60e9e031347105eb1682cef932f5e97d7b9a1a28d9bf00c22a5daef40",
|
||||||
|
"sha256:590044e3942351a1bdb1de960b739ff4ce277960f2425ad4509446dbace8d9d1"
|
||||||
|
],
|
||||||
|
"version": "==6.0.0"
|
||||||
|
},
|
||||||
|
"parso": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:6ecf7244be8e7283ec9009c72d074830e7e0e611c974f813d76db0390a4e0dd6",
|
||||||
|
"sha256:8162be7570ffb34ec0b8d215d7f3b6c5fab24f51eb3886d6dee362de96b6db94"
|
||||||
|
],
|
||||||
|
"version": "==0.3.3"
|
||||||
|
},
|
||||||
|
"pexpect": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba",
|
||||||
|
"sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b"
|
||||||
|
],
|
||||||
|
"markers": "sys_platform != 'win32'",
|
||||||
|
"version": "==4.6.0"
|
||||||
|
},
|
||||||
|
"pickleshare": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
|
||||||
|
"sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
|
||||||
|
],
|
||||||
|
"version": "==0.7.5"
|
||||||
|
},
|
||||||
|
"pluggy": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:8ddc32f03971bfdf900a81961a48ccf2fb677cf7715108f85295c67405798616",
|
||||||
|
"sha256:980710797ff6a041e9a73a5787804f848996ecaa6f8a1b1e08224a5894f2074a"
|
||||||
|
],
|
||||||
|
"version": "==0.8.1"
|
||||||
|
},
|
||||||
|
"prompt-toolkit": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:88002cc618cacfda8760c4539e76c3b3f148ecdb7035a3d422c7ecdc90c2a3ba",
|
||||||
|
"sha256:c6655a12e9b08edb8cf5aeab4815fd1e1bdea4ad73d3bbf269cf2e0c4eb75d5e",
|
||||||
|
"sha256:df5835fb8f417aa55e5cafadbaeb0cf630a1e824aad16989f9f0493e679ec010"
|
||||||
|
],
|
||||||
|
"version": "==2.0.8"
|
||||||
|
},
|
||||||
|
"ptyprocess": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
|
||||||
|
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
|
||||||
|
],
|
||||||
|
"version": "==0.6.0"
|
||||||
|
},
|
||||||
|
"py": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:bf92637198836372b520efcba9e020c330123be8ce527e535d185ed4b6f45694",
|
||||||
|
"sha256:e76826342cefe3c3d5f7e8ee4316b80d1dd8a300781612ddbc765c17ba25a6c6"
|
||||||
|
],
|
||||||
|
"version": "==1.7.0"
|
||||||
|
},
|
||||||
"pyasn1": {
|
"pyasn1": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:da2420fe13a9452d8ae97a0e478adde1dee153b11ba832a95b223a2ba01c10f7",
|
"sha256:da2420fe13a9452d8ae97a0e478adde1dee153b11ba832a95b223a2ba01c10f7",
|
||||||
@@ -97,6 +213,21 @@
|
|||||||
],
|
],
|
||||||
"version": "==0.4.5"
|
"version": "==0.4.5"
|
||||||
},
|
},
|
||||||
|
"pygments": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:5ffada19f6203563680669ee7f53b64dabbeb100eb51b61996085e99c03b284a",
|
||||||
|
"sha256:e8218dd399a61674745138520d0d4cf2621d7e032439341bc3f647bff125818d"
|
||||||
|
],
|
||||||
|
"version": "==2.3.1"
|
||||||
|
},
|
||||||
|
"pytest": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:65aeaa77ae87c7fc95de56285282546cfa9c886dc8e5dc78313db1c25e21bc07",
|
||||||
|
"sha256:6ac6d467d9f053e95aaacd79f831dbecfe730f419c6c7022cb316b365cd9199d"
|
||||||
|
],
|
||||||
|
"index": "pypi",
|
||||||
|
"version": "==4.2.0"
|
||||||
|
},
|
||||||
"python-dateutil": {
|
"python-dateutil": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb",
|
"sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb",
|
||||||
@@ -119,14 +250,13 @@
|
|||||||
"sha256:e01d3203230e1786cd91ccfdc8f8454c8069c91bee3962ad93b87a4b2860f537",
|
"sha256:e01d3203230e1786cd91ccfdc8f8454c8069c91bee3962ad93b87a4b2860f537",
|
||||||
"sha256:e170a9e6fcfd19021dd29845af83bb79236068bf5fd4df3327c1be18182b2531"
|
"sha256:e170a9e6fcfd19021dd29845af83bb79236068bf5fd4df3327c1be18182b2531"
|
||||||
],
|
],
|
||||||
"version": ">=4.2b1"
|
"version": "==3.13"
|
||||||
},
|
},
|
||||||
"requests": {
|
"requests": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e",
|
"sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e",
|
||||||
"sha256:7bf2a778576d825600030a110f3c0e3e8edc51dfaafe1c146e39a2027784957b"
|
"sha256:7bf2a778576d825600030a110f3c0e3e8edc51dfaafe1c146e39a2027784957b"
|
||||||
],
|
],
|
||||||
"index": "pypi",
|
|
||||||
"version": "==2.21.0"
|
"version": "==2.21.0"
|
||||||
},
|
},
|
||||||
"rsa": {
|
"rsa": {
|
||||||
@@ -154,6 +284,13 @@
|
|||||||
"editable": true,
|
"editable": true,
|
||||||
"path": "."
|
"path": "."
|
||||||
},
|
},
|
||||||
|
"traitlets": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835",
|
||||||
|
"sha256:c6cb5e6f57c5a9bdaa40fa71ce7b4af30298fbab9ece9815b5d995ab6217c7d9"
|
||||||
|
],
|
||||||
|
"version": "==4.3.2"
|
||||||
|
},
|
||||||
"urllib3": {
|
"urllib3": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39",
|
"sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39",
|
||||||
@@ -161,6 +298,13 @@
|
|||||||
],
|
],
|
||||||
"markers": "python_version >= '3.4'",
|
"markers": "python_version >= '3.4'",
|
||||||
"version": "==1.24.1"
|
"version": "==1.24.1"
|
||||||
|
},
|
||||||
|
"wcwidth": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e",
|
||||||
|
"sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c"
|
||||||
|
],
|
||||||
|
"version": "==0.1.7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"develop": {}
|
"develop": {}
|
||||||
|
|||||||
18
config.py
18
config.py
@@ -1,18 +1,22 @@
|
|||||||
import os
|
import os
|
||||||
|
from pathlib import Path
|
||||||
from tatt.vendors import (
|
import sqlite3
|
||||||
amazon,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
STT_SERVICES = {
|
STT_SERVICES = {
|
||||||
'amazon': {
|
'amazon': {
|
||||||
'cost_per_minute': .024,
|
'cost_per_minute': .024,
|
||||||
'free': '60_minutes_per_month_for_the_first_12_months',
|
'free': '60_minutes_per_month_for_the_first_12_months',
|
||||||
'function': amazon.transcribe,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_BUCKET_NAME_FORMATTER = 'tatt_{}'
|
AWS_BUCKET_NAME_FMTR_MEDIA = 'tatt-media-{}'
|
||||||
AWS_CREDENTIALS_FILEPATH = os.getenv('AWS_CREDENTIALS_FILEPATH') or '~/.aws/credentials'
|
AWS_BUCKET_NAME_FMTR_TRANSCRIPT = 'tatt-transcript-{}'
|
||||||
|
AWS_CREDENTIALS_FILEPATH = (
|
||||||
|
os.getenv('AWS_CREDENTIALS_FILEPATH')
|
||||||
|
or Path.home() / '.aws/credentials'
|
||||||
|
)
|
||||||
|
AWS_REGION = 'us-east-1'
|
||||||
|
|
||||||
|
SERVICE_CLASS_NAME = 'transcribe'
|
||||||
|
|||||||
23
helpers.py
Normal file
23
helpers.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
import config
|
||||||
|
|
||||||
|
|
||||||
|
def print_all_services(free_only=False, print_=True):
|
||||||
|
# TODO: make a jinja template for this
|
||||||
|
all_services_string = (
|
||||||
|
'\n\nHere are all the available ' +
|
||||||
|
f'{"free " if free_only else ""}speech-to-text services:' +
|
||||||
|
'\n\n' +
|
||||||
|
'\n'.join(['{}{}{}{}'.format('\t', service_name, '\t\t',
|
||||||
|
|
||||||
|
f'({info["free"].replace("_", " ")})'
|
||||||
|
if isinstance(info["free"], str) else ""
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
|
for service_name, info in
|
||||||
|
config.STT_SERVICES.items()])
|
||||||
|
+ '\n'
|
||||||
|
)
|
||||||
|
if print_:
|
||||||
|
print(all_services_string)
|
||||||
|
return all_services_string
|
||||||
116
tatt/vendors/amazon.py
vendored
116
tatt/vendors/amazon.py
vendored
@@ -1,56 +1,130 @@
|
|||||||
|
import json
|
||||||
import os
|
import os
|
||||||
from pathlib import PurePath
|
from pathlib import PurePath
|
||||||
from subprocess import check_output
|
from subprocess import check_output
|
||||||
|
import uuid
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
|
|
||||||
import config
|
import config
|
||||||
|
|
||||||
NAME = 'amazon'
|
NAME = 'amazon'
|
||||||
BUCKET_NAME = config.DEFAULT_BUCKET_NAME_FORMATTER.format(NAME)):
|
BUCKET_NAME_MEDIA = config.AWS_BUCKET_NAME_FMTR_MEDIA.format(NAME)
|
||||||
|
BUCKET_NAME_TRANSCRIPT = config.AWS_BUCKET_NAME_FMTR_TRANSCRIPT.format(NAME)
|
||||||
|
tr = boto3.client('transcribe')
|
||||||
|
s3 = boto3.resource('s3')
|
||||||
|
|
||||||
|
|
||||||
class ConfigError(Exception):
|
class ConfigError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Transcribe:
|
class transcribe:
|
||||||
|
|
||||||
bucket_name = BUCKET_NAME
|
bucket_names = {'media': BUCKET_NAME_MEDIA,
|
||||||
|
'transcript': BUCKET_NAME_TRANSCRIPT}
|
||||||
|
|
||||||
def __init__(self, filepath):
|
def __init__(self, filepath):
|
||||||
self._setup()
|
self._setup()
|
||||||
self.s3 = boto3.resource('s3')
|
|
||||||
self.filepath = PurePath(filepath)
|
self.filepath = PurePath(filepath)
|
||||||
|
self.basename = str(os.path.basename(self.filepath))
|
||||||
|
self.media_file_uri = (
|
||||||
|
f"https://s3-{config.AWS_REGION}.amazonaws.com/"
|
||||||
|
f"{self.bucket_names['media']}/{self.basename}")
|
||||||
|
|
||||||
def setup(self):
|
def _setup(self):
|
||||||
if not check_for_credentials():
|
if not check_for_credentials():
|
||||||
make_credentials() and check_for_credentials() or raise ConfigError
|
make_credentials()
|
||||||
if not self.check_for_bucket():
|
if not check_for_credentials():
|
||||||
self.make_bucket()
|
raise ConfigError
|
||||||
|
for bucket_name in self.bucket_names.values():
|
||||||
|
if not self.check_for_bucket(bucket_name):
|
||||||
|
self.make_bucket(bucket_name)
|
||||||
|
|
||||||
def check_for_bucket(self):
|
def check_for_bucket(self, bucket_name):
|
||||||
return bool(self.s3.Bucket(self.bucket_name).creation_date)
|
return bool(s3.Bucket(bucket_name).creation_date)
|
||||||
|
|
||||||
def make_bucket(self):
|
def make_bucket(self, bucket_name):
|
||||||
s3.create_bucket(Bucket=self.bucket_name)
|
s3.create_bucket(Bucket=bucket_name)
|
||||||
|
|
||||||
def transcribe(self):
|
def transcribe(self):
|
||||||
upload_file(self.filepath)
|
self._upload_file()
|
||||||
self.request_transcription()
|
return self._request_transcription()
|
||||||
|
|
||||||
def upload_file(self):
|
def _upload_file(self):
|
||||||
basename = os.path.basename(filepath)
|
s3.Bucket(self.bucket_names['media']).upload_file(
|
||||||
s3.Bucket(bucket_name).upload_file(filepath, basename)
|
str(self.filepath),
|
||||||
return basename
|
self.basename)
|
||||||
|
|
||||||
def request_transcription(self):
|
def _request_transcription(self, language_code='en-US'):
|
||||||
|
job_name = str(uuid.uuid4())
|
||||||
|
tr.start_transcription_job(
|
||||||
|
TranscriptionJobName=job_name,
|
||||||
|
LanguageCode=language_code,
|
||||||
|
MediaFormat=self.basename.split('.')[-1].lower(),
|
||||||
|
Media={
|
||||||
|
'MediaFileUri': self.media_file_uri
|
||||||
|
},
|
||||||
|
OutputBucketName=self.bucket_names['transcript']
|
||||||
|
)
|
||||||
|
return job_name
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_completed_jobs():
|
||||||
|
return transcribe.get_transcription_jobs(status='completed')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_pending_jobs():
|
||||||
|
return transcribe.get_transcription_jobs(status='in_progress')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_all_jobs():
|
||||||
|
return transcribe.get_transcription_jobs()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_transcription_jobs(status=None):
|
||||||
|
kwargs = {'MaxResults': 100}
|
||||||
|
if status is not None:
|
||||||
|
kwargs['Status'] = status.upper()
|
||||||
|
jobs_data = tr.list_transcription_jobs(**kwargs)
|
||||||
|
jobs = homogenize_transcription_job_data(jobs_data['TranscriptionJobSummaries'])
|
||||||
|
while jobs_data.get('NextToken'):
|
||||||
|
jobs_data = tr.list_transcription_jobs(NextToken=jobs_data['NextToken'])
|
||||||
|
jobs += homogenize_transcription_job_data(
|
||||||
|
jobs_data['TranscriptionJobSummaries'])
|
||||||
|
return jobs
|
||||||
|
|
||||||
|
|
||||||
|
def homogenize_transcription_job_data(transcription_job_data):
|
||||||
|
return [{
|
||||||
|
'created': jd['CreationTime'],
|
||||||
|
'name': jd['TranscriptionJobName'],
|
||||||
|
'status': jd['TranscriptionJobStatus']
|
||||||
|
}
|
||||||
|
for jd in transcription_job_data]
|
||||||
|
|
||||||
|
|
||||||
|
def retrieve_transcript(transcription_job_name):
|
||||||
|
job = tr.get_transcription_job(
|
||||||
|
TranscriptionJobName=transcription_job_name
|
||||||
|
)['TranscriptionJob']
|
||||||
|
|
||||||
|
if not job['TranscriptionJobStatus'] == 'COMPLETED':
|
||||||
|
return
|
||||||
|
|
||||||
|
transcript_file_uri = job['Transcript']['TranscriptFileUri']
|
||||||
|
transcript_path = transcript_file_uri.split("amazonaws.com/", 1)[1]
|
||||||
|
|
||||||
|
transcript_bucket = transcript_path.split('/', 1)[0]
|
||||||
|
transcript_key = transcript_path.split('/', 1)[1]
|
||||||
|
|
||||||
|
s3_object = s3.Object(transcript_bucket, transcript_key).get()
|
||||||
|
transcript_json = s3_object['Body'].read().decode('utf-8')
|
||||||
|
return json.loads(transcript_json)
|
||||||
|
|
||||||
|
|
||||||
def check_for_credentials():
|
def check_for_credentials():
|
||||||
os.path.exists(config.AWS_CREDENTIALS_FILEPATH)
|
return config.AWS_CREDENTIALS_FILEPATH.exists()
|
||||||
|
|
||||||
|
|
||||||
def make_credentials():
|
def make_credentials():
|
||||||
|
|||||||
19
tests/test_amazon.py
Normal file
19
tests/test_amazon.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
from tatt.vendors.amazon import transcribe, retrieve_transcript
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def test_transcribe_instantiate():
|
||||||
|
filepath = '/Users/zev/tester.mp3'
|
||||||
|
t = transcribe(filepath)
|
||||||
|
assert str(t.filepath) == filepath
|
||||||
|
assert t.basename == 'tester.mp3'
|
||||||
|
assert t.media_file_uri == (
|
||||||
|
f'https://s3-us-east-1.amazonaws.com/tatt-media-amazon/tester.mp3'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_retrieve():
|
||||||
|
filepath = '/Users/zev/tester.mp3'
|
||||||
|
t = retrieve_transcript('4db6808e-a7e8-4d8d-a1b7-753ab97094dc')
|
||||||
|
print(t)
|
||||||
|
assert t is not None
|
||||||
@@ -1,8 +1,12 @@
|
|||||||
|
from pprint import pprint
|
||||||
|
import sqlite3
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
|
||||||
from config import STT_SERVICES
|
import config
|
||||||
|
import helpers
|
||||||
|
from tatt import vendors
|
||||||
|
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
@@ -11,50 +15,55 @@ def cli():
|
|||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
@click.option('-f', '--free-only', is_flag=True)
|
@click.argument('uid', required=False)
|
||||||
def services(free_only):
|
def retrieve(name=None, service=None):
|
||||||
"""Lists available speech-to-text services."""
|
pending_jobs = [get_service(service_name).get_pending_jobs(name)
|
||||||
print_all_services(free_only)
|
for service_name, data in config.STT_SERVICES
|
||||||
|
if service is None
|
||||||
|
or service == service_name]
|
||||||
|
if not pending_jobs:
|
||||||
|
click.ClickException('no pending jobs currently!')
|
||||||
|
for job in pending_jobs:
|
||||||
|
print(dict(job))
|
||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
@click.option('-d', '--dry-run', default=False, help=(
|
@click.option('-f', '--free-only', is_flag=True)
|
||||||
|
def services(free_only):
|
||||||
|
"""Lists available speech-to-text services."""
|
||||||
|
helpers.print_all_services(free_only)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
|
||||||
|
@click.option('-d', '--dry-run', is_flag=True, help=(
|
||||||
'Do a dry run without actually submitting the media file for transcription'))
|
'Do a dry run without actually submitting the media file for transcription'))
|
||||||
@click.argument('media_filepath', type=click.File('r'))
|
@click.argument('media_filepath', type=str)
|
||||||
@click.argument('service_name', type=str)
|
@click.argument('service_name', type=str)
|
||||||
def this(dry_run, media_filepath, service_name):
|
def this(dry_run, media_filepath, service_name):
|
||||||
"""Transcribe All The Things!™"""
|
"""Transcribe All The Things!™"""
|
||||||
if service_name not in STT_SERVICES:
|
if service_name not in config.STT_SERVICES:
|
||||||
print()
|
print()
|
||||||
raise click.ClickException(
|
raise click.ClickException(
|
||||||
f'No such service! {print_all_services(print_=False)}')
|
f'No such service! {print_all_services(print_=False)}')
|
||||||
|
|
||||||
|
service = get_service(service_name)
|
||||||
|
s = service(media_filepath)
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
print('If this weren\'t a dry run, I would transcribe '
|
print('If this weren\'t a dry run, I would transcribe '
|
||||||
f'{media_filepath.name} using {service_name}')
|
f'{media_filepath} using {service_name}')
|
||||||
print(STT_SERVICES[service_name])
|
pprint(vars(s))
|
||||||
else:
|
else:
|
||||||
print(
|
print(
|
||||||
f'Okay, transcribing {media_filepath.name} using {service_name}...')
|
f'Okay, transcribing {media_filepath} using {service_name}...')
|
||||||
print(STT_SERVICES[service_name]['function'])
|
|
||||||
|
job_num = s.transcribe()
|
||||||
|
db.create_pending_job(job_num, s.basename, service_name)
|
||||||
|
print(f'Okay, job {job_num} is being transcribed. Use "retrieve" '
|
||||||
|
'command to download it.')
|
||||||
|
|
||||||
|
|
||||||
def print_all_services(free_only=False, print_=True):
|
def get_service(service_name):
|
||||||
# TODO: make a jinja template for this
|
return getattr(getattr(vendors, service_name), config.SERVICE_CLASS_NAME)
|
||||||
all_services_string = (
|
|
||||||
'\n\nHere are all the available ' +
|
|
||||||
f'{"free " if free_only else ""}speech-to-text services:' +
|
|
||||||
'\n\n' +
|
|
||||||
'\n'.join(['{}{}{}{}'.format('\t', service_name, '\t\t',
|
|
||||||
|
|
||||||
f'({info["free"].replace("_", " ")})'
|
|
||||||
if isinstance(info["free"], str) else ""
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
for service_name, info in
|
|
||||||
STT_SERVICES.items()])
|
|
||||||
+ '\n'
|
|
||||||
)
|
|
||||||
if print_:
|
|
||||||
print(all_services_string)
|
|
||||||
return all_services_string
|
|
||||||
|
|||||||
Reference in New Issue
Block a user