diff --git a/Pipfile b/Pipfile index c50dea4..d3d79b9 100644 --- a/Pipfile +++ b/Pipfile @@ -7,6 +7,10 @@ verify_ssl = true [packages] requests = "*" +awscli = "*" +boto3 = "*" +click = "*" +tatt = {editable = true,path = "."} [requires] python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock index 247ee84..bbbdb18 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "bb57e0d7853b45999e47c163c46b95bc2fde31c527d8d7b5b5539dc979444a6d" + "sha256": "80cec15bc1119ce4635c01c8595743c2dd3c78c667fe051cc55e5420e7ee83f4" }, "pipfile-spec": 6, "requires": { @@ -16,6 +16,29 @@ ] }, "default": { + "awscli": { + "hashes": [ + "sha256:92d8637f1c65252d586f6e88a521f1b809d2e6895b92a072a95fb9ccf32d22a3", + "sha256:a9dd44db98f70c449bdd2ba27098e9c8023bdfdf93bba1183294be52c6156a69" + ], + "index": "pypi", + "version": "==1.16.99" + }, + "boto3": { + "hashes": [ + "sha256:465b4da5d292373f9ec5bb8834f26251a5f464f2ce9da1756988c16bb5e49cff", + "sha256:6ca40ef1893eacb37a3696bb2a5739a9b33a7d978658b451f4d87729cb5ec576" + ], + "index": "pypi", + "version": "==1.9.89" + }, + "botocore": { + "hashes": [ + "sha256:2257dc1c012f535ef364b6b60fc9fdc822605fafd6765c3095385528669260aa", + "sha256:b0b9f204cbba3ad7a523f7b274e2d0ca252384e0c114fdfe94c00eb205fb2537" + ], + "version": "==1.12.89" + }, "certifi": { "hashes": [ "sha256:47f9c83ef4c0c621eaef743f133f09fa8a74a9b75f037e8624f83bd1b6626cb7", @@ -30,6 +53,29 @@ ], "version": "==3.0.4" }, + "click": { + "hashes": [ + "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13", + "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7" + ], + "index": "pypi", + "version": "==7.0" + }, + "colorama": { + "hashes": [ + "sha256:463f8483208e921368c9f306094eb6f725c6ca42b0f97e313cb5d5512459feda", + "sha256:48eb22f4f8461b1df5734a074b57042430fb06e1d61bd1e11b078c0fe6d7a1f1" + ], + "version": "==0.3.9" + }, + "docutils": { + "hashes": [ + 
"sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6", + "sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274", + "sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6" + ], + "version": "==0.14" + }, "idna": { "hashes": [ "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", @@ -37,6 +83,44 @@ ], "version": "==2.8" }, + "jmespath": { + "hashes": [ + "sha256:6a81d4c9aa62caf061cb517b4d9ad1dd300374cd4706997aff9cd6aedd61fc64", + "sha256:f11b4461f425740a1d908e9a3f7365c3d2e569f6ca68a2ff8bc5bcd9676edd63" + ], + "version": "==0.9.3" + }, + "pyasn1": { + "hashes": [ + "sha256:da2420fe13a9452d8ae97a0e478adde1dee153b11ba832a95b223a2ba01c10f7", + "sha256:da6b43a8c9ae93bc80e2739efb38cc776ba74a886e3e9318d65fe81a8b8a2c6e" + ], + "version": "==0.4.5" + }, + "python-dateutil": { + "hashes": [ + "sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb", + "sha256:c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e" + ], + "markers": "python_version >= '2.7'", + "version": "==2.8.0" + }, + "pyyaml": { + "hashes": [ + "sha256:3d7da3009c0f3e783b2c873687652d83b1bbfd5c88e9813fb7e5b03c0dd3108b", + "sha256:3ef3092145e9b70e3ddd2c7ad59bdd0252a94dfe3949721633e41344de00a6bf", + "sha256:40c71b8e076d0550b2e6380bada1f1cd1017b882f7e16f09a65be98e017f211a", + "sha256:558dd60b890ba8fd982e05941927a3911dc409a63dcb8b634feaa0cda69330d3", + "sha256:a7c28b45d9f99102fa092bb213aa12e0aaf9a6a1f5e395d36166639c1f96c3a1", + "sha256:aa7dd4a6a427aed7df6fb7f08a580d68d9b118d90310374716ae90b710280af1", + "sha256:bc558586e6045763782014934bfaf39d48b8ae85a2713117d16c39864085c613", + "sha256:d46d7982b62e0729ad0175a9bc7e10a566fc07b224d2c79fafb5e032727eaa04", + "sha256:d5eef459e30b09f5a098b9cea68bebfeb268697f78d647bd255a085371ac7f3f", + "sha256:e01d3203230e1786cd91ccfdc8f8454c8069c91bee3962ad93b87a4b2860f537", + "sha256:e170a9e6fcfd19021dd29845af83bb79236068bf5fd4df3327c1be18182b2531" + ], + "version": "==3.13" + }, 
"requests": { "hashes": [ "sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e", @@ -45,11 +129,37 @@ "index": "pypi", "version": "==2.21.0" }, + "rsa": { + "hashes": [ + "sha256:25df4e10c263fb88b5ace923dd84bf9aa7f5019687b5e55382ffcdb8bede9db5", + "sha256:43f682fea81c452c98d09fc316aae12de6d30c4b5c84226642cf8f8fd1c93abd" + ], + "version": "==3.4.2" + }, + "s3transfer": { + "hashes": [ + "sha256:7b9ad3213bff7d357f888e0fab5101b56fa1a0548ee77d121c3a3dbfbef4cb2e", + "sha256:f23d5cb7d862b104401d9021fc82e5fa0e0cf57b7660a1331425aab0c691d021" + ], + "version": "==0.2.0" + }, + "six": { + "hashes": [ + "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", + "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" + ], + "version": "==1.12.0" + }, + "tatt": { + "editable": true, + "path": "." + }, "urllib3": { "hashes": [ "sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39", "sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22" ], + "markers": "python_version >= '3.4'", "version": "==1.24.1" } }, diff --git a/config.py b/config.py new file mode 100644 index 0000000..94082a6 --- /dev/null +++ b/config.py @@ -0,0 +1,18 @@ +import os + +from tatt.vendors import ( + amazon, + ) + + +STT_SERVICES = { + 'amazon': { + 'cost_per_minute': .024, + 'free': '60_minutes_per_month_for_the_first_12_months', + 'function': amazon.transcribe, + }, + } + + +DEFAULT_BUCKET_NAME_FORMATTER = 'tatt_{}' +AWS_CREDENTIALS_FILEPATH = os.getenv('AWS_CREDENTIALS_FILEPATH') or '~/.aws/credentials' diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ba87bba --- /dev/null +++ b/setup.py @@ -0,0 +1,18 @@ +from setuptools import setup + + +setup( + name="tatt", + version="0.1", + py_modules=['tatt'], + install_requires=[ + 'Click', + 'awscli', + 'boto3', + 'requests', + ], + entry_points=''' + [console_scripts] + transcribe=transcribe:cli + ''', + ) diff --git a/tatt/vendors/__init__.py 
b/tatt/vendors/__init__.py
new file mode 100644
index 0000000..6f7f7cc
--- /dev/null
+++ b/tatt/vendors/__init__.py
@@ -0,0 +1 @@
+from tatt.vendors import amazon
diff --git a/tatt/vendors/amazon.py b/tatt/vendors/amazon.py
new file mode 100644
index 0000000..6ba1a91
--- /dev/null
+++ b/tatt/vendors/amazon.py
@@ -0,0 +1,135 @@
+"""Amazon vendor: stages media in S3 and requests its transcription."""
+import os
+from pathlib import PurePath
+from subprocess import call, check_output
+
+import boto3
+
+import config
+
+NAME = 'amazon'
+
+
+def _default_bucket_name():
+    """Return the bucket name derived from config for this vendor."""
+    return config.DEFAULT_BUCKET_NAME_FORMATTER.format(NAME)
+
+
+def __getattr__(name):
+    """Provide the BUCKET_NAME module attribute lazily (PEP 562).
+
+    config imports this module before defining its own constants, so the
+    formatter cannot be read while this module is still being imported.
+    """
+    if name == 'BUCKET_NAME':
+        return _default_bucket_name()
+    raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
+
+
+class ConfigError(Exception):
+    """Raised when no AWS credentials file can be found or created."""
+
+
+class Transcribe:
+    """Stage one media file in S3 for transcription by Amazon.
+
+    Creating an instance checks for AWS credentials and for the staging
+    bucket, creating either one if it is missing.
+    """
+
+    # None means "use the name derived from config"; override to change it.
+    bucket_name = None
+
+    def __init__(self, filepath):
+        self.filepath = PurePath(filepath)
+        if self.bucket_name is None:
+            self.bucket_name = _default_bucket_name()
+        self.s3 = None
+        self.setup()
+
+    def setup(self):
+        """Make sure credentials and the staging bucket exist.
+
+        Raises:
+            ConfigError: if there is still no credentials file after
+                running `aws configure`.
+        """
+        if not check_for_credentials():
+            make_credentials()
+            if not check_for_credentials():
+                raise ConfigError(
+                    'No AWS credentials found at '
+                    f'{config.AWS_CREDENTIALS_FILEPATH}')
+        # Created only after the credentials step so that a file just
+        # written by `aws configure` is there when boto3 looks for it.
+        self.s3 = boto3.resource('s3')
+        if not self.check_for_bucket():
+            self.make_bucket()
+
+    def check_for_bucket(self):
+        """Return True if the staging bucket reports a creation date."""
+        return bool(self.s3.Bucket(self.bucket_name).creation_date)
+
+    def make_bucket(self):
+        """Create the staging bucket in the region the client uses."""
+        region = self.s3.meta.client.meta.region_name
+        kwargs = {}
+        # S3 wants an explicit LocationConstraint outside us-east-1.
+        if region and region != 'us-east-1':
+            kwargs['CreateBucketConfiguration'] = {
+                'LocationConstraint': region}
+        self.s3.create_bucket(Bucket=self.bucket_name, **kwargs)
+
+    def transcribe(self):
+        """Upload the media file, then request its transcription."""
+        self.upload_file()
+        return self.request_transcription()
+
+    def upload_file(self):
+        """Upload the media file to the bucket under its base name.
+
+        Returns:
+            The object key used in the bucket.
+        """
+        basename = os.path.basename(self.filepath)
+        # NOTE(review): older boto3 releases reject non-str filenames,
+        # hence the explicit str().
+        self.s3.Bucket(self.bucket_name).upload_file(
+            str(self.filepath), basename)
+        return basename
+
+    def request_transcription(self):
+        """Request transcription of the uploaded file (not written yet).
+
+        Raises:
+            NotImplementedError: always, until the request is implemented.
+        """
+        raise NotImplementedError(
+            'Requesting an Amazon transcription is not implemented yet')
+
+
+def transcribe(filepath):
+    """Transcribe the media file at filepath; used by config.STT_SERVICES."""
+    return Transcribe(filepath).transcribe()
+
+
+def check_for_credentials():
+    """Return True if the configured AWS credentials file exists."""
+    path = os.path.expanduser(config.AWS_CREDENTIALS_FILEPATH)
+    return os.path.exists(path)
+
+
+def make_credentials():
+    """Run the interactive `aws configure` command.
+
+    Returns:
+        True if the command exited with status 0.
+    """
+    # Run attached to the terminal: capturing stdout (as check_output
+    # does) would keep the command's prompts from reaching the user.
+    return call(['aws', 'configure']) == 0
+
+
+def shell_call(command):
+    """Run command through the shell and return its captured stdout."""
+    return check_output(command, shell=True)
diff --git a/transcribe.py b/transcribe.py
new file mode 100644
index 0000000..48bc85c
--- /dev/null
+++ b/transcribe.py
@@ -0,0 +1,73 @@
+"""Command-line interface for tatt."""
+import sys
+
+import click
+
+from config import STT_SERVICES
+
+
+@click.group()
+def cli():
+    # Command group only; the subcommands below do the work.
+    pass
+
+
+@cli.command()
+@click.option('-f', '--free-only', is_flag=True)
+def services(free_only):
+    """Lists available speech-to-text services."""
+    print_all_services(free_only)
+
+
+@cli.command()
+# --dry-run is a switch; without is_flag click would demand a value for it.
+@click.option('-d', '--dry-run', is_flag=True, help=(
+    'Do a dry run without actually submitting the media file for transcription'))
+# A path rather than click.File('r'): only the file's name is used here, so
+# there is no reason to open the media file (least of all in text mode).
+@click.argument(
+    'media_filepath', type=click.Path(exists=True, dir_okay=False))
+@click.argument('service_name', type=str)
+def this(dry_run, media_filepath, service_name):
+    """Transcribe All The Things!™"""
+    if service_name not in STT_SERVICES:
+        print()
+        raise click.ClickException(
+            f'No such service! {print_all_services(print_=False)}')
+    if dry_run:
+        print('If this weren\'t a dry run, I would transcribe '
+              f'{media_filepath} using {service_name}')
+        print(STT_SERVICES[service_name])
+    else:
+        print(
+            f'Okay, transcribing {media_filepath} using {service_name}...')
+        # TODO: call the vendor function; for now it is only displayed.
+        print(STT_SERVICES[service_name]['function'])
+
+
+def print_all_services(free_only=False, print_=True):
+    """Build, and optionally print, the listing of available services.
+
+    Args:
+        free_only: list only the services that describe a free tier.
+        print_: also print the listing when true.
+
+    Returns:
+        The listing as a single string.
+    """
+    # TODO: make a jinja template for this
+    lines = []
+    for service_name, info in STT_SERVICES.items():
+        free = info.get('free')
+        if free_only and not free:
+            continue
+        free_note = (
+            f'({free.replace("_", " ")})' if isinstance(free, str) else '')
+        lines.append(f'\t{service_name}\t\t{free_note}')
+    all_services_string = (
+        '\n\nHere are all the available '
+        f'{"free " if free_only else ""}speech-to-text services:'
+        '\n\n' + '\n'.join(lines) + '\n')
+    if print_:
+        print(all_services_string)
+    return all_services_string