added speaker diarization for amazon, not in CLI yet
This commit is contained in:
2
setup.py
2
setup.py
@@ -6,7 +6,7 @@ with open('README.md') as file:
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="tatt",
|
name="tatt",
|
||||||
version="0.974",
|
version="0.975",
|
||||||
py_modules=['tatt'],
|
py_modules=['tatt'],
|
||||||
url='https://github.com/zevaverbach/tatt',
|
url='https://github.com/zevaverbach/tatt',
|
||||||
install_requires=[
|
install_requires=[
|
||||||
|
|||||||
24
tatt/vendors/amazon.py
vendored
24
tatt/vendors/amazon.py
vendored
@@ -70,10 +70,10 @@ class Transcriber(TranscriberBaseClass):
|
|||||||
def make_bucket(cls, bucket_name):
|
def make_bucket(cls, bucket_name):
|
||||||
cls.s3.create_bucket(Bucket=bucket_name)
|
cls.s3.create_bucket(Bucket=bucket_name)
|
||||||
|
|
||||||
def transcribe(self) -> str:
|
def transcribe(self, **kwargs) -> str:
|
||||||
self._upload_file()
|
self._upload_file()
|
||||||
try:
|
try:
|
||||||
return self._request_transcription()
|
return self._request_transcription(**kwargs)
|
||||||
except self.tr.exceptions.ConflictException:
|
except self.tr.exceptions.ConflictException:
|
||||||
raise exceptions.AlreadyExistsError(
|
raise exceptions.AlreadyExistsError(
|
||||||
f'{self.basename} already exists on {NAME}')
|
f'{self.basename} already exists on {NAME}')
|
||||||
@@ -83,9 +83,15 @@ class Transcriber(TranscriberBaseClass):
|
|||||||
str(self.filepath),
|
str(self.filepath),
|
||||||
self.basename)
|
self.basename)
|
||||||
|
|
||||||
def _request_transcription(self, language_code='en-US') -> str:
|
def _request_transcription(
|
||||||
|
self,
|
||||||
|
language_code='en-US',
|
||||||
|
num_speakers=2,
|
||||||
|
enable_speaker_diarization=True,
|
||||||
|
) -> str:
|
||||||
job_name = self.basename
|
job_name = self.basename
|
||||||
self.tr.start_transcription_job(
|
|
||||||
|
kwargs = dict(
|
||||||
TranscriptionJobName=job_name,
|
TranscriptionJobName=job_name,
|
||||||
LanguageCode=language_code,
|
LanguageCode=language_code,
|
||||||
MediaFormat=self.basename.split('.')[-1].lower(),
|
MediaFormat=self.basename.split('.')[-1].lower(),
|
||||||
@@ -94,6 +100,16 @@ class Transcriber(TranscriberBaseClass):
|
|||||||
},
|
},
|
||||||
OutputBucketName=self.bucket_names['transcript']
|
OutputBucketName=self.bucket_names['transcript']
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if enable_speaker_diarization:
|
||||||
|
kwargs.update(dict(
|
||||||
|
Settings={
|
||||||
|
'ShowSpeakerLabels': True,
|
||||||
|
'MaxSpeakerLabels': num_speakers,
|
||||||
|
}
|
||||||
|
))
|
||||||
|
|
||||||
|
self.tr.start_transcription_job(**kwargs)
|
||||||
return job_name
|
return job_name
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
Reference in New Issue
Block a user