+from __future__ import print_function
+import json
+import os
+import time
+import urllib.request
+import uuid
+
+import boto3
+import pandas
+from botocore.exceptions import ClientError
+from dotenv import load_dotenv
+
+# Read settings (e.g. REGION) from a local .env file
+load_dotenv()
+local_directory = 'sourcePrompts/'
+# Assumed defaults for the prompt audio; adjust to match your source files
+file_extension = '.wav'
+media_format = 'wav'
+language_code = 'en-US'
+def create_unique_bucket_name(bucket_prefix):
+    # The generated bucket name must be between 3 and 63 chars long
+    return ''.join([bucket_prefix, str(uuid.uuid4())])
+def create_bucket(bucket_prefix, s3_connection):
+    bucket_name = create_unique_bucket_name(bucket_prefix)
+    if region == "us-east-1":
+        # us-east-1 does not accept an explicit LocationConstraint
+        bucket_response = s3_connection.create_bucket(Bucket=bucket_name)
+    else:
+        location = {"LocationConstraint": region}
+        bucket_response = s3_connection.create_bucket(
+            Bucket=bucket_name,
+            CreateBucketConfiguration=location)
+    # print(bucket_name, current_region)
+    return bucket_name, bucket_response
+def delete_all_objects(bucket_name):
+    res = []
+    bucket = s3Resource.Bucket(bucket_name)
+    # Collect every object version so versioned buckets are fully emptied
+    for obj_version in bucket.object_versions.all():
+        res.append({'Key': obj_version.object_key,
+                    'VersionId': obj_version.id})
+    if not res:
+        print("Bucket is empty.")
+        return
+    print("Deleting objects in bucket.")
+    bucket.delete_objects(Delete={'Objects': res})
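+
+# Set up the AWS session and clients; REGION comes from the .env file and the
+# named profile from your local AWS credentials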
+session = boto3.session.Session(profile_name='YOUR_PROFILE_NAME')
+region = os.getenv("REGION")
+print("Region: "+region)
+s3Client = session.client('s3', region_name=region)
+s3Resource = session.resource('s3')
+transcribe = session.client('transcribe', region_name=region)
+data_frame = pandas.DataFrame()
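+
+# Create a uniquely named working bucket to hold the prompt audio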
+bucket_name, first_response = create_bucket(
+    bucket_prefix='transcription-',
+    s3_connection=s3Client)
+print("Bucket created: %s" % bucket_name)
+print("Checking bucket.")
+for bucket in s3Resource.buckets.all():
+ if bucket.name == bucket_name:
+ print("Error with bucket.")
+print("Checking files.")
+# enumerate local files recursively
+for root, dirs, files in os.walk(local_directory):
+    for filename in files:
+        if filename.endswith(file_extension):
+            # construct the full local path
+            local_path = os.path.join(root, filename)
+            print("Local path: %s" % local_path)
+            # construct the S3 key from the path relative to the source folder
+            relative_path = os.path.relpath(local_path, local_directory)
+            print("File name: %s" % relative_path)
+            s3_path = relative_path
+            # upload the file only if it is not already in the bucket
+            print("Searching for %s in bucket %s" % (s3_path, bucket_name))
+            try:
+                s3Client.head_object(Bucket=bucket_name, Key=s3_path)
+                print("Path found on bucket. Skipping %s..." % s3_path)
+            except ClientError:
+                print("File not found.")
+                print("Uploading %s..." % s3_path)
+                s3Client.upload_file(local_path, bucket_name, s3_path)
+            job_name = bucket_name + "_" + relative_path
+            print("Job Name: " + job_name)
+ job_uri = "s3://%s/%s" % (
+ print("Job URI: "+job_uri)
+            transcribe.start_transcription_job(
+                TranscriptionJobName=job_name,
+                Media={'MediaFileUri': job_uri},
+                MediaFormat=media_format,
+                LanguageCode=language_code)
+            # Poll until the transcription job finishes one way or the other
+            while True:
+                status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
+                if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:
+                    break
+                time.sleep(5)
+            print('Transcription ' + status['TranscriptionJob']['TranscriptionJobStatus'])
+            if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED']:
+                response = urllib.request.urlopen(status['TranscriptionJob']['Transcript']['TranscriptFileUri'])
+                data = json.loads(response.read())
+                text = data['results']['transcripts'][0]['transcript']
+                print("%s, %s " % (job_name, text))
+                # DataFrame.append was removed in pandas 2.0, so build the row with concat
+                data_frame = pandas.concat(
+                    [data_frame, pandas.DataFrame([{"Prompt Name": relative_path, "Verbiage": text}])],
+                    ignore_index=True)
+            if status['TranscriptionJob']['TranscriptionJobStatus'] in ['FAILED']:
+                data_frame = pandas.concat(
+                    [data_frame, pandas.DataFrame([{"Prompt Name": relative_path, "Verbiage": "ERROR"}])],
+                    ignore_index=True)
+ print("Deleting transcription job.")
+ status = transcribe.delete_transcription_job(TranscriptionJobName=job_name)
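+
+# Write all transcripts to prompts.csv, then empty and remove the working bucket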
+data_frame.to_csv('prompts.csv', index=False)
+print("Emptying bucket.")
+delete_all_objects(bucket_name)
+s3Resource.Bucket(bucket_name).delete()
+print("Bucket deleted.")