Snippets

David Macias Using AWS Transcribe to get IVR prompts verbiage

Updated by David Macias

File app.py Modified

  • Ignore whitespace
  • Hide word diff
 
 # enumerate local files recursively
 print("Checking files.")
-
-# enumerate local files recursively
 for root, dirs, files in os.walk(local_directory):
     for filename in files:
         if filename.endswith(file_extension):
Updated by David Macias

File .env Added

  • Ignore whitespace
  • Hide word diff
+REGION=us-west-2

File app.py Added

  • Ignore whitespace
  • Hide word diff
+from __future__ import print_function
+
+import boto3
+import uuid
+import logging
+import sys
+import os
+import time
+import json
+import urllib.request
+import pandas
+
+from botocore.exceptions import ClientError
+from dotenv import load_dotenv
+
+load_dotenv()
+
+local_directory = 'sourcePrompts/'
+file_extension = '.wav'
+media_format = 'wav'
+language_code = 'en-US'
+
+def create_unique_bucket_name(bucket_prefix):
+    """Return bucket_prefix with a random UUID4 string appended.
+
+    The random suffix is what makes the name unique from call to call.
+    """
+    # The generated bucket name must be between 3 and 63 chars long
+    # (the UUID string is 36 chars, so the prefix must stay within 27).
+    return ''.join([bucket_prefix, str(uuid.uuid4())])
+
+def create_bucket(bucket_prefix, s3_connection):
+    bucket_name = create_unique_bucket_name(bucket_prefix)
+    if region == "us-east-1":
+        bucket_response = s3_connection.create_bucket(
+            Bucket=bucket_name,
+        )
+    else:
+        location = {"LocationConstraint": region}
+        bucket_response = s3_connection.create_bucket(
+            Bucket=bucket_name,
+            CreateBucketConfiguration=location
+        )
+
+    # print(bucket_name, current_region)
+    return bucket_name, bucket_response
+
+def delete_all_objects(bucket_name):
+    res = []
+    bucket = s3Resource.Bucket(bucket_name)
+    for obj_version in bucket.object_versions.all():
+        res.append({'Key': obj_version.object_key,
+                    'VersionId': obj_version.id})
+    # print(res)
+    if res == []:
+        print("Bucket is empty.")
+        return
+
+    print("Deleting objects in bucket.")
+    bucket.delete_objects(Delete={'Objects': res})
+
+session = boto3.session.Session(profile_name='YOUR_PROFILE_NAME')
+
+region = os.getenv("REGION")
+print("Region: "+region)
+s3Client = session.client('s3', region_name=region)
+s3Resource = session.resource('s3')
+transcribe = session.client('transcribe', region_name=region)
+data_frame =  pandas.DataFrame()
+
+# Create bucket
+bucket_name, first_response = create_bucket(
+    bucket_prefix = 'transcription-',
+    s3_connection = s3Client)
+
+print("Bucket created: %s" % bucket_name)
+
+print("Checking bucket.")
+for bucket in s3Resource.buckets.all():
+    if bucket.name == bucket_name:
+        print("Bucket ready.")
+        good_to_go = True
+
+if not good_to_go:
+    print("Error with bucket.")
+    quit()
+
+# enumerate local files recursively
+print("Checking files.")
+
+# enumerate local files recursively
+for root, dirs, files in os.walk(local_directory):
+    for filename in files:
+        if filename.endswith(file_extension):
+            # construct the full local path
+            local_path = os.path.join(root, filename)
+            print("Local path: %s" % local_path)
+            # construct the full Dropbox path
+            relative_path = os.path.relpath(local_path, local_directory)
+            print("File name: %s" % relative_path)
+            s3_path = local_path
+            print("Searching for %s in bucket %s" % (s3_path, bucket_name))
+            try:
+                s3Client.head_object(Bucket=bucket_name, Key=s3_path)
+                print("Path found on bucket. Skipping %s..." % s3_path)
+            except:
+                print("File not found.")
+                print("Uploading %s..." % s3_path)
+                s3Client.upload_file(local_path, bucket_name, s3_path)
+                job_name = bucket_name+"_"+relative_path 
+                print("Job Name: "+job_name)
+                job_uri = "s3://%s/%s" % (
+                    bucket_name, s3_path)
+                print("Job URI: "+job_uri)
+                transcribe.start_transcription_job(
+                    TranscriptionJobName=job_name,
+                    Media={'MediaFileUri': job_uri},
+                    MediaFormat=media_format,
+                    LanguageCode=language_code)
+                while True:
+                    status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
+                    if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:
+                        break
+                    print('Transcription ' + status['TranscriptionJob']['TranscriptionJobStatus'])
+                    time.sleep(10)
+                if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED']:
+                    response = urllib.request.urlopen(status['TranscriptionJob']['Transcript']['TranscriptFileUri'])
+                    data = json.loads(response.read())
+                    text = data['results']['transcripts'][0]['transcript']
+                    print("%s, %s "%(job_name, text))
+                    data_frame = data_frame.append({"Prompt Name":relative_path, "Verbiage":text}, ignore_index=True)
+                if status['TranscriptionJob']['TranscriptionJobStatus'] in ['FAILED']:
+                    data_frame = data_frame.append({"Prompt Name":relative_path, "Verbiage":"ERROR"}, ignore_index=True)
+                print("Deleting transcription job.")
+                status = transcribe.delete_transcription_job(TranscriptionJobName=job_name)
+
+# Write the collected prompt names and transcripts to prompts.csv
+print("Writing CSV")
+data_frame.to_csv('prompts.csv', index=False)
+
+# Empty bucket (it has to be emptied before it can be deleted)
+print("Emptying bucket.")
+delete_all_objects(bucket_name)
+
+# Delete the now-empty temporary bucket
+s3Resource.Bucket(bucket_name).delete()
+print("Bucket deleted.")

File snippet.BUILD Deleted

  • Ignore whitespace
  • Hide word diff
-from __future__ import print_function
-from botocore.exceptions import ClientError
-
-import boto3
-import uuid
-import logging
-import sys
-import os
-import time
-import json
-import urllib.request
-import pandas
-
-local_directory = 'French/'
-file_extension = '.wav'
-media_format = 'wav'
-language_code = 'fr-CA'
-
-def create_unique_bucket_name(bucket_prefix):
-    # The generated bucket name must be between 3 and 63 chars long
-    return ''.join([bucket_prefix, str(uuid.uuid4())])
-
-def create_bucket(bucket_prefix, s3_connection):
-    session = boto3.session.Session()
-    current_region = session.region_name
-    bucket_name = create_unique_bucket_name(bucket_prefix)
-    bucket_response = s3_connection.create_bucket(
-        Bucket=bucket_name,
-    )
-    # print(bucket_name, current_region)
-    return bucket_name, bucket_response
-
-def delete_all_objects(bucket_name):
-    res = []
-    bucket = s3Resource.Bucket(bucket_name)
-    for obj_version in bucket.object_versions.all():
-        res.append({'Key': obj_version.object_key,
-                    'VersionId': obj_version.id})
-    # print(res)
-    bucket.delete_objects(Delete={'Objects': res})
-
-s3Client = boto3.client('s3')
-s3Resource = boto3.resource('s3')
-transcribe = boto3.client('transcribe')
-data_frame =  pandas.DataFrame()
-
-# Create bucket
-bucket_name, first_response = create_bucket(
-    bucket_prefix = 'transcription-',
-    s3_connection = s3Client)
-
-print("Bucket created %s" % bucket_name)
-
-print("Checking bucket.")
-for bucket in s3Resource.buckets.all():
-    if bucket.name == bucket_name:
-        print("Bucket ready.")
-        good_to_go = True
-
-if not good_to_go:
-    print("Error with bucket.")
-    quit()
-
-# enumerate local files recursively
-for root, dirs, files in os.walk(local_directory):
-    for filename in files:
-        if filename.endswith(file_extension):
-            # construct the full local path
-            local_path = os.path.join(root, filename)
-            print("Local path: %s" % local_path)
-            # construct the full Dropbox path
-            relative_path = os.path.relpath(local_path, local_directory)
-            print("File name: %s" % relative_path)
-            s3_path = local_path
-            print("Searching for %s in bucket %s" % (s3_path, bucket_name))
-            try:
-                s3Client.head_object(Bucket=bucket_name, Key=s3_path)
-                print("Path found on bucket. Skipping %s..." % s3_path)
-            except:
-                print("Uploading %s..." % s3_path)
-                s3Client.upload_file(local_path, bucket_name, s3_path)
-                job_name = relative_path 
-                job_uri = "https://%s.s3.amazonaws.com/%s" % (
-                    bucket_name, s3_path)
-                transcribe.start_transcription_job(
-                    TranscriptionJobName=job_name,
-                    Media={'MediaFileUri': job_uri},
-                    MediaFormat=media_format,
-                    LanguageCode=language_code
-                )
-                while True:
-                    status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
-                    if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:
-                        break
-                    print('Transcription ' + status['TranscriptionJob']['TranscriptionJobStatus'])
-                    time.sleep(25)
-                print('Transcription ' + status['TranscriptionJob']['TranscriptionJobStatus'])
-                response = urllib.request.urlopen(status['TranscriptionJob']['Transcript']['TranscriptFileUri'])
-                data = json.loads(response.read())
-                text = data['results']['transcripts'][0]['transcript']
-                print("%s, %s "%(job_name, text))
-                data_frame = data_frame.append({"Prompt Name":job_name, "Verbiage":text}, ignore_index=True)
-                print("Deleting transcription job.")
-                status = transcribe.delete_transcription_job(TranscriptionJobName=job_name)
-
-#Create csv
-print("Writing CSV")
-data_frame.to_csv('prompts.csv', index=False)
-
-# Empty bucket
-print("Emptying bucket.")
-delete_all_objects(bucket_name)
-
-# Delete empty bucket
-s3Resource.Bucket(bucket_name).delete()
-print("Bucket deleted.")
Created by David Macias

File snippet.BUILD Added

  • Ignore whitespace
  • Hide word diff
+from __future__ import print_function
+from botocore.exceptions import ClientError
+
+import boto3
+import uuid
+import logging
+import sys
+import os
+import time
+import json
+import urllib.request
+import pandas
+
+local_directory = 'French/'
+file_extension = '.wav'
+media_format = 'wav'
+language_code = 'fr-CA'
+
+def create_unique_bucket_name(bucket_prefix):
+    # The generated bucket name must be between 3 and 63 chars long
+    return ''.join([bucket_prefix, str(uuid.uuid4())])
+
+def create_bucket(bucket_prefix, s3_connection):
+    session = boto3.session.Session()
+    current_region = session.region_name
+    bucket_name = create_unique_bucket_name(bucket_prefix)
+    bucket_response = s3_connection.create_bucket(
+        Bucket=bucket_name,
+    )
+    # print(bucket_name, current_region)
+    return bucket_name, bucket_response
+
+def delete_all_objects(bucket_name):
+    res = []
+    bucket = s3Resource.Bucket(bucket_name)
+    for obj_version in bucket.object_versions.all():
+        res.append({'Key': obj_version.object_key,
+                    'VersionId': obj_version.id})
+    # print(res)
+    bucket.delete_objects(Delete={'Objects': res})
+
+s3Client = boto3.client('s3')
+s3Resource = boto3.resource('s3')
+transcribe = boto3.client('transcribe')
+data_frame =  pandas.DataFrame()
+
+# Create bucket
+bucket_name, first_response = create_bucket(
+    bucket_prefix = 'transcription-',
+    s3_connection = s3Client)
+
+print("Bucket created %s" % bucket_name)
+
+print("Checking bucket.")
+for bucket in s3Resource.buckets.all():
+    if bucket.name == bucket_name:
+        print("Bucket ready.")
+        good_to_go = True
+
+if not good_to_go:
+    print("Error with bucket.")
+    quit()
+
+# enumerate local files recursively
+for root, dirs, files in os.walk(local_directory):
+    for filename in files:
+        if filename.endswith(file_extension):
+            # construct the full local path
+            local_path = os.path.join(root, filename)
+            print("Local path: %s" % local_path)
+            # construct the full Dropbox path
+            relative_path = os.path.relpath(local_path, local_directory)
+            print("File name: %s" % relative_path)
+            s3_path = local_path
+            print("Searching for %s in bucket %s" % (s3_path, bucket_name))
+            try:
+                s3Client.head_object(Bucket=bucket_name, Key=s3_path)
+                print("Path found on bucket. Skipping %s..." % s3_path)
+            except:
+                print("Uploading %s..." % s3_path)
+                s3Client.upload_file(local_path, bucket_name, s3_path)
+                job_name = relative_path 
+                job_uri = "https://%s.s3.amazonaws.com/%s" % (
+                    bucket_name, s3_path)
+                transcribe.start_transcription_job(
+                    TranscriptionJobName=job_name,
+                    Media={'MediaFileUri': job_uri},
+                    MediaFormat=media_format,
+                    LanguageCode=language_code
+                )
+                while True:
+                    status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
+                    if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:
+                        break
+                    print('Transcription ' + status['TranscriptionJob']['TranscriptionJobStatus'])
+                    time.sleep(25)
+                print('Transcription ' + status['TranscriptionJob']['TranscriptionJobStatus'])
+                response = urllib.request.urlopen(status['TranscriptionJob']['Transcript']['TranscriptFileUri'])
+                data = json.loads(response.read())
+                text = data['results']['transcripts'][0]['transcript']
+                print("%s, %s "%(job_name, text))
+                data_frame = data_frame.append({"Prompt Name":job_name, "Verbiage":text}, ignore_index=True)
+                print("Deleting transcription job.")
+                status = transcribe.delete_transcription_job(TranscriptionJobName=job_name)
+
+#Create csv
+print("Writing CSV")
+data_frame.to_csv('prompts.csv', index=False)
+
+# Empty bucket
+print("Emptying bucket.")
+delete_all_objects(bucket_name)
+
+# Delete empty bucket
+s3Resource.Bucket(bucket_name).delete()
+print("Bucket deleted.")
HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.