Skip to content

Instantly share code, notes, and snippets.

@nmolivo
Last active December 4, 2017 04:48
Show Gist options
  • Save nmolivo/67dd8aeac9ad0115ae125af745d07b99 to your computer and use it in GitHub Desktop.
Save nmolivo/67dd8aeac9ad0115ae125af745d07b99 to your computer and use it in GitHub Desktop.
AWS Rekognition Image Tagger
# Dependencies of this script: boto3 reaches S3 and Rekognition, pandas (as
# pd) builds the result tables once every image has been processed.
import boto3
import pandas as pd

# Name of the S3 bucket that holds the images; replace with your own bucket.
bucket_name = 'bucket_name'
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)
# Keys of every object in the bucket; each key is sent to Rekognition as an
# image name.
images = [img.key for img in bucket.objects.all()]
client = boto3.client('rekognition')
# Accumulators filled while the images are processed:
#   results_wide -- one dict per image, one pair of columns per detection
#   results_long -- one dict per detection (img, type, label, confidence)
results_wide = []
results_long = []
def _detect(operation, response_key, image_key, **kwargs):
    """Run one Rekognition operation on an S3 image and return its detections.

    operation    -- name of the client method to call, e.g. 'detect_labels'.
    response_key -- key of the detection list inside the response.
    image_key    -- key of the image object inside ``bucket_name``.
    kwargs       -- extra arguments forwarded to the call (e.g. MinConfidence).

    Returns the list stored under ``response_key``, or an empty list when the
    call fails or the key is absent. A failure is reported on stdout instead
    of being swallowed, so one failing operation does not discard what the
    other operations found for the same image.
    """
    s3_image = {'S3Object': {'Bucket': bucket_name, 'Name': image_key}}
    try:
        response = getattr(client, operation)(Image=s3_image, **kwargs)
    except Exception as exc:  # not a bare except: Ctrl-C must still stop the run
        print('{} failed for {}: {}'.format(operation, image_key, exc))
        return []
    return response.get(response_key, [])


for img in images:
    img_dict_wide = {'img': img}
    # Long-format rows for this image only, so "nothing detected" can be
    # decided from what was actually recorded.
    img_rows = []

    # (row type, wide-column prefix, [(label, confidence), ...]) per operation.
    # NOTE(review): the celebrity confidence is Face.Confidence, as in the
    # original code; Rekognition also reports a MatchConfidence for the
    # identity itself -- confirm which one is wanted.
    found = (
        ('Label', 'label_',
         [(d['Name'], d['Confidence'])
          for d in _detect('detect_labels', 'Labels', img, MinConfidence=75)]),
        ('Celebrity', 'celeb_',
         [(d['Name'], d['Face']['Confidence'])
          for d in _detect('recognize_celebrities', 'CelebrityFaces', img)]),
        ('Text', 'word_',
         [(d['DetectedText'], d['Confidence'])
          for d in _detect('detect_text', 'TextDetections', img)]),
    )

    for kind, prefix, hits in found:
        for idx, (name, confidence) in enumerate(hits):
            img_rows.append({'img': img, 'type': kind, 'label': name,
                             'confidence': confidence})
            col = prefix + str(idx)
            img_dict_wide[col] = name
            img_dict_wide[col + '_confidence'] = confidence

    # Keep every image visible in the long table, even when nothing was found.
    if not img_rows:
        img_rows.append({'img': img, 'type': None, 'label': None,
                         'confidence': None})

    results_long.extend(img_rows)
    results_wide.append(img_dict_wide)
####
####
def _wide_column_order(column):
    """Sort key for wide columns named '<prefix>_<n>' or '<prefix>_<n>_confidence'.

    Orders by prefix, then by the numeric detection index (so 'label_2' comes
    before 'label_10'), then puts each value column ahead of its
    '_confidence' partner. Names without a numeric index sort first within
    their prefix.
    """
    prefix, _, rest = column.partition('_')
    index, _, suffix = rest.partition('_')
    return (prefix, int(index) if index.isdigit() else -1, suffix)


# Long format: one row per detection with a fixed set of columns.
img_df_long = pd.DataFrame(results_long, columns=['img', 'type', 'label', 'confidence'])
# Wide format: one row per image; 'img' leads, detection columns follow in
# detection order.
img_df_wide = pd.DataFrame(results_wide)
cols = sorted((c for c in img_df_wide.columns if c != 'img'), key=_wide_column_order)
# reindex (rather than remove + select) also works when no image was
# processed and the frame has no 'img' column yet.
img_df_wide = img_df_wide.reindex(columns=['img'] + cols)
####
####
# Write both tables to CSV files in the current working directory.
# Long table: one detection per row, meant for the image-only topic modelling.
img_df_long.to_csv(path_or_buf="twitter_img_text_long.csv")
# Wide table: one image per row, meant for joining back onto the dataframe
# that was originally provided.
img_df_wide.to_csv(path_or_buf="twitter_img_text_wide.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment