Last active
December 4, 2017 04:48
-
-
Save nmolivo/67dd8aeac9ad0115ae125af745d07b99 to your computer and use it in GitHub Desktop.
AWS Rekognition Image Tagger
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Name of the S3 bucket whose objects will be sent to Rekognition.
bucket_name = 'bucket_name'

# Resource-level S3 handle; used here only to enumerate the bucket's contents.
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)

# Keys of every object in the bucket (no filtering by file type happens here).
images = [obj.key for obj in bucket.objects.all()]

# Rekognition client used for the detection calls made per image.
client = boto3.client('rekognition')

# Accumulators filled by the detection loop: wide = one dict per image,
# long = one dict per individual detection.
results_wide, results_long = [], []
import logging  # imported here because no import block is visible in this snippet

# One entry per Rekognition call:
#   (value stored under 'type', wide-column prefix, response list key,
#    callable performing the request, callable returning (label, confidence)).
_DETECTORS = (
    ('Label', 'label_', 'Labels',
     lambda image: client.detect_labels(Image=image, MinConfidence=75),
     lambda item: (item['Name'], item['Confidence'])),
    ('Celebrity', 'celeb_', 'CelebrityFaces',
     lambda image: client.recognize_celebrities(Image=image),
     lambda item: (item['Name'], item['Face']['Confidence'])),
    ('Text', 'word_', 'TextDetections',
     lambda image: client.detect_text(Image=image),
     lambda item: (item['DetectedText'], item['Confidence'])),
)

# For every object key in `images`, run each detector and record the results:
#   * results_long gets one dict per detection (img, type, label, confidence);
#   * results_wide gets exactly one dict per image, with numbered columns such
#     as label_0 / label_0_confidence, celeb_0 / ..., word_0 / ...
for img in images:
    img_dict_wide = {'img': img}
    s3_image = {'S3Object': {'Bucket': bucket_name, 'Name': img}}
    found_any = False

    for kind, prefix, response_key, call, extract in _DETECTORS:
        # Only the remote call is guarded. A failure in one detector is
        # reported and the remaining detectors still run, so the long and
        # wide tables stay consistent for this image.
        try:
            response = call(s3_image)
        except Exception as exc:  # best-effort: keep processing other images
            logging.warning('%s detection failed for %r: %s', kind, img, exc)
            continue

        for idx, item in enumerate(response.get(response_key, [])):
            name, confidence = extract(item)
            results_long.append({'img': img, 'type': kind, 'label': name,
                                 'confidence': confidence})
            col = prefix + str(idx)
            img_dict_wide[col] = name
            img_dict_wide[col + '_confidence'] = confidence
            found_any = True

    # Placeholder row so images with no detections still appear in the long
    # table. This is decided by whether any detection was recorded, not by
    # whether the response keys exist (they can exist with empty lists).
    if not found_any:
        results_long.append({'img': img, 'type': None, 'label': None, 'confidence': None})

    results_wide.append(img_dict_wide)
#### | |
#### | |
def _wide_column_key(col):
    """Sort key that orders numbered columns numerically.

    Splits names such as 'label_10_confidence' into ('label', 10, 'confidence')
    so that 'label_2' sorts before 'label_10'. Names without a numeric second
    part get index -1 and therefore sort first within their prefix.
    """
    prefix, _, rest = col.partition('_')
    index, _, suffix = rest.partition('_')
    return (prefix, int(index) if index.isdigit() else -1, suffix)


# Long table: one row per detection, with a fixed column order.
img_df_long = pd.DataFrame(results_long, columns=['img', 'type', 'label', 'confidence'])

# Wide table: one row per image. Column names are gathered from the row dicts
# so that an empty results_wide yields an empty frame with just an 'img'
# column instead of raising when 'img' is looked up.
cols = sorted({key for row in results_wide for key in row if key != 'img'},
              key=_wide_column_key)
img_df_wide = pd.DataFrame(results_wide, columns=['img'] + cols)
#### | |
#### | |
# Write both tables to CSV files (relative paths, so they land in the current
# working directory).
#   * long table: for the topic modelers who work only with the image data;
#   * wide table: for mapping back onto the dataframe originally provided.
for frame, csv_path in (
    (img_df_long, "twitter_img_text_long.csv"),
    (img_df_wide, "twitter_img_text_wide.csv"),
):
    frame.to_csv(csv_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment