nmolivo/rekognition-for-loop

## rekognition-for-loop
# Run Amazon Rekognition (labels, celebrities, text) over every object in an
# S3 bucket and save the detections as two CSV files: a "long" table with one
# row per detection and a "wide" table with one row per image.
#
# NOTE: `boto3` and `pd` (pandas) are used below but not imported in this
# snippet; they must already be in scope (`import boto3`,
# `import pandas as pd`).
bucket_name = 'bucket_name'
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)
# Every object key in the bucket is treated as an image name (no filtering).
images = [img.key for img in bucket.objects.all()]
client = boto3.client('rekognition')

results_wide = []
results_long = []


def _detect_labels(image):
    """Return (name, confidence) pairs from detect_labels (MinConfidence=75)."""
    response = client.detect_labels(Image=image, MinConfidence=75)
    return [(label['Name'], label['Confidence'])
            for label in response.get('Labels', [])]


def _detect_celebrities(image):
    """Return (name, face confidence) pairs from recognize_celebrities."""
    response = client.recognize_celebrities(Image=image)
    return [(face['Name'], face['Face']['Confidence'])
            for face in response.get('CelebrityFaces', [])]


def _detect_text(image):
    """Return (detected text, confidence) pairs from detect_text."""
    response = client.detect_text(Image=image)
    return [(word['DetectedText'], word['Confidence'])
            for word in response.get('TextDetections', [])]


# (row type in the long table, wide-column prefix, detector function)
_detectors = [
    ('Label', 'label_', _detect_labels),
    ('Celebrity', 'celeb_', _detect_celebrities),
    ('Text', 'word_', _detect_text),
]

for img in images:
    s3_image = {'S3Object': {'Bucket': bucket_name, 'Name': img}}
    img_dict_wide = {'img': img}
    # Buffer this image's rows so that a failed call cannot leave partial rows
    # in the long table for an image that is absent from the wide table.
    img_rows_long = []
    try:
        for row_type, prefix, detect in _detectors:
            for idx, (name, confidence) in enumerate(detect(s3_image)):
                img_rows_long.append({'img': img, 'type': row_type, 'label': name,
                                      'confidence': confidence})
                col = prefix + str(idx)
                img_dict_wide[col] = name
                img_dict_wide[col + '_confidence'] = confidence
    except Exception as err:
        # Best effort: an image that any Rekognition call rejects is skipped
        # entirely, but the reason is reported instead of silently dropped.
        # `Exception` (not a bare except) lets Ctrl-C still stop the run.
        print('Skipping {!r}: {} detection failed: {}'.format(img, row_type, err))
        continue

    # Placeholder row for images in which nothing at all was detected. This is
    # decided by the rows actually produced, not by which keys the responses
    # happen to contain.
    if not img_rows_long:
        img_rows_long.append({'img': img, 'type': None, 'label': None, 'confidence': None})

    results_long.extend(img_rows_long)
    results_wide.append(img_dict_wide)
####
####
img_df_long = pd.DataFrame(results_long, columns=['img', 'type', 'label', 'confidence'])
img_df_wide = pd.DataFrame(results_wide)
# Put 'img' first, followed by the remaining columns in sorted order. Filtering
# plus reindex (instead of list.remove + indexing) also works when no image
# was processed and the frame therefore has no 'img' column yet.
cols = sorted(c for c in img_df_wide.columns if c != 'img')
img_df_wide = img_df_wide.reindex(columns=['img'] + cols)
####
####
# save down your dfs.

#For our topic modelers only focused on images data!
img_df_long.to_csv("twitter_img_text_long.csv")

#For mapping to the dataframe initially provided to us.
img_df_wide.to_csv("twitter_img_text_wide.csv")
	# Run Amazon Rekognition (labels, celebrities, text) over every object in an
	# S3 bucket and save the detections as two CSV files: a "long" table with one
	# row per detection and a "wide" table with one row per image.
	#
	# NOTE: `boto3` and `pd` (pandas) are used below but not imported in this
	# snippet; they must already be in scope (`import boto3`,
	# `import pandas as pd`).
	bucket_name = 'bucket_name'
	s3 = boto3.resource('s3')
	bucket = s3.Bucket(bucket_name)
	# Every object key in the bucket is treated as an image name (no filtering).
	images = [img.key for img in bucket.objects.all()]
	client = boto3.client('rekognition')

	results_wide = []
	results_long = []


	def _detect_labels(image):
	    """Return (name, confidence) pairs from detect_labels (MinConfidence=75)."""
	    response = client.detect_labels(Image=image, MinConfidence=75)
	    return [(label['Name'], label['Confidence'])
	            for label in response.get('Labels', [])]


	def _detect_celebrities(image):
	    """Return (name, face confidence) pairs from recognize_celebrities."""
	    response = client.recognize_celebrities(Image=image)
	    return [(face['Name'], face['Face']['Confidence'])
	            for face in response.get('CelebrityFaces', [])]


	def _detect_text(image):
	    """Return (detected text, confidence) pairs from detect_text."""
	    response = client.detect_text(Image=image)
	    return [(word['DetectedText'], word['Confidence'])
	            for word in response.get('TextDetections', [])]


	# (row type in the long table, wide-column prefix, detector function)
	_detectors = [
	    ('Label', 'label_', _detect_labels),
	    ('Celebrity', 'celeb_', _detect_celebrities),
	    ('Text', 'word_', _detect_text),
	]

	for img in images:
	    s3_image = {'S3Object': {'Bucket': bucket_name, 'Name': img}}
	    img_dict_wide = {'img': img}
	    # Buffer this image's rows so that a failed call cannot leave partial rows
	    # in the long table for an image that is absent from the wide table.
	    img_rows_long = []
	    try:
	        for row_type, prefix, detect in _detectors:
	            for idx, (name, confidence) in enumerate(detect(s3_image)):
	                img_rows_long.append({'img': img, 'type': row_type, 'label': name,
	                                      'confidence': confidence})
	                col = prefix + str(idx)
	                img_dict_wide[col] = name
	                img_dict_wide[col + '_confidence'] = confidence
	    except Exception as err:
	        # Best effort: an image that any Rekognition call rejects is skipped
	        # entirely, but the reason is reported instead of silently dropped.
	        # `Exception` (not a bare except) lets Ctrl-C still stop the run.
	        print('Skipping {!r}: {} detection failed: {}'.format(img, row_type, err))
	        continue

	    # Placeholder row for images in which nothing at all was detected. This is
	    # decided by the rows actually produced, not by which keys the responses
	    # happen to contain.
	    if not img_rows_long:
	        img_rows_long.append({'img': img, 'type': None, 'label': None, 'confidence': None})

	    results_long.extend(img_rows_long)
	    results_wide.append(img_dict_wide)
	####
	####
	img_df_long = pd.DataFrame(results_long, columns=['img', 'type', 'label', 'confidence'])
	img_df_wide = pd.DataFrame(results_wide)
	# Put 'img' first, followed by the remaining columns in sorted order. Filtering
	# plus reindex (instead of list.remove + indexing) also works when no image
	# was processed and the frame therefore has no 'img' column yet.
	cols = sorted(c for c in img_df_wide.columns if c != 'img')
	img_df_wide = img_df_wide.reindex(columns=['img'] + cols)
	####
	####
	# save down your dfs.

	#For our topic modelers only focused on images data!
	img_df_long.to_csv("twitter_img_text_long.csv")

	#For mapping to the dataframe initially provided to us.
	img_df_wide.to_csv("twitter_img_text_wide.csv")