Created
May 11, 2015 02:53
-
-
Save rlieberman/7ab1b6f5c2a72b73231c to your computer and use it in GitHub Desktop.
RWET Final Project by Rebecca Lieberman, ITP Spring 2015
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#IT SEES by Rebecca Lieberman | |
#Final Project for Reading and Writing Electronic Text @ ITP, taught by Allison Parrish | |
#Spring 2015 | |
#Code sources -- | |
#Bing Search API Python Wrapper https://github.com/xthepoet/pyBingSearchAPI | |
#Script that fetches image descriptions from Toronto Deep Learning https://github.com/cmyr/INTERESTING_JPG | |
#STEPS TO RUN THIS CODE: | |
#1. create a virtual environment in your project directory | |
#2. install textblob, requests and beautiful soup to your virtual env | |
#3. run this script with a sys.argv input from the command line (this is your image search term) | |
#4. make sure the following two modules are in your project directory (+download them with the following links): | |
# Bing Search API Python Wrapper: https://www.dropbox.com/s/hlk4tcfims5no3z/bing_search_api.py?dl=0 | |
# Scraping Toronto Deep Learning: https://www.dropbox.com/s/9j0miitbwpgf4zh/cvserver.py?dl=0 | |
import sys | |
import random | |
import urllib #import urllib to download images from their URLs | |
import time #to implement delays in making requests to Toronto Deep Learning | |
from textblob import TextBlob, Word #import the class TextBlob from textblob, Word to get definitions | |
from bing_search_api import BingSearchAPI #import the class BingSearchAPI from https://github.com/xthepoet/pyBingSearchAPI | |
from cvserver import response_for_image, captions #import the function response_for_image to fetch image descriptions, caption to extract them from HTML using beautiful soup | |
#MY FUNCTIONS
def capitalize_sentence(sentence): #pass in a string to capitalize and fix punctuation
    """Capitalize the first character of ``sentence`` and tighten spacing
    left around punctuation by the caption generator (' .' -> '.', ' , ' -> ', ').

    :param sentence: caption string to clean up (may be empty)
    :return: the cleaned, capitalized string; an empty string is returned unchanged
    """
    if not sentence: #guard: the original raised IndexError on '' via sentence[0]
        return sentence
    return sentence[0].upper() + sentence[1:].replace(' .', '.').replace(' , ', ', ')
#PART 1: GET IMAGES FROM THE BING API
#NOTE(review): this API key is hard-coded and published in a public gist --
#it should be rotated and loaded from an environment variable instead.
my_key = "jYMpFzaxAn5jTbR+SUUMhoX8hqxXNYU72zaIRBFaGmA" #my API key for Bing Search
query_string = sys.argv[1] #image search term from the command line; quote multi-word queries with " "
bing = BingSearchAPI(my_key) #client wrapper from bing_search_api.py
#parameters for image searching -- more documentation on params and image filters here http://goo.gl/xG0v0O
params = {'ImageFilters':'"Style:Photo"',
          '$format': 'json', #specifies format of data response
          '$top': 50, #specifies number of results to return, default is 50
          '$skip': 0} #specifies starting point offset for the results
#bing.search() requires sources first (images, web, video, etc.), then query string, then rest of params (above)
#full schema documentation for bing API is here http://goo.gl/xG0v0O
results = bing.search('image',query_string,params).json() #requests 1.0+ returns a Response; .json() parses it
image_list = results['d']['results'][0]['Image'] #drills into the response to the list of image records
#keep only the non-empty source URLs, one per image record
image_urls = [image['MediaUrl'] for image in image_list if len(image['MediaUrl']) > 0]
# for url in image_urls: #print the list of image urls
#     print url
# #download all those images to a directory -- only do this if you need the images, takes a lot of time
# for url in image_urls:
#     file_name = url.rsplit('/',1)[1]
#     urllib.urlretrieve(url, file_name)
#PART 2: GET IMAGE DESCRIPTIONS FROM TORONTO DEEP LEARNING
#For each image URL, fetch the 5-sentence caption from the Toronto Deep Learning
#server with response_for_image(), then extract the sentences with captions().
#NOTE(review): indentation below is reconstructed from a flattened paste -- confirm
#against the original gist that print/sleep sit at the intended nesting level.
clientname = 'RSLRWET' #unique client name, required by cvserver.py
all_descriptions = list() #accumulates every caption sentence across all images
#to stay under 50 results, slice the url list in the for loop,
#ie "for url in image_urls[:response_num]:"
# response_num = 12
for url in image_urls: #for each image URL get the list of 5 captions
    raw_text = response_for_image(url, clientname) #raw HTML response from the caption server
    description = captions(raw_text) #list of caption sentences for this image, or None on failure
    if description is not None: #skip images the server could not caption, or else you'll get an error
        for each_description in description: #then loop over the list description
            if len(each_description) > 0: #drop empty strings
                all_descriptions.append(each_description) #add each description to the list of all descriptions
        print description #print the individual description as it loads (Python 2 print statement)
    time.sleep(random.uniform(0.4, 0.75)) #random delay between requests to be polite to the server
#PART 3: CREATE A LIST OF NOUN PHRASES AND NOUNS TO COMPOSE OTHER PARTS OF THE POEM
#get all the noun phrases from the image descriptions using TextBlob
all_noun_phrases = list() #flat list of every noun phrase across all descriptions
for sentence in all_descriptions: #TextBlob needs a string; each sentence becomes a blob
    blob_sentence = TextBlob(sentence)
    for phrase in blob_sentence.noun_phrases: #noun_phrases yields each phrase in the sentence
        #NOTE(review): this length check is redundant -- the loop body only runs
        #when noun_phrases is non-empty -- kept as-is to preserve the original exactly
        if len(blob_sentence.noun_phrases) > 0:
            all_noun_phrases.append(phrase) #collect every phrase into the master list
print all_noun_phrases
#CREATE A LIST OF NOUNS
all_nouns = list() #flat list of single-word nouns across all descriptions
for sentence in all_descriptions: #make each sentence in all descriptions into a textblob
    blob_sentence = TextBlob(sentence)
    for word, tag in blob_sentence.tags: #(word, POS-tag) pairs
        if tag == 'NN': #keep singular common nouns only (Penn Treebank tag)
            if len(word) > 2: #skip very short tokens
                all_nouns.append(word) #add it to the list of all the nouns
print all_nouns
#PART 4: CONSTRUCT THE POEM
#adjust the for loop -- for i in range (X) -- depending on how many stanzas you want to create
for i in range(4): #one iteration per stanza
    #pick a random noun from the list of all nouns and get one of its definitions
    random_noun = random.choice(all_nouns)
    #NOTE(review): Word(...).definitions can be an empty list for words WordNet
    #doesn't know, which would make random.choice raise IndexError -- confirm coverage
    random_noun_definition = random.choice(Word(random_noun).definitions)
    #sample_num holds the number of description sentences sampled per section
    sample_num = 3
    print "It sees a " + random.choice(all_noun_phrases) + "."
    for sentence in random.sample(all_descriptions, sample_num): #three random description lines
        sentence = sentence.strip() #strip the extra linebreaks from the sentence
        print capitalize_sentence(sentence) #capitalize and fix punctuation
    #NOTE(review): random.sample needs at least 11 noun phrases here (randrange(2, 12))
    #and at least 3 descriptions above -- short result sets will raise ValueError
    print "It sees a " + ", ".join(random.sample(all_noun_phrases, random.randrange(2, 12))) + " and " + random.choice(all_noun_phrases) + "."
    print "It sees a " + random_noun_definition + "."
    for sentence in random.sample(all_descriptions, sample_num): #three more random description lines
        sentence = sentence.strip() #strip the extra linebreaks from the sentence
        print capitalize_sentence(sentence) #capitalize and fix punctuation
    print '\n' #blank line between stanzas
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment