Skip to content

Instantly share code, notes, and snippets.

@rlieberman
Created May 11, 2015 02:53
Show Gist options
  • Save rlieberman/7ab1b6f5c2a72b73231c to your computer and use it in GitHub Desktop.
Save rlieberman/7ab1b6f5c2a72b73231c to your computer and use it in GitHub Desktop.
RWET Final Project by Rebecca Lieberman, ITP Spring 2015
#IT SEES by Rebecca Lieberman
#Final Project for Reading and Writing Electronic Text @ ITP, taught by Allison Parrish
#Spring 2015
#Code sources --
#Bing Search API Python Wrapper https://github.com/xthepoet/pyBingSearchAPI
#Script that fetches image descriptions from Toronto Deep Learning https://github.com/cmyr/INTERESTING_JPG
#STEPS TO RUN THIS CODE:
#1. create a virtual environment in your project directory
#2. install textblob, requests and beautiful soup to your virtual env
#3. run this script with a sys.argv input from the command line (this is your image search term)
#4. make sure the following two modules are in your project directory (+download them with the following links):
# Bing Search API Python Wrapper: https://www.dropbox.com/s/hlk4tcfims5no3z/bing_search_api.py?dl=0
# Scraping Toronto Deep Learning: https://www.dropbox.com/s/9j0miitbwpgf4zh/cvserver.py?dl=0
import sys
import random
import urllib #import urllib to download images from their URLs
import time #to implement delays in making requests to Toronto Deep Learning
from textblob import TextBlob, Word #import the class TextBlob from textblob, Word to get definitions
from bing_search_api import BingSearchAPI #import the class BingSearchAPI from https://github.com/xthepoet/pyBingSearchAPI
from cvserver import response_for_image, captions #import the function response_for_image to fetch image descriptions, caption to extract them from HTML using beautiful soup
#MY FUNCTIONS
def capitalize_sentence(sentence):
    """Capitalize the first character of a sentence and tidy its punctuation.

    The space in front of a period (" .") and in front of a comma (" , ")
    is removed from everything after the first character.

    sentence -- the string to clean up.

    Returns the cleaned-up string. An empty string is returned unchanged,
    because indexing sentence[0] on it would raise IndexError.
    """
    if not sentence:
        return sentence
    # Only the text after the first character goes through the punctuation fixes.
    tail = sentence[1:].replace(' .', '.').replace(' , ', ', ')
    return sentence[0].upper() + tail
#PART 1: GET IMAGES FROM THE BING API
#NOTE(review): this API key is committed in the source; consider revoking it and loading the key from outside the script
my_key = "jYMpFzaxAn5jTbR+SUUMhoX8hqxXNYU72zaIRBFaGmA" #my API key for Bing Search

#the image search term is the first command-line argument; for multiple words put the query between " "
#exit with a usage message instead of an IndexError traceback when it is missing
if len(sys.argv) < 2:
    sys.exit('usage: python ' + sys.argv[0] + ' "image search term"')
query_string = sys.argv[1]

bing = BingSearchAPI(my_key)

#parameters for image searching -- more documentation on params and image filters here http://goo.gl/xG0v0O
params = {
    'ImageFilters': '"Style:Photo"',
    '$format': 'json', #specifies format of data response
    '$top': 50, #specifies number of results to return, default is 50
    '$skip': 0, #specifies starting point offset for the results
}

#bing.search() requires sources first (images, web, video, etc.), then query string, then rest of params (above)
#full schema documentation for bing API is here http://goo.gl/xG0v0O
results = bing.search('image', query_string, params).json() #requests 1.0+
image_list = results['d']['results'][0]['Image'] #this gets us to the list of all the image results

#collect the source URL of every image result, skipping results whose MediaUrl is missing or empty
image_urls = [image['MediaUrl'] for image in image_list if image.get('MediaUrl')]

#optional: print the list of image urls
# for url in image_urls:
#     print url

#optional: download all those images to the current directory -- only do this if you need the images, takes a lot of time
# for url in image_urls:
#     file_name = url.rsplit('/',1)[1]
#     urllib.urlretrieve(url, file_name)
#PART 2: GET IMAGE DESCRIPTIONS FROM TORONTO DEEP LEARNING
#for each image, fetch the page with response_for_image and pull the caption sentences out of it with captions
#(the original notes say each image yields 5 sentences)
clientname = 'RSLRWET' #define the client name with some unique name, required part of cvserver.py
all_descriptions = [] #holds every caption sentence for every image

#to send fewer images to the deep learning server, slice the list in the loop below,
#ie "for url in image_urls[:12]:"
for url in image_urls:
    raw_text = response_for_image(url, clientname)
    description = captions(raw_text) #list of caption strings for this image; may be None
    if description is not None: #looping over None would raise a TypeError
        #keep only captions with visible text: a caption that is just a line break has len > 0
        #but becomes empty once stripped, which breaks capitalizing it later on
        all_descriptions.extend(caption for caption in description if caption.strip())
        print(description) #print out this image's captions as they load
    time.sleep(random.uniform(0.4, 0.75)) #put in a random delay between requests
#PART 3: CREATE A LIST OF NOUN PHRASES AND NOUNS TO COMPOSE OTHER PARTS OF THE POEM
#wrap each description in a TextBlob and gather every noun phrase it reports into one flat list
all_noun_phrases = []
for description_text in all_descriptions:
    #a description without noun phrases simply contributes nothing
    all_noun_phrases.extend(TextBlob(description_text).noun_phrases)
print(all_noun_phrases)

#CREATE A LIST OF NOUNS
#second pass over the descriptions: keep the words tagged 'NN' that are longer than two characters
all_nouns = []
for description_text in all_descriptions:
    tagged_words = TextBlob(description_text).tags #(word, tag) pairs
    all_nouns.extend(
        token for token, pos_tag in tagged_words
        if pos_tag == 'NN' and len(token) > 2
    )
print(all_nouns)
#PART 4: CONSTRUCT THE POEM
stanza_count = 4 #adjust this depending on how many stanzas you want to create
sample_num = 3 #how many description lines to print in each group of lines

#strip the extra linebreaks from every description once, and drop the ones that end up empty
#(capitalize_sentence indexes the first character, so an empty line cannot be passed to it)
poem_lines = [text.strip() for text in all_descriptions if text.strip()]

#random.choice and random.sample cannot work on empty lists, so stop with a message instead of a traceback
if not (poem_lines and all_noun_phrases and all_nouns):
    sys.exit("Not enough image descriptions came back to build a poem.")


def _print_description_lines(count):
    """Print up to `count` randomly picked description lines, capitalized."""
    #random.sample raises ValueError when asked for more items than the list holds
    for line in random.sample(poem_lines, min(count, len(poem_lines))):
        print(capitalize_sentence(line))


def _random_definition(nouns):
    """Return a random definition of a randomly chosen noun, or None if no noun has one."""
    for noun in random.sample(nouns, len(nouns)): #visit the nouns in random order
        definitions = Word(noun).definitions
        #a word may come back with no definitions; random.choice would raise IndexError on the empty list
        if definitions:
            return random.choice(definitions)
    return None


for i in range(stanza_count):
    #pick the definition of a random noun now, it gets printed further down
    random_noun_definition = _random_definition(all_nouns)

    print("It sees a " + random.choice(all_noun_phrases) + ".")
    _print_description_lines(sample_num)

    #list between 2 and 11 noun phrases, but never more than are available
    phrase_count = min(random.randrange(2, 12), len(all_noun_phrases))
    print("It sees a " + ", ".join(random.sample(all_noun_phrases, phrase_count)) + " and " + random.choice(all_noun_phrases) + ".")

    if random_noun_definition is not None: #leave the line out when no noun had a definition
        print("It sees a " + random_noun_definition + ".")
    _print_description_lines(sample_num)

    print('\n') #print a line break between stanzas
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment