Created
May 11, 2015 02:53
-
-
Save rlieberman/7ab1b6f5c2a72b73231c to your computer and use it in GitHub Desktop.
RWET Final Project by Rebecca Lieberman, ITP Spring 2015
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#IT SEES by Rebecca Lieberman | |
#Final Project for Reading and Writing Electronic Text @ ITP, taught by Allison Parrish | |
#Spring 2015 | |
#Code sources -- | |
#Bing Search API Python Wrapper https://github.com/xthepoet/pyBingSearchAPI | |
#Script that fetches image descriptions from Toronto Deep Learning https://github.com/cmyr/INTERESTING_JPG | |
#STEPS TO RUN THIS CODE: | |
#1. create a virtual environment in your project directory | |
#2. install textblob, requests and beautiful soup to your virtual env | |
#3. run this script with a sys.argv input from the command line (this is your image search term) | |
#4. make sure the following two modules are in your project directory (+download them with the following links): | |
# Bing Search API Python Wrapper: https://www.dropbox.com/s/hlk4tcfims5no3z/bing_search_api.py?dl=0 | |
# Scraping Toronto Deep Learning: https://www.dropbox.com/s/9j0miitbwpgf4zh/cvserver.py?dl=0 | |
import sys | |
import random | |
import urllib #import urllib to download images from their URLs | |
import time #to implement delays in making requests to Toronto Deep Learning | |
from textblob import TextBlob, Word #import the class TextBlob from textblob, Word to get definitions | |
from bing_search_api import BingSearchAPI #import the class BingSearchAPI from https://github.com/xthepoet/pyBingSearchAPI | |
from cvserver import response_for_image, captions #import the function response_for_image to fetch image descriptions, caption to extract them from HTML using beautiful soup | |
#MY FUNCTIONS
def capitalize_sentence(sentence): #pass in a string to capitalize and fix punctuation
    """Capitalize the first character of ``sentence`` and tighten spacing
    left around punctuation by the caption generator (' .' -> '.', ' , ' -> ', ').

    :param sentence: caption string to clean up (may be empty)
    :return: the cleaned, capitalized string; an empty string is returned unchanged
    """
    if not sentence: #guard: the original raised IndexError on '' via sentence[0]
        return sentence
    return sentence[0].upper() + sentence[1:].replace(' .', '.').replace(' , ', ', ')
#PART 1: GET IMAGES FROM THE BING API
#NOTE(review): this API key is hard-coded and published in a public gist --
#it should be rotated and loaded from an environment variable instead.
my_key = "jYMpFzaxAn5jTbR+SUUMhoX8hqxXNYU72zaIRBFaGmA" #my API key for Bing Search
query_string = sys.argv[1] #image search term from the command line; quote multi-word queries with " "
bing = BingSearchAPI(my_key) #client wrapper from bing_search_api.py
#parameters for image searching -- more documentation on params and image filters here http://goo.gl/xG0v0O
params = {'ImageFilters':'"Style:Photo"',
          '$format': 'json', #specifies format of data response
          '$top': 50, #specifies number of results to return, default is 50
          '$skip': 0} #specifies starting point offset for the results
#bing.search() requires sources first (images, web, video, etc.), then query string, then rest of params (above)
#full schema documentation for bing API is here http://goo.gl/xG0v0O
results = bing.search('image',query_string,params).json() #requests 1.0+ returns a Response; .json() parses it
image_list = results['d']['results'][0]['Image'] #drills into the response to the list of image records
#keep only the non-empty source URLs, one per image record
image_urls = [image['MediaUrl'] for image in image_list if len(image['MediaUrl']) > 0]
# for url in image_urls: #print the list of image urls
#     print url
# #download all those images to a directory -- only do this if you need the images, takes a lot of time
# for url in image_urls:
#     file_name = url.rsplit('/',1)[1]
#     urllib.urlretrieve(url, file_name)
#PART 2: GET IMAGE DESCRIPTIONS FROM TORONTO DEEP LEARNING
#For each image URL, fetch the 5-sentence caption from the Toronto Deep Learning
#server with response_for_image(), then extract the sentences with captions().
#NOTE(review): indentation below is reconstructed from a flattened paste -- confirm
#against the original gist that print/sleep sit at the intended nesting level.
clientname = 'RSLRWET' #unique client name, required by cvserver.py
all_descriptions = list() #accumulates every caption sentence across all images
#to stay under 50 results, slice the url list in the for loop,
#ie "for url in image_urls[:response_num]:"
# response_num = 12
for url in image_urls: #for each image URL get the list of 5 captions
    raw_text = response_for_image(url, clientname) #raw HTML response from the caption server
    description = captions(raw_text) #list of caption sentences for this image, or None on failure
    if description is not None: #skip images the server could not caption, or else you'll get an error
        for each_description in description: #then loop over the list description
            if len(each_description) > 0: #drop empty strings
                all_descriptions.append(each_description) #add each description to the list of all descriptions
        print description #print the individual description as it loads (Python 2 print statement)
    time.sleep(random.uniform(0.4, 0.75)) #random delay between requests to be polite to the server
#PART 3: CREATE A LIST OF NOUN PHRASES AND NOUNS TO COMPOSE OTHER PARTS OF THE POEM
#get all the noun phrases from the image descriptions using TextBlob
all_noun_phrases = list() #flat list of every noun phrase across all descriptions
for sentence in all_descriptions: #TextBlob needs a string; each sentence becomes a blob
    blob_sentence = TextBlob(sentence)
    for phrase in blob_sentence.noun_phrases: #noun_phrases yields each phrase in the sentence
        #NOTE(review): this length check is redundant -- the loop body only runs
        #when noun_phrases is non-empty -- kept as-is to preserve the original exactly
        if len(blob_sentence.noun_phrases) > 0:
            all_noun_phrases.append(phrase) #collect every phrase into the master list
print all_noun_phrases
#CREATE A LIST OF NOUNS
all_nouns = list() #flat list of single-word nouns across all descriptions
for sentence in all_descriptions: #make each sentence in all descriptions into a textblob
    blob_sentence = TextBlob(sentence)
    for word, tag in blob_sentence.tags: #(word, POS-tag) pairs
        if tag == 'NN': #keep singular common nouns only (Penn Treebank tag)
            if len(word) > 2: #skip very short tokens
                all_nouns.append(word) #add it to the list of all the nouns
print all_nouns
#PART 4: CONSTRUCT THE POEM
#adjust the for loop -- for i in range (X) -- depending on how many stanzas you want to create
for i in range(4): #one iteration per stanza
    #pick a random noun from the list of all nouns and get one of its definitions
    random_noun = random.choice(all_nouns)
    #NOTE(review): Word(...).definitions can be an empty list for words WordNet
    #doesn't know, which would make random.choice raise IndexError -- confirm coverage
    random_noun_definition = random.choice(Word(random_noun).definitions)
    #sample_num holds the number of description sentences sampled per section
    sample_num = 3
    print "It sees a " + random.choice(all_noun_phrases) + "."
    for sentence in random.sample(all_descriptions, sample_num): #three random description lines
        sentence = sentence.strip() #strip the extra linebreaks from the sentence
        print capitalize_sentence(sentence) #capitalize and fix punctuation
    #NOTE(review): random.sample needs at least 11 noun phrases here (randrange(2, 12))
    #and at least 3 descriptions above -- short result sets will raise ValueError
    print "It sees a " + ", ".join(random.sample(all_noun_phrases, random.randrange(2, 12))) + " and " + random.choice(all_noun_phrases) + "."
    print "It sees a " + random_noun_definition + "."
    for sentence in random.sample(all_descriptions, sample_num): #three more random description lines
        sentence = sentence.strip() #strip the extra linebreaks from the sentence
        print capitalize_sentence(sentence) #capitalize and fix punctuation
    print '\n' #blank line between stanzas
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment