Skip to content

Instantly share code, notes, and snippets.

@raisalitch
Last active March 18, 2018 03:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save raisalitch/c5d73f51b1840850125453b447ed80f7 to your computer and use it in GitHub Desktop.
Save raisalitch/c5d73f51b1840850125453b447ed80f7 to your computer and use it in GitHub Desktop.
In a time when there are 2.01 billion active monthly Facebook users and up to 300 million daily photo uploads,
the need to structure and interpret raw data is more important than ever. Our news feeds, twitter feeds,
reddit feeds are crawling with images that have been altered and shared to spread the most current trending
images and jokes. Essentially, Meme Culture has taken the social media world by storm.
We are at the forefront of Meme Innovation, modelling and analysing data collected from memes. We offer Viability,
Accessibility, Outreach and Freshness by gaining unique insights into the trends of Meme popularity in social media.
------------------------------------------------------------------------------------------------------------------------------
https://files.slack.com/files-tmb/T65U6DG8P-F6J2ZRB4Y-5ba94bf276/image_720.png
Our team's collective face when we learnt we may have accidentally crashed the Accenture server
#SweetDreamsAreMadeOfMemes
Mockup:
https://scontent.fcbr1-1.fna.fbcdn.net/v/t35.0-12/20663420_1824029017614365_1582726466_o.png?oh=8e582d8dee3bfb206635af0a930749d7&oe=59885E72
# Keyword model for recognising meme formats from OCR'd meme text.
# Each entry maps meme_id -> (keywords, min_matches, meme_id, label):
# a text matches a format when at least `min_matches` of the keywords
# occur in it (see meme_category_txt); `label` is the human-readable name.
meme_dictionary = {}
meme_dictionary[1] = (['protec','attac','protecc','attacc'],2,1,'protec/attac')
meme_dictionary[2] = (['how','to','talk','to','short','people'],6,2,'short_people')
meme_dictionary[3] = (['her:','Her:','Date:','Me','Him','trying','Trying','[Trying','[trying','(Trying','(trying','*Trying','*trying',
'to','impress','her]','her)','her*'],7,3,'date')
meme_dictionary[4] = (['Guys','with','hair','like','this','chance'],6,4,'hair')
meme_dictionary[5] = (['who','Who','would','win'],3,5,'fight')
meme_dictionary[6] = (['therapist:','Therapist'],1,6,'therapist')
meme_dictionary[7] = (['Italian','italian','How','italians','Italians'],2,7,'italian people')
meme_dictionary[8] = (['Barber','what','you','want','fam?','Say','no','more'],8,8,'barber')
meme_dictionary[9] = (['I','sleep','real','shit','Real'],5,9,'real shit')
meme_dictionary[10] = (['machine','Machine','broke',],2,10,'shaq_mac')
meme_dictionary[11] = (['cold','Cold','one','with','the','boys','cracking','crackin'],5,11,'cold boys')
# Entry 12 was missing its label element, which made meme_dictionary[12][3]
# raise IndexError in the CLI printout.
meme_dictionary[12] = (['Language','language','differences'],2,12,'language differences')
meme_dictionary[13] = (['You','vs','the','guy','she','told','not','to','worry','about'],6,13,'you vs guy')
meme_dictionary[14] = (['top','10','Top'],2,14,'top 10')
meme_dictionary[15] = (['Depression','depression','sad','suicide'],1,15,'S A D M E M E')
def meme_category_txt(string, dictionary=None):
    """Classify meme text by keyword matching.

    For each category, counts how many of its keywords occur as
    substrings of *string*; the first category whose count reaches its
    threshold wins.

    Parameters:
        string: the (OCR'd) meme text to classify.
        dictionary: optional mapping of
            {id: (keywords, threshold, meme_id, label)}; defaults to the
            module-level meme_dictionary (backward-compatible).

    Returns:
        The matching meme_id, or -1 when no category matches.
    """
    if dictionary is None:
        dictionary = meme_dictionary
    for value in dictionary.values():
        hits = sum(1 for keyword in value[0] if keyword in string)
        if hits >= value[1]:
            return value[2]
    return -1
import pandas as pd
import trialSmartVision as tsv
import categories as c
import sys
import urllib
def get_text(image):
    """Extract text from a meme image via the SmartVision OCR service.

    Uploads the image file at path *image*, runs the recognition task,
    and returns the 'description' field of the task result.
    """
    encoded = tsv.encodeImage(image)
    task_id = tsv.postImage(encoded)
    result = tsv.putRunImage(task_id)
    return result['task']['description']
def rerun_csv(df):
    """Recompute the meme_id column from meme_str for every row, in place.

    Uses .loc for assignment: the original chained indexing
    (df['meme_id'][i] = ...) may write to a temporary copy and silently
    discard the result (pandas SettingWithCopy).
    NOTE(review): assumes *df* has a default RangeIndex so positional i
    and label i coincide -- confirm against the CSV loader.
    """
    for i in range(len(df)):
        meme_id = c.meme_category_txt(str(df['meme_str'][i]))
        df.loc[i, 'meme_id'] = meme_id
if __name__ == '__main__':
    # CLI: classify the meme image given as argv[1] and print its label.
    image_path = str(sys.argv[1])
    extracted_text = get_text(image_path)
    meme_id = c.meme_category_txt(extracted_text)
    print(c.meme_dictionary[meme_id][3])
import urllib2
import json
import datetime
import csv
import time
import pandas as pd
# Facebook page whose scraped status CSV this script post-processes.
page_id = 'WindowsMemes69'
#length of https://, as must move back that length from the start of the string (string starts at s in scontent) to capture the https:// too
HTTPSOFFSET = 8
def properUrl(url):
    """Fetch *url* and extract the first https://scontent... image URL.

    Scans the fetched page for the "scontent" CDN hostname and returns
    the full image URL (scheme included, HTML entities unescaped), or ""
    when the page cannot be fetched or contains no scontent link.
    """
    urlkey = "scontent"
    try:
        urlData = urllib2.urlopen(url)
    except urllib2.HTTPError:
        return ""
    # Read the page once (the original fetched the URL a second time here).
    data = str(urlData.readlines())
    posstart = data.find(urlkey)
    if posstart == -1:
        return ""
    # The URL ends at the closing quote of its HTML attribute.
    posend = data.index("\"", posstart)
    # Back up by len("https://") so the scheme is captured too.
    outurl = data[posstart - HTTPSOFFSET:posend]
    # The page HTML-escapes query separators; the original's
    # replace("&","&") was a no-op -- unescape &amp; properly.
    outurl = outurl.replace("&amp;", "&")
    return outurl
def getMemeURL():
    """Add a cleaned_url column to the page's status CSV, in place on disk.

    Reads {page_id}_facebook_statuses.csv, resolves each status_link to
    its direct scontent image URL via properUrl, and rewrites the CSV.
    """
    path = '{}_facebook_statuses.csv'.format(page_id)
    fbcsv = pd.read_csv(path)
    # Seed the column with empty strings (it will hold URLs, not ints).
    fbcsv = fbcsv.assign(cleaned_url=pd.Series(["" for _ in range(len(fbcsv))]))
    for i in range(len(fbcsv)):
        print(i)  # progress indicator; each row costs one HTTP fetch
        # .loc assignment writes to the frame itself; the original
        # chained indexing could write to a discarded copy.
        fbcsv.loc[i, 'cleaned_url'] = properUrl(fbcsv['status_link'][i])
    fbcsv.to_csv(path)
    print("Meme links added to csv!")
# Script entry point: resolve the direct image URL for every scraped status.
print ("Getting meme links...")
getMemeURL()
# import some Python dependencies
import urllib2
import json
import datetime
import csv
import time
import pandas as pd
# Graph API credentials: "<app_id>|<app_secret>" is a valid app access token.
app_id = "108926159782363"
app_secret = "0000"  # placeholder -- substitute the real app secret before running
access_token = app_id + "|" + app_secret
# Page to scrape and a cap on how many statuses to process.
page_id = 'WindowsMemes69'
max_memes = 999
def request_until_succeed(url):
    """GET *url*, retrying every 5 seconds until it returns HTTP 200.

    Returns the raw response body. Blocks indefinitely if the URL never
    succeeds; errors are printed with a timestamp on each retry.
    """
    req = urllib2.Request(url)
    success = False
    while success is False:
        try:
            response = urllib2.urlopen(req)
            if response.getcode() == 200:
                success = True
        # 'except E as e' works on both py2.6+ and py3 (the original's
        # 'except Exception, e' is a py3 syntax error).
        except Exception as e:
            print (e)
            print ("Error for URL {}: {}".format(url, datetime.datetime.now()))
        if not success:
            # Sleep on ANY failed attempt; the original only slept when an
            # exception fired, so a non-200 response busy-looped.
            time.sleep(5)
    return response.read()
def getFacebookPageFeedData(page_id, access_token, num_statuses):
    """Fetch one batch of statuses from the page's /feed edge.

    Requests message, link, timing, type and engagement summaries for up
    to *num_statuses* posts and returns the parsed JSON response.
    """
    query = "/?fields=message,link,created_time,type,name,id,likes.limit(1).summary(true),comments.limit(1).summary(true),shares&limit={}&access_token={}".format(num_statuses, access_token)
    url = "https://graph.facebook.com" + "/" + page_id + "/feed" + query
    return json.loads(request_until_succeed(url))
def processFacebookPageFeedStatus(status):
    """Flatten one Graph API status dict into a CSV-ready tuple.

    Returns (status_id, status_message, link_name, status_type,
    status_link, status_published, num_likes, num_comments, num_shares).
    Optional fields default to '' (text) or 0 (counters) when absent.
    """
    status_id = status['id']
    # Optional text fields; UTF-8 encode so the (py2) csv writer accepts them.
    status_message = status['message'].encode('utf-8') if 'message' in status else ''
    link_name = status['name'].encode('utf-8') if 'name' in status else ''
    status_type = status['type']
    status_link = status.get('link', '')
    # created_time arrives in UTC with a literal +0000 offset; shift to EST
    # and reformat into a spreadsheet-friendly timestamp.
    status_published = datetime.datetime.strptime(status['created_time'], '%Y-%m-%dT%H:%M:%S+0000')
    status_published = status_published + datetime.timedelta(hours=-5)  # EST
    status_published = status_published.strftime('%Y-%m-%d %H:%M:%S')
    # Nested engagement counters; the whole sub-dict may be missing.
    num_likes = status['likes']['summary']['total_count'] if 'likes' in status else 0
    num_comments = status['comments']['summary']['total_count'] if 'comments' in status else 0
    num_shares = status['shares']['count'] if 'shares' in status else 0
    return (status_id, status_message, link_name, status_type, status_link,
            status_published, num_likes, num_comments, num_shares)
def scrapeFacebookPageFeedStatus(page_id, access_token):
    # Scrape the page's feed into {page_id}_facebook_statuses.csv, keeping
    # only photo posts. Python 2 only ('wb' csv mode, print statements).
    with open('{}_facebook_statuses.csv'.format(page_id), 'wb') as file:
        w = csv.writer(file)
        # CSV header matching the tuple order of processFacebookPageFeedStatus.
        w.writerow(["status_id", "status_message", "link_name", "status_type", "status_link",
        "status_published", "num_likes", "num_comments", "num_shares"])
        has_next_page = True
        num_processed = 0 # keep a count on how many we've processed
        scrape_starttime = datetime.datetime.now()
        print "Scraping {} Facebook Page: {}\n".format(page_id, scrape_starttime)
        # First batch of up to 100 statuses; later batches come from paging links.
        statuses = getFacebookPageFeedData(page_id, access_token, 100)
        while has_next_page:
            for status in statuses['data']:
                # Only photo posts are memes worth keeping.
                if status['type'] == 'photo':
                    w.writerow(processFacebookPageFeedStatus(status))
                # output progress occasionally to make sure code is not stalling
                num_processed += 1
                if num_processed % 1000 == 0:
                    print "{} Statuses Processed: {}".format(num_processed, datetime.datetime.now())
                # Stop collecting once the meme cap is reached.
                if num_processed == max_memes:
                    break
            # if there is no next page, we're done.
            # NOTE(review): a full batch leaves num_processed at a multiple of
            # 100, so this fetches the next paging link; a partial batch (or
            # hitting max_memes=999) ends the loop.
            if not num_processed % 100:
                statuses = json.loads(request_until_succeed(statuses['paging']['next']))
            else:
                has_next_page = False
    print ("\nDone!\n{} Statuses Processed in {}".format(num_processed, datetime.datetime.now() - scrape_starttime))
# Script entry point: run the scrape with the module-level credentials.
scrapeFacebookPageFeedStatus(page_id, access_token)
#Trial of smart vision API
import requests, json, base64
import os, sys
import urllib
import pdb
import subprocess
# SmartVision service to invoke for ad-hoc runs (see list below).
method_type = 'text1'
#types of methods
#tagging1
#tagging2
#tagging3
#descriptions1
#faces1
#text1
#faces2
# Zip of training images used by the faces2 service.
training_url = "https://www.dropbox.com/s/ggn2oy4px82k3hp/meme.zip" #"https://www.dropbox.com/s/lycxowpy4ikbqm7/exampletrain.zip"
#encodes image data for image recognition
def encodeImage(filename):
    """Return the base64-encoded contents of the image file at *filename*."""
    with open(filename, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes)
#posts image to API for registration (creates a new task)
def postImage(encoded_string):
    """Register a base64-encoded image with the SmartVision API.

    Returns the id of the task created for the image.
    """
    payload = '{"service":"%s","image":"%s"}'%(method_type,encoded_string)
    response = requests.post("http://smartvision.aiam-dh.com:8080/api/v1.0/tasks",
                             auth=('demo1','hackathon7493'),
                             data=payload,
                             headers={'Content-Type':'application/json',})
    created = json.loads(response.text)
    # The task id is the last path segment of the returned task URI.
    return created['task']['uri'].split('/')[-1]
#runs the image recognition service on an uploaded image task
def putRunImage(file_id):
    """Run the recognition service on task *file_id*.

    Returns the parsed JSON response; callers read
    result['task']['description'] for the OCR text.
    """
    # (Removed an unused 'services' list and dead commented-out code.)
    r = requests.put('http://smartvision.aiam-dh.com:8080/api/v1.0/tasks/run/' + file_id,
                     headers={'Content-Type': 'application/json',},
                     data='{"scanned":true}',
                     auth=('demo1','hackathon7493'))
    return json.loads(r.text)
#obtain information about uploaded images
def getImage():
    """Fetch and print the task list for all uploaded images."""
    response = requests.get("http://smartvision.aiam-dh.com:8080/api/v1.0/tasks",
                            auth=('demo1','hackathon7493'))
    print(json.loads(response.text))
#obtain information about an uploaded image
def getImageSpecific(file_id):
    """Fetch and print the task record for the single image *file_id*."""
    endpoint = "http://smartvision.aiam-dh.com:8080/api/v1.0/tasks/" + file_id
    response = requests.get(endpoint, auth=('demo1','hackathon7493'))
    print(json.loads(response.text))
#update image with certain file id
def putUpdateImage(file_id):
    """Reset the service type on an existing task and print the reply."""
    credentials = ('demo1','hackathon7493')
    body = '{"service":"%s"}'%(method_type)
    endpoint = "http://smartvision.aiam-dh.com:8080/api/v1.0/tasks/update/%s"%(file_id)
    # TODO(review): the response is printed but never checked for success.
    response = requests.put(endpoint, auth=credentials,
                            headers={'Content-Type': 'application/json',},
                            data=body)
    print(json.loads(response.text))
#delete an image
def deleteImage(file_id):
    """Delete the uploaded image task *file_id* and print the API reply."""
    endpoint = "http://smartvision.aiam-dh.com:8080/api/v1.0/tasks/%s"%(file_id)
    response = requests.delete(endpoint,
                               auth=('demo1','hackathon7493'),
                               headers={'Content-Type': 'application/json',})
    print(json.loads(response.text))
#posts training set
def postTraining(training_url):
    """Upload a zip of training images (by URL); return the created task id."""
    payload = '{"service":"%s","image":"%s"}'%(method_type,training_url)
    response = requests.post("http://smartvision.aiam-dh.com:8080/api/v1.0/tasks",
                             auth=('demo1','hackathon7493'),
                             data=payload,
                             headers={'Content-Type':'application/json',})
    parsed = json.loads(response.text)
    return parsed['task']['uri'].split('/')[-1]
def putRunImageTrain(zip_id, encoded_string):
    """Run recognition of *encoded_string* against the trained set *zip_id*."""
    payload = '{"scanned":true,"image":"%s"}'%(encoded_string)
    endpoint = 'http://smartvision.aiam-dh.com:8080/api/v1.0/tasks/run/' + zip_id
    response = requests.put(endpoint,
                            headers={'Content-Type': 'application/json',},
                            data=payload,
                            auth=('demo1','hackathon7493'))
    print(json.loads(response.text))
#we only run if script is run from the command line
if __name__ == '__main__' :
    # CLI: python <script> <image path>
    image_path = str(sys.argv[1])
    encoded = encodeImage(image_path)
    if method_type == 'faces2' :
        # faces2 needs a training set first: upload it, then run recognition.
        zip_id = postTraining("https://www.dropbox.com/s/ggn2oy4px82k3hp/meme.zip")
        putRunImageTrain(zip_id, encoded)
        print(zip_id)
    else :
        # All other services: register the image and run the service on it.
        task_id = postImage(encoded)
        putRunImage(task_id)
'''
To train data upload a zip folder
- upload zip using post? - faces2
- post image
- run a put to run NN
- file id for the zip
'''
'''
#in order to update an image you need to
- post Image onto api
- update image (file id of old image you want to replace)
- Run the NN on the image
'''
import tweepy
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
# Twitter API credentials and authenticated client.
# SECURITY NOTE(review): these look like real secrets committed to a public
# gist -- they should be revoked and loaded from the environment instead.
CONSUMER_KEY= "EABODxTkrsk6YyfLj5IeoKPqi"
CONSUMER_SECRET= "8B337hrqalNKesiFf0ruosGRxdwe0HOk7jIb0VajANrxlvzftY"
ACCESS_TOKEN_KEY= "893683857123180545-WT4ME6bKuTvyM9FRbwykbXMFGGtruSE"
ACCESS_TOKEN_SECRET= "69ANY2P1u08RUMjoJKMw6wvPEWzbuWttiPoCCpMQ3Ph5f"
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)
class MyListener(StreamListener):
    """Stream listener that appends every raw tweet payload to meme.txt."""

    def on_data(self, data):
        # Best-effort append; never kill the stream over a write failure.
        try:
            with open('meme.txt', 'a') as f:
                f.write(data)
        except BaseException as e:
            print("Error on_data: %s" % str(e))
        return True

    def on_error(self, status):
        # Log the HTTP error code and keep the stream alive.
        print(status)
        return True
# Open the streaming connection and capture every tweet tagged #meme.
twitter_stream = Stream(auth, MyListener())
twitter_stream.filter(track=['#meme'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment