-
-
Save raisalitch/c5d73f51b1840850125453b447ed80f7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In a time when there are 2.01 billion active monthly Facebook users and up to 300 million daily photo uploads, | |
the need to structure and interpret raw data is more important than ever. Our news feeds, twitter feeds, | |
reddit feeds are crawling with images that have been altered and shared to spread the most current trending | |
images and jokes. Essentially, Meme Culture has taken the social media world by storm. | |
We are at the forefront of Meme Innovation, modelling and analysing data collected from memes. We offer Viability, | |
Accessibility, Outreach and Freshness by gaining unique insights into the trends of Meme popularity in social media. | |
------------------------------------------------------------------------------------------------------------------------------ | |
https://files.slack.com/files-tmb/T65U6DG8P-F6J2ZRB4Y-5ba94bf276/image_720.png | |
Our team's collective faces when we learnt we may have accidentally crashed the Accenture server
#SweetDreamsAreMadeOfMemes | |
Mockup: | |
https://scontent.fcbr1-1.fna.fbcdn.net/v/t35.0-12/20663420_1824029017614365_1582726466_o.png?oh=8e582d8dee3bfb206635af0a930749d7&oe=59885E72 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keyword-based meme classifier.
#
# Each entry maps a category key to a 4-tuple:
#   (keywords, threshold, meme_id, meme_name)
# A text belongs to a category once at least `threshold` of its `keywords`
# occur (as substrings) in the text. The first matching category wins.
meme_dictionary = {}
meme_dictionary[1] = (['protec','attac','protecc','attacc'],2,1,'protec/attac')
meme_dictionary[2] = (['how','to','talk','to','short','people'],6,2,'short_people')
meme_dictionary[3] = (['her:','Her:','Date:','Me','Him','trying','Trying','[Trying','[trying','(Trying','(trying','*Trying','*trying',
'to','impress','her]','her)','her*'],7,3,'date')
meme_dictionary[4] = (['Guys','with','hair','like','this','chance'],6,4,'hair')
meme_dictionary[5] = (['who','Who','would','win'],3,5,'fight')
meme_dictionary[6] = (['therapist:','Therapist'],1,6,'therapist')
meme_dictionary[7] = (['Italian','italian','How','italians','Italians'],2,7,'italian people')
meme_dictionary[8] = (['Barber','what','you','want','fam?','Say','no','more'],8,8,'barber')
meme_dictionary[9] = (['I','sleep','real','shit','Real'],5,9,'real shit')
meme_dictionary[10] = (['machine','Machine','broke',],2,10,'shaq_mac')
meme_dictionary[11] = (['cold','Cold','one','with','the','boys','cracking','crackin'],5,11,'cold boys')
# BUG FIX: entry 12 was a 3-tuple with no meme_name, so consumers reading
# meme_dictionary[id][3] raised IndexError for this category.
meme_dictionary[12] = (['Language','language','differences'],2,12,'language differences')
meme_dictionary[13] = (['You','vs','the','guy','she','told','not','to','worry','about'],6,13,'you vs guy')
meme_dictionary[14] = (['top','10','Top'],2,14,'top 10')
meme_dictionary[15] = (['Depression','depression','sad','suicide'],1,15,'S A D M E M E')

def meme_category_txt(string):
    """Return the meme_id of the first category whose keyword threshold is
    met by `string`, or -1 when no category matches.

    Matching is plain substring containment, so short keywords such as
    'to' can match inside longer words.
    """
    for key, value in meme_dictionary.items():
        keywords, threshold, meme_id = value[0], value[1], value[2]
        hits = sum(1 for word in keywords if word in string)
        if hits >= threshold:
            return meme_id
    return -1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import trialSmartVision as tsv | |
import categories as c | |
import sys | |
import urllib | |
def get_text(image):
    """Run OCR on an image file via the SmartVision API and return the
    recognised text.

    image: path to an image file on disk.
    Returns the 'description' field of the completed vision task.
    """
    encoded = tsv.encodeImage(image)
    # Renamed local from 'id', which shadowed the builtin of that name.
    task_id = tsv.postImage(encoded)
    return tsv.putRunImage(task_id)['task']['description']
def rerun_csv(df):
    """Re-classify every row of `df` in place.

    Recomputes the 'meme_id' column from 'meme_str' using the keyword
    classifier in `categories`. Writes via .loc: the original chained
    assignment df['meme_id'][i] = ... only mutates a temporary copy under
    pandas copy-on-write semantics.

    NOTE(review): positional writes assume the default RangeIndex — confirm
    callers never pass a re-indexed frame.
    """
    for i in range(len(df)):
        df.loc[i, 'meme_id'] = c.meme_category_txt(str(df['meme_str'][i]))
# CLI entry point: classify a single meme image given as argv[1] and print
# the matched category's name.
if __name__ == '__main__':
    image = str(sys.argv[1])        # path to the image file
    text = get_text(image)          # OCR text extracted by SmartVision
    id = c.meme_category_txt(text)  # matched category id, or -1 for no match
    # NOTE(review): an unmatched image (id == -1) raises KeyError here —
    # confirm that is acceptable, or handle -1 explicitly.
    print(c.meme_dictionary[id][3])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib2 | |
import json | |
import datetime | |
import csv | |
import time | |
import pandas as pd | |
# Facebook page whose scraped status CSV is processed by this script.
page_id = 'WindowsMemes69'
# length of "https://"; the scontent match starts at the hostname, so we step
# back this many characters to capture the scheme prefix too
HTTPSOFFSET = 8
def properUrl(url):
    """Resolve a Facebook status link to the direct 'scontent' CDN image URL.

    Fetches `url`, scans the HTML for the first occurrence of the scontent
    hostname, and returns the full https:// URL up to the closing quote,
    with the HTML entity '&amp;' unescaped. Returns '' when the page is
    gone (HTTPError) or contains no scontent link.
    """
    urlkey = "scontent"
    try:
        urlData = urllib2.urlopen(url)
    except urllib2.HTTPError:
        return ""
    # Reuse the response obtained in the try block: the original fetched
    # the exact same URL a second time, discarding the first response and
    # doubling the network traffic per link.
    data = str(urlData.readlines())
    posstart = data.find(urlkey)
    if posstart == -1:
        return ""
    posend = data.index("\"", posstart)
    outurl = data[posstart - HTTPSOFFSET:posend]
    # Unescape the HTML-entity ampersands embedded in the page markup.
    outurl = outurl.replace("&amp;", "&")
    return outurl
def getMemeURL():
    """Add a 'cleaned_url' column of direct image links to the page's
    status CSV, then write the CSV back in place.

    Each row's status_link is resolved with properUrl(), which performs a
    network request per row.
    """
    csv_path = '{}_facebook_statuses.csv'.format(page_id)
    fbcsv = pd.read_csv(csv_path)
    # Initialise with empty strings: the original seeded the column with
    # int 0, giving it an integer dtype that the string URLs then clashed
    # with. Also write via .loc instead of chained assignment, which only
    # mutates a temporary copy under pandas copy-on-write.
    fbcsv = fbcsv.assign(cleaned_url=pd.Series(['' for i in range(0, len(fbcsv))]))
    for i in range(0, len(fbcsv)):
        print(i)  # progress indicator; each iteration hits the network
        fbcsv.loc[i, 'cleaned_url'] = properUrl(fbcsv['status_link'][i])
    fbcsv.to_csv(csv_path)
    print("Meme links added to csv!")
# Script entry: resolve every scraped status link to its direct image URL.
print ("Getting meme links...")
getMemeURL()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import some Python dependencies | |
import urllib2 | |
import json | |
import datetime | |
import csv | |
import time | |
import pandas as pd | |
# Graph API app credentials; the token is the app-level "app_id|app_secret"
# form (no user login needed to read a public page's feed).
app_id = "108926159782363"
# NOTE(review): "0000" looks like a scrubbed placeholder — the real secret
# should come from environment/config, never source control.
app_secret = "0000"
access_token = app_id + "|" + app_secret
# Page to scrape and a hard cap on how many photo statuses to keep.
page_id = 'WindowsMemes69'
max_memes = 999
def request_until_succeed(url):
    """GET `url`, retrying every 5 seconds until an HTTP 200 response, and
    return the response body.

    NOTE(review): this retries forever — a permanently broken URL never
    returns. Consider a retry cap.
    """
    req = urllib2.Request(url)
    success = False
    while success is False:
        try:
            response = urllib2.urlopen(req)
            if response.getcode() == 200:
                success = True
        # 'except Exception, e' is Python-2-only syntax; the 'as' form
        # below is valid on both Python 2.6+ and Python 3.
        except Exception as e:
            print (e)
            time.sleep(5)
            print ("Error for URL {}: {}".format(url, datetime.datetime.now()))
    return response.read()
def getFacebookPageFeedData(page_id, access_token, num_statuses):
    """Fetch one page of the Graph API /feed for `page_id` and return the
    decoded JSON payload (up to `num_statuses` statuses)."""
    # Assemble the Graph API request URL piece by piece.
    fields = "message,link,created_time,type,name,id,likes.limit(1).summary(true),comments.limit(1).summary(true),shares"
    url = ("https://graph.facebook.com" + "/" + page_id + "/feed" +
           "/?fields=" + fields +
           "&limit={}&access_token={}".format(num_statuses, access_token))
    # Fetch (with retries) and decode.
    return json.loads(request_until_succeed(url))
def processFacebookPageFeedStatus(status):
    """Flatten one Graph API status dict into a 9-tuple of CSV-ready fields.

    Returns (status_id, status_message, link_name, status_type, status_link,
    status_published, num_likes, num_comments, num_shares). Optional fields
    default to '' / 0 when absent from the payload.
    """
    # Top-level items can be read directly, but optional ones need an
    # existence check first. Membership tests go against the dict itself
    # ('x in status'), not the needless status.keys() view.
    status_id = status['id']
    status_message = status['message'].encode('utf-8') if 'message' in status else ''
    link_name = status['name'].encode('utf-8') if 'name' in status else ''
    status_type = status['type']
    status_link = status.get('link', '')
    # created_time is UTC and awkward for spreadsheets: shift to EST and
    # reformat as 'YYYY-MM-DD HH:MM:SS'.
    published = datetime.datetime.strptime(status['created_time'], '%Y-%m-%dT%H:%M:%S+0000')
    published = published + datetime.timedelta(hours=-5)  # EST
    status_published = published.strftime('%Y-%m-%d %H:%M:%S')
    # Nested counters require chaining dictionary keys.
    num_likes = status['likes']['summary']['total_count'] if 'likes' in status else 0
    num_comments = status['comments']['summary']['total_count'] if 'comments' in status else 0
    num_shares = status['shares']['count'] if 'shares' in status else 0
    return (status_id, status_message, link_name, status_type, status_link,
            status_published, num_likes, num_comments, num_shares)
def scrapeFacebookPageFeedStatus(page_id, access_token):
    """Scrape photo statuses from a Facebook page feed into
    '<page_id>_facebook_statuses.csv' (header row + one row per photo).

    Stops after max_memes photos, or when pagination halts (see the NOTE
    on the modulo check below).
    """
    # NOTE(review): 'file' shadows the Python-2 builtin of the same name,
    # and 'wb' is the Python-2 mode for csv writing (Python 3 would need
    # 'w' with newline='').
    with open('{}_facebook_statuses.csv'.format(page_id), 'wb') as file:
        w = csv.writer(file)
        w.writerow(["status_id", "status_message", "link_name", "status_type", "status_link",
                    "status_published", "num_likes", "num_comments", "num_shares"])
        has_next_page = True
        num_processed = 0   # keep a count on how many we've processed
        scrape_starttime = datetime.datetime.now()
        print "Scraping {} Facebook Page: {}\n".format(page_id, scrape_starttime)
        # First page of up to 100 statuses; later pages follow paging.next.
        statuses = getFacebookPageFeedData(page_id, access_token, 100)
        while has_next_page:
            for status in statuses['data']:
                # Only photo posts are kept — they are the memes.
                if status['type'] == 'photo':
                    w.writerow(processFacebookPageFeedStatus(status))
                    # output progress occasionally to make sure code is not stalling
                    num_processed += 1
                    if num_processed % 1000 == 0:
                        print "{} Statuses Processed: {}".format(num_processed, datetime.datetime.now())
                if num_processed == max_memes:
                    break
            # if there is no next page, we're done.
            # NOTE(review): pagination only continues while num_processed is
            # an exact multiple of 100 — a page containing non-photo posts
            # breaks that invariant and ends the scrape early. Confirm this
            # is the intended stopping rule.
            if not num_processed % 100:
                statuses = json.loads(request_until_succeed(statuses['paging']['next']))
            else:
                has_next_page = False
    print ("\nDone!\n{} Statuses Processed in {}".format(num_processed, datetime.datetime.now() - scrape_starttime))
scrapeFacebookPageFeedStatus(page_id, access_token) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Trial of smart vision API | |
import requests, json, base64 | |
import os, sys | |
import urllib | |
import pdb | |
import subprocess | |
# SmartVision service invoked by every request in this module; pick one of
# the method names listed below ('text1' = OCR text extraction).
method_type = 'text1'
#types of methods
#tagging1
#tagging2
#tagging3
#descriptions1
#faces1
#text1
#faces2
# Zip of labelled training images used by the 'faces2' training flow.
training_url = "https://www.dropbox.com/s/ggn2oy4px82k3hp/meme.zip" #"https://www.dropbox.com/s/lycxowpy4ikbqm7/exampletrain.zip"
# Encodes image data for image recognition.
def encodeImage(filename) :
    """Read the file at `filename` and return its contents base64-encoded,
    ready to embed in a SmartVision JSON payload."""
    with open(filename, "rb") as image_file:
        raw = image_file.read()
    return base64.b64encode(raw)
# Posts image to API for registration (creates a new image task).
def postImage(encoded_string) :
    """Register a new image task with the SmartVision API and return the
    task's file id (the last path segment of the returned task URI)."""
    endpoint = "http://smartvision.aiam-dh.com:8080/api/v1.0/tasks"
    payload = '{"service":"%s","image":"%s"}'%(method_type,encoded_string)
    response = requests.post(endpoint,
                             auth=('demo1','hackathon7493'),
                             data=payload,
                             headers={'Content-Type':'application/json',})
    body = json.loads(response.text)
    return body['task']['uri'].split('/')[-1]
#runs NN on trained image
#run the image recognition service
def putRunImage(file_id) :
    """Trigger the recognition service on a previously posted task and
    return the decoded JSON result.

    Removed the unused local 'services' list and a commented-out debug
    print left over in the original.
    """
    cmd_header = {'Content-Type': 'application/json',}
    cmd_data = '{"scanned":true}'
    cmd_url = 'http://smartvision.aiam-dh.com:8080/api/v1.0/tasks/run/' + file_id
    r = requests.put(cmd_url, headers=cmd_header, data=cmd_data, auth=('demo1','hackathon7493'))
    res = json.loads(r.text)
    return res
# Obtain information about all uploaded images.
def getImage() :
    """Fetch the full task list from the SmartVision API and print the
    decoded JSON."""
    listing = requests.get("http://smartvision.aiam-dh.com:8080/api/v1.0/tasks",
                           auth=('demo1','hackathon7493'))
    print(json.loads(listing.text))
#obtain information about an uploaded image
def getImageSpecific(file_id) :
    """Fetch and print the task record for a single `file_id`.

    (Dropped a leftover commented-out debug print from the original.)
    """
    cmd_auth = ('demo1','hackathon7493')
    cmd_url = "http://smartvision.aiam-dh.com:8080/api/v1.0/tasks/" + file_id
    r = requests.get(cmd_url,auth = cmd_auth)
    res = json.loads(r.text)
    print(res)
# Update the image task with the given file id.
def putUpdateImage(file_id) :
    """Re-point an existing task at the currently configured method_type
    service and print the API response."""
    #Add check for successful
    update_url = 'http://smartvision.aiam-dh.com:8080/api/v1.0/tasks/update/%s' % (file_id)
    reply = requests.put(update_url,
                         auth=('demo1','hackathon7493'),
                         headers={'Content-Type': 'application/json',},
                         data='{"service":"%s"}' % (method_type))
    print(json.loads(reply.text))
# Delete an image task.
def deleteImage(file_id) :
    """Delete the task identified by `file_id` and print the API response."""
    target = "http://smartvision.aiam-dh.com:8080/api/v1.0/tasks/%s" % (file_id)
    reply = requests.delete(target,
                            auth=('demo1','hackathon7493'),
                            headers={'Content-Type': 'application/json',})
    print(json.loads(reply.text))
# Posts a training set (zip of images) to the API.
def postTraining(training_url) :
    """Upload a zip of training images as a new task and return the zip's
    file id (the last segment of the task URI)."""
    payload = '{"service":"%s","image":"%s"}' % (method_type, training_url)
    reply = requests.post("http://smartvision.aiam-dh.com:8080/api/v1.0/tasks",
                          auth=('demo1','hackathon7493'),
                          data=payload,
                          headers={'Content-Type':'application/json',})
    parsed = json.loads(reply.text)
    return parsed['task']['uri'].split('/')[-1]
def putRunImageTrain(zip_id,encoded_string) :
    """Run the trained (faces2) recogniser on `encoded_string` against the
    training set identified by `zip_id`, printing the API response."""
    run_url = 'http://smartvision.aiam-dh.com:8080/api/v1.0/tasks/run/' + zip_id
    body = '{"scanned":true,"image":"%s"}' % (encoded_string)
    reply = requests.put(run_url,
                         headers={'Content-Type': 'application/json',},
                         data=body,
                         auth=('demo1','hackathon7493'))
    print(json.loads(reply.text))
#we only run if script is run from the command line
if __name__ == '__main__' :
    # argv[1] is the path of the image to analyse.
    command = str(sys.argv[1])
    encoded_string = encodeImage(command)
    #train on NN and then post image to recognize face
    if method_type == 'faces2' :
        zip_id = postTraining("https://www.dropbox.com/s/ggn2oy4px82k3hp/meme.zip") #) "https://www.dropbox.com/s/ggn2oy4px82k3hp/meme.zip"
        putRunImageTrain(zip_id,encoded_string)
        print(zip_id)
    else :
        # Default path: register the image, then run the configured service on it.
        file_id = postImage(encoded_string)
        putRunImage(file_id)
''' | |
To train data upload a zip folder | |
- upload zip using post? - faces2 | |
- post image | |
- run a put to run NN | |
- file id for the zip | |
''' | |
''' | |
#in order to update an image you need to | |
- post Image onto api | |
- update image (file id of old image you want to replace) | |
- Run the NN on the image | |
''' | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tweepy | |
from tweepy import OAuthHandler | |
from tweepy import Stream | |
from tweepy.streaming import StreamListener | |
# NOTE(review): live Twitter API credentials are hard-coded and committed
# here — they should be revoked immediately and loaded from environment
# variables or a config file excluded from version control.
CONSUMER_KEY= "EABODxTkrsk6YyfLj5IeoKPqi"
CONSUMER_SECRET= "8B337hrqalNKesiFf0ruosGRxdwe0HOk7jIb0VajANrxlvzftY"
ACCESS_TOKEN_KEY= "893683857123180545-WT4ME6bKuTvyM9FRbwykbXMFGGtruSE"
ACCESS_TOKEN_SECRET= "69ANY2P1u08RUMjoJKMw6wvPEWzbuWttiPoCCpMQ3Ph5f"
# Authenticate via OAuth 1a and build the API handle.
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)
class MyListener(StreamListener):
    """Stream listener that appends every raw tweet payload to meme.txt."""

    def on_data(self, data):
        # Append the raw JSON line; report (but swallow) any write error so
        # the stream keeps running either way.
        try:
            with open('meme.txt', 'a') as f:
                f.write(data)
        except BaseException as e:
            print("Error on_data: %s" % str(e))
        return True

    def on_error(self, status):
        # Log the HTTP error code and keep the stream alive.
        print(status)
        return True
# Open the streaming connection and collect every tweet tagged #meme.
twitter_stream = Stream(auth, MyListener())
twitter_stream.filter(track=['#meme'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment