Skip to content

Instantly share code, notes, and snippets.

@astrotars
Created November 2, 2017 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save astrotars/cfebc5a762db42f4f67ba65de1e251a7 to your computer and use it in GitHub Desktop.
# --- Fetch my own profile so the graph knows which node is "me". ---
api.getSelfUsernameInfo()
result = api.LastJson
user_id = result['user']['pk']      # my own personal user id
me = result['user']['full_name']    # my own personal username

# --- Photos I've liked: each liked item tells us who posted it. ---
api.getLikedMedia()
result = api.LastJson
users = [item['user'] for item in result['items']]

# --- Build the extended (two-hop) social network as follow edges:
# (src_id, dst_id, src_name, dst_name) tuples. ---
follow_relationships = []
for followed in tqdm(users):
    followed_id = followed['pk']
    followed_name = followed['full_name']
    # First hop: me -> someone whose photo I liked.
    follow_relationships.append((user_id, followed_id, me, followed_name))
    # Second hop: that person -> everyone they follow.
    api.getUserFollowings(followed_id)
    result2 = api.LastJson
    for second in result2['users']:
        follow_relationships.append(
            (followed_id, second['pk'], followed_name, second['full_name']))

df_global = pd.DataFrame(
    follow_relationships,
    columns=['src_id', 'dst_id', 'src_name', 'dst_name'])
# Every distinct user id that appears on either end of an edge.
all_user_ids_global = np.unique(
    df_global[['src_id', 'dst_id']].values.reshape(1, -1))
# Create the social graph and calculate personalized PageRank.
# FIX: nx.from_pandas_dataframe was removed in networkx 2.0; the modern
# equivalent (same edge semantics) is nx.from_pandas_edgelist.
G = nx.from_pandas_edgelist(df_global, 'src_id', 'dst_id')
# Personalization vector: all restart mass on my own node, so PageRank
# scores every node by its proximity to "me" in the follow graph.
personalization = {node: 0 for node in G.nodes()}
personalization[user_id] = 1
ppr = nx.pagerank(G, personalization=personalization)
# Fetch recent posts from everyone in the extended network and collect the
# features needed for scoring. This may take a while if you follow a lot
# of people.
urls = []
taken_at = []
num_likes = []
num_comments = []
page_rank = []
users = []
# FIX: the original loop variable was `user_id`, which clobbered the
# script's own user id computed earlier; use a distinct name.
for feed_user_id in tqdm(all_user_ids_global):
    api.getUserFeed(feed_user_id)
    result = api.LastJson
    if 'items' in result:
        for item in result['items']:
            # Only grabbing pictures (no videos or carousels).
            if 'image_versions2' in item:
                # NOTE(review): candidates[1] assumes at least two image
                # renditions exist — confirm against the API response.
                url = item['image_versions2']['candidates'][1]['url']
                taken = item['taken_at']
                # Counts can be absent on some posts; default to 0
                # (dict.get replaces the original try/except KeyError).
                likes = item.get('like_count', 0)
                comments = item.get('comment_count', 0)
                pr = ppr[item['user']['pk']]
                user = item['user']['full_name']
                if user != me:  # don't count myself!
                    urls.append(url)
                    taken_at.append(taken)
                    num_likes.append(likes)
                    num_comments.append(comments)
                    page_rank.append(pr)
                    users.append(user)
# Now we can make a dataframe with all of that information.
scores_df = pd.DataFrame(
    {'urls': urls,
     'taken_at': taken_at,
     'num_likes': num_likes,
     'num_comments': num_comments,
     'page_rank': page_rank,
     'users': users,
     })
# Don't care about anything older than 1 week.
# FIX: strftime('%s') is a non-standard, Linux-only extension and silently
# misbehaves elsewhere; datetime.timestamp() is the portable epoch seconds.
oldest_time = int((datetime.datetime.now()
                   - datetime.timedelta(weeks=1)).timestamp())
# For a discovery feed we don't want to show photos of people we already
# follow. NOTE(review): the original filtered on `df_local`, which is never
# defined in this script (NameError); reconstructed here as "names I follow
# directly" from the first-hop edges of df_global — confirm intent.
direct_follow_names = df_global.loc[df_global['src_name'] == me, 'dst_name']
scores_df = scores_df[~scores_df['users'].isin(direct_follow_names)]
scores_df = scores_df[scores_df['taken_at'] > oldest_time]
# /1e5 to help out with some machine precision (numbers get real small
# otherwise).
scores_df['time_score'] = np.exp(-(int(time.time()) - scores_df['taken_at']) / 1e5)
# Combine engagement, graph proximity and recency into one score; the logs
# tame heavy-tailed like/comment counts (+2/+1 keep both factors positive).
scores_df['total_score'] = (np.log10(scores_df['num_comments'] + 2)
                            * np.log10(scores_df['num_likes'] + 1)
                            * scores_df['page_rank']
                            * scores_df['time_score'])
# Calculate top ten highest rated posts.
top_ten = scores_df['total_score'].nlargest(10)
top_rows = scores_df.loc[top_ten.index].values
top_personal_img = []
top_graph_img = []
# Display the feed.
# FIX: the original indexed rows positionally (row[4] for the url, etc.),
# which only matched the *alphabetical* column order that very old pandas
# gave dict-built frames; insertion-ordered pandas silently reads the wrong
# columns. Iterate the top-scoring rows with named attributes instead.
for row in scores_df.loc[top_ten.index].itertuples():
    img = Image(row.urls, format='jpeg')
    top_graph_img.append(img)
    display(img)
    top_personal_img.append(img)
    print('taken_at: %s' % time.strftime('%Y-%m-%d %H:%M:%S',
                                         time.localtime(row.taken_at)))
    print('number of likes: %s' % row.num_likes)
    print('number of comments: %s' % row.num_comments)
    print('page_rank: %s' % row.page_rank)
    print(row.users)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment