Created
November 2, 2017 14:21
-
-
Save astrotars/cfebc5a762db42f4f67ba65de1e251a7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Look up the profile of the account the API client is logged in as.
api.getSelfUsernameInfo()
result = api.LastJson
# user_id: my own 'pk' value from the profile response.
# me: my own 'full_name' value from the same response.
# NOTE(review): the original comment called `me` a username, but the field
# read here is 'full_name' -- confirm which one is intended.
user_id, me = result['user']['pk'], result['user']['full_name']
# Get the photos that I've liked and keep the author record of each one.
api.getLikedMedia()
result = api.LastJson
users = [item['user'] for item in result['items']]

# Build up the extended social network as a list of
# (src_id, dst_id, src_name, dst_name) edges: one edge from me to every
# author whose photo I liked, plus one edge from that author to each account
# returned by getUserFollowings() for them.
follow_relationships = []
seen_user_ids = set()
for user in tqdm(users):
    followed_user_id = user['pk']
    # Several liked photos can share one author; processing the author again
    # would only repeat the same request and append duplicate edges.
    if followed_user_id in seen_user_ids:
        continue
    seen_user_ids.add(followed_user_id)

    followed_user_name = user['full_name']
    follow_relationships.append(
        (user_id, followed_user_id, me, followed_user_name))

    api.getUserFollowings(followed_user_id)
    result2 = api.LastJson
    # A response without a 'users' list is skipped instead of raising
    # KeyError and throwing away every edge collected so far.
    for user2 in result2.get('users', []):
        follow_relationships.append(
            (followed_user_id, user2['pk'],
             followed_user_name, user2['full_name']))

df_global = pd.DataFrame(
    follow_relationships,
    columns=['src_id', 'dst_id', 'src_name', 'dst_name'])
# np.unique flattens its input, so this is the sorted array of every id that
# appears as either endpoint of an edge.
all_user_ids_global = np.unique(df_global[['src_id', 'dst_id']].values)
# Create the social graph from the edge list in df_global.
# nx.from_pandas_dataframe was renamed to nx.from_pandas_edgelist in newer
# networkx releases; use whichever one this installation provides.
if hasattr(nx, 'from_pandas_edgelist'):
    G = nx.from_pandas_edgelist(df_global, 'src_id', 'dst_id')
else:
    G = nx.from_pandas_dataframe(df_global, 'src_id', 'dst_id')

# Calculate personalized PageRank: every node gets weight 0 except my own
# node, which gets weight 1, and that dict is passed as the `personalization`
# argument of nx.pagerank.
perzonalization_dict = dict.fromkeys(G.nodes(), 0)
perzonalization_dict[user_id] = 1
ppr = nx.pagerank(G, personalization=perzonalization_dict)
# This may take a while if you follow a lot of people: one feed request is
# made for every user id in all_user_ids_global.
# The six lists below are filled in lockstep, one entry per kept post.
urls = []
taken_at = []
num_likes = []
num_comments = []
page_rank = []
users = []
# The loop variable is deliberately not named `user_id`, so the module-level
# `user_id` (my own id) is not overwritten by the iteration.
for feed_user_id in tqdm(all_user_ids_global):
    api.getUserFeed(feed_user_id)
    result = api.LastJson
    # Responses without an 'items' list contribute nothing.
    for item in result.get('items', []):
        # Items without 'image_versions2' are skipped (the original author
        # describes these as videos or carousels).
        if 'image_versions2' not in item:
            continue
        candidates = item['image_versions2']['candidates']
        if not candidates:
            continue
        # Read every field before appending anything so the lists can never
        # end up with different lengths.
        # The second candidate is used when there is one; otherwise fall back
        # to the only candidate instead of raising IndexError.
        url = candidates[1 if len(candidates) > 1 else 0]['url']
        taken = item['taken_at']
        likes = item.get('like_count', 0)        # missing count -> 0
        comments = item.get('comment_count', 0)  # missing count -> 0
        pr = ppr[item['user']['pk']]
        user = item['user']['full_name']
        if user != me:  # don't count myself!
            urls.append(url)
            taken_at.append(taken)
            num_likes.append(likes)
            num_comments.append(comments)
            page_rank.append(pr)
            users.append(user)
# Now we can make a dataframe with all of that information: one row per
# collected post.
# The column order is pinned explicitly (alphabetical, which is what older
# pandas produced from a dict) so that any positional access to the rows does
# not depend on how the installed pandas orders dict keys.
scores_df = pd.DataFrame(
    {
        'urls': urls,
        'taken_at': taken_at,
        'num_likes': num_likes,
        'num_comments': num_comments,
        'page_rank': page_rank,
        'users': users,
    },
    columns=[
        'num_comments',
        'num_likes',
        'page_rank',
        'taken_at',
        'urls',
        'users',
    ],
)
# Don't care about anything older than 1 week.
# The cutoff is an integer epoch timestamp for "local now minus one week".
# time.mktime() is used instead of strftime('%s'), which is a
# platform-specific extension and is not available on every system.
oldest_time = int(time.mktime(
    (datetime.datetime.now() - datetime.timedelta(weeks=1)).timetuple()))

# For a discovery feed we don't want to show photos of people we already
# follow.
# NOTE(review): df_local is not defined in the visible part of this script;
# presumably it holds my own follow edges with a 'dst_name' column -- confirm.
scores_df = scores_df[~scores_df['users'].isin(df_local['dst_name'])]
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the unfiltered frame (SettingWithCopyWarning).
scores_df = scores_df[scores_df['taken_at'] > oldest_time].copy()

# Exponential decay on the post's age in seconds.
# /1e5 to help out with some machine precision (numbers get real small
# otherwise).
scores_df['time_score'] = np.exp(
    -(int(time.time()) - scores_df['taken_at']) / 1e5)
# Total score is the product of log10(comments + 2), log10(likes + 1), the
# author's page rank and the time score; a post with zero likes therefore
# scores exactly 0.
scores_df['total_score'] = (
    np.log10(scores_df['num_comments'] + 2)
    * np.log10(scores_df['num_likes'] + 1)
    * scores_df['page_rank']
    * scores_df['time_score'])
# Calculate the top ten highest rated posts.
top_ten = scores_df['total_score'].nlargest(10)
top_rows = scores_df.loc[top_ten.index].values
top_personal_img = []
top_graph_img = []

# Resolve column positions by name instead of hard-coding 0..5: the position
# of each column in scores_df depends on how the frame was built, so magic
# indices can silently read the wrong field.
_col = scores_df.columns.get_loc

# Display the feed: show each image followed by its metadata.
for row in top_rows:
    img = Image(row[_col('urls')], format='jpeg')
    # NOTE(review): the same image is appended to both lists, as in the
    # original -- confirm whether two separate lists are really needed.
    top_graph_img.append(img)
    display(img)
    top_personal_img.append(img)
    print('taken_at: %s' % time.strftime(
        '%Y-%m-%d %H:%M:%S', time.localtime(row[_col('taken_at')])))
    print('number of likes: %s' % row[_col('num_likes')])
    print('number of comments: %s' % row[_col('num_comments')])
    print('page_rank: %s' % row[_col('page_rank')])
    print(row[_col('users')])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment