Last active
April 13, 2020 00:47
-
-
Save nicolasesnis/e5a8ba4cf96be825720b496ce9ca6035 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Working on the links_dict: start from an empty mapping that will be filled as
# {source_index: {target_index: {'unique_users': ..., 'avg_time_to_next': ...}}}.
output['links_dict'] = {}

# Group the DataFrame by user_id and rank_event, so that each group's key
# (exposed as `.name` inside `apply`) is the pair (user_id, rank_event).
grouped = data.groupby(['user_id', 'rank_event'])
# Define a function to read the sources, targets, values and time from event to next_event:
def update_source_target(user):
    """Record one (user_id, rank_event) group's transition in ``output['links_dict']``.

    The source node is looked up from the group's first ``event_name`` among the
    nodes of its rank; the target node is looked up from the group's first
    ``next_event`` among the nodes of the following rank (``rank + 1``).  The
    entry ``output['links_dict'][source_index][target_index]`` is then created
    or updated in place.

    Args:
        user: One group of the grouped DataFrame.  ``user.name`` is the group
            key ``(user_id, rank_event)``; the columns ``event_name``,
            ``next_event`` and ``time_to_next`` are read (first row only).

    Returns:
        None.  The module-level ``output`` dict is mutated.

    Raises:
        KeyError: If ``output`` has no ``'nodes_dict'`` or ``'links_dict'``
            entry.  This is a set-up error and is no longer silently ignored.

    Notes:
        * Groups whose source or target cannot be resolved (no nodes for the
          rank or the next rank, event not listed among that rank's sources,
          missing column, empty group) are skipped without touching
          ``links_dict``.
        * ``'avg_time_to_next'`` holds the running SUM of ``time_to_next``
          values; it has to be divided by ``'unique_users'`` afterwards to
          become an average.
    """
    # Fail loudly if the containers themselves are missing.
    nodes_by_rank = output['nodes_dict']
    links = output['links_dict']

    try:
        # user.name[0] is the user's user_id; user.name[1] is the rank of each action
        rank = user.name[1]
        source_nodes = nodes_by_rank[rank]
        target_nodes = nodes_by_rank[rank + 1]

        # Retrieve the source and target's indices from nodes_dict: find the
        # event's position in 'sources', then read the node index stored at
        # that same position in 'sources_index'.
        source_position = source_nodes['sources'].index(user['event_name'].values[0])
        source_index = source_nodes['sources_index'][source_position]
        target_position = target_nodes['sources'].index(user['next_event'].values[0])
        target_index = target_nodes['sources_index'][target_position]

        time_to_next = user['time_to_next'].values[0]
    except (KeyError, ValueError, IndexError):
        # No resolvable source/target pair for this group: nothing to record.
        return

    # Create the source's entry on first sight, then either start a new
    # source/target link or add this user to the existing one.
    targets = links.setdefault(source_index, {})
    link = targets.get(target_index)
    if link is None:
        targets[target_index] = {'unique_users': 1, 'avg_time_to_next': time_to_next}
    else:
        link['unique_users'] += 1
        link['avg_time_to_next'] += time_to_next
# Apply the function to your grouped Pandas object. The call is made for its
# side effect on output['links_dict']; the value returned by `apply` is not used.
grouped.apply(update_source_target)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment