-
-
Save ParitoshSingh07/fe8729c0e790e1105ad7616d26a7bcb5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
# Sample dataset: one row per app-open event, timestamps still as strings.
test_df = pd.DataFrame({
    'user_id': ['A', 'B', 'C', 'A', 'D', 'E', 'A', 'B'],
    'app_open_time': [
        '2021-06-20 23:40:32', '2021-06-20 23:50:32', '2021-06-21 23:40:32',
        '2021-06-20 23:45:32', '2021-06-23 15:20:10', '2021-06-23 15:25:10',
        '2021-06-20 23:53:10', '2021-06-21 12:45:20',
    ],
})

# The final dataframe should be created based on the following constraints:
# - Datetime values within 5 minutes (<= 5 minutes) of each other should be
#   counted as 1 for each user id.
# - Datetime values should be compared only within a user id, never across
#   user ids.
# The intended output is given below.
final_df = pd.DataFrame({
    'user_id': ['A', 'B', 'C', 'D', 'E'],
    'app_opens': [2, 2, 1, 1, 1],
})

# Work on a copy so the raw sample keeps its original string column, then
# parse the timestamps so that differences between them can be computed.
df = test_df.copy()
df['app_open_time'] = pd.to_datetime(df['app_open_time'])
def calc_unique_logins(series, max_gap_seconds=5 * 60):
    """Count distinct app-open sessions in one user's timestamps.

    Timestamps are ordered chronologically; an open starts a new session
    only when it occurs more than ``max_gap_seconds`` after the open that
    precedes it. Opens separated by exactly ``max_gap_seconds`` or less are
    counted as the same session.

    Args:
        series: pandas Series of datetime values for a single user. Row
            order does not matter; missing values (NaT) are ignored.
        max_gap_seconds: Largest gap, in seconds, between two consecutive
            opens that still belongs to the same session. Defaults to
            five minutes.

    Returns:
        The number of sessions as a plain ``int``; ``0`` when the series
        holds no timestamps.
    """
    # Gaps are only meaningful between chronologically adjacent opens, so
    # sort first instead of trusting the caller's row order.
    times = series.dropna().sort_values()
    if times.empty:
        return 0

    # total_seconds() keeps whole days in the gap; the ``.dt.seconds``
    # component alone would make a gap of "1 day + 2 minutes" look like
    # 120 seconds and wrongly merge the two opens.
    gaps = times.diff().dt.total_seconds()

    # The first open always starts a session (its gap is NaN, which compares
    # False); every later gap above the threshold starts another one.
    return 1 + int((gaps > max_gap_seconds).sum())
# Number of distinct app-open sessions per user: each user's timestamps are
# reduced independently, so opens are never compared across user ids.
df.groupby('user_id')['app_open_time'].apply(calc_unique_logins)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment