Skip to content

Instantly share code, notes, and snippets.

@digisavvy
Created November 9, 2024 22:49
Show Gist options
  • Save digisavvy/85c965b163bd8e41fc1898297afa6d2a to your computer and use it in GitHub Desktop.
Save digisavvy/85c965b163bd8e41fc1898297afa6d2a to your computer and use it in GitHub Desktop.
import pandas as pd
import sys
# Columns that must be present in the input CSV: 'Tags' drives the scoring and
# the other three are copied into every exported segment file.
_REQUIRED_COLUMNS = ('Email', 'First Name', 'Last Name', 'Tags')

# Bracketed markers whose presence makes a tag count towards activity diversity.
_ACTIVITY_MARKERS = ('[Action]', '[Activity]', '[Event]')


def _parse_tags(raw_tags):
    """Split one comma-separated Tags cell into stripped, non-empty tags."""
    if pd.isna(raw_tags):
        return []
    # Strip each piece: "a, b".split(',') leaves a leading space on " b",
    # which would otherwise make identical tag prefixes look distinct.
    return [tag.strip() for tag in str(raw_tags).split(',') if tag.strip()]


def _calculate_engagement_score(raw_tags):
    """Return the numeric engagement score for one user's Tags cell."""
    tags = _parse_tags(raw_tags)

    # Activity diversity (0-30): 10 points per distinct bracketed prefix among
    # tags that carry one of the activity markers.
    activity_types = {
        tag.split(']')[0] + ']'
        for tag in tags
        if any(marker in tag for marker in _ACTIVITY_MARKERS)
    }
    activity_score = min(len(activity_types) * 10, 30)

    # Action depth (0-25): 2.5 points per '[Action]' tag.
    action_count = sum(1 for tag in tags if '[Action]' in tag)
    action_score = min(action_count * 2.5, 25)

    # Membership status: a flat 10 points for any non-"all access" Member tag.
    # NOTE(review): the original comment advertised a 0-20 range, but only 10
    # points were ever awarded; that behaviour is kept unchanged here.
    is_member = any(
        'Member' in tag and 'all access' not in tag.lower() for tag in tags
    )
    membership_score = 10 if is_member else 0

    # Login/account status (0-15): 7.5 points for each of the two tags.
    login_score = 0
    if any('Has Account' in tag for tag in tags):
        login_score += 7.5
    if any('First Login' in tag for tag in tags):
        login_score += 7.5

    # Content consumption (0-10): 1 point per Downloaded/Played tag.
    consumed = sum(1 for tag in tags if 'Downloaded' in tag or 'Played' in tag)
    consumption_score = min(consumed, 10)

    return (
        activity_score
        + action_score
        + membership_score
        + login_score
        + consumption_score
    )


def _categorize_engagement(score):
    """Map a numeric engagement score to its engagement-level label."""
    if score >= 70:
        return "Highly Engaged"
    if score >= 40:
        return "Moderately Engaged"
    return "Low Engaged"


def debug_segmentation(input_path='analysis.csv', output_dir='.'):
    """Score users by their tags and export one CSV per engagement segment.

    Reads the user CSV, drops every user whose Tags mention "all access"
    (case-insensitive), scores the remaining users, prints summary
    statistics, and writes ``<segment>_users_no_all_access.csv`` for each
    non-empty segment, sorted by descending score.

    Args:
        input_path: CSV file to read. It must contain the columns 'Email',
            'First Name', 'Last Name' and 'Tags'.
        output_dir: Directory that receives the segment files; it is created
            if it does not exist.

    Returns:
        None. Any error is reported on stdout (message plus traceback)
        instead of being raised, so the script always runs to completion.
    """
    from pathlib import Path
    import traceback

    try:
        # Read the CSV file
        print("Attempting to read CSV file...")
        df = pd.read_csv(input_path)
        print("\nInitial data check:")
        print(f"Number of rows loaded: {len(df)}")

        missing = [col for col in _REQUIRED_COLUMNS if col not in df.columns]
        if missing:
            raise ValueError(
                f"Input CSV is missing required column(s): {', '.join(missing)}"
            )

        # Filter out All Access users. astype(str) keeps the .str accessor
        # usable even when the Tags column was not parsed as text.
        tags_text = df['Tags'].fillna('').astype(str)
        is_all_access = tags_text.str.contains('all access', case=False)
        # Explicit copy: columns are added below and must not be written
        # through a slice of the original frame.
        df_filtered = df.loc[~is_all_access].copy()
        print(f"Users removed due to All Access: {len(df) - len(df_filtered)}")
        print(f"Remaining users: {len(df_filtered)}")

        print("\nCalculating engagement scores...")
        # Series.map yields an (empty) Series even when no users remain,
        # unlike a row-wise DataFrame.apply on an empty frame.
        df_filtered['engagement_score'] = df_filtered['Tags'].map(
            _calculate_engagement_score
        )
        df_filtered['engagement_level'] = df_filtered['engagement_score'].map(
            _categorize_engagement
        )

        print("\nEngagement Level Distribution:")
        print(df_filtered['engagement_level'].value_counts())
        print("\nEngagement Score Statistics:")
        print(df_filtered['engagement_score'].describe())

        # Create segments
        levels = df_filtered['engagement_level']
        segments = {
            'highly_engaged': df_filtered[levels == 'Highly Engaged'],
            'moderately_engaged': df_filtered[levels == 'Moderately Engaged'],
            'low_engaged': df_filtered[levels == 'Low Engaged'],
        }

        # Export segments
        out_dir = Path(output_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
        export_columns = ['Email', 'First Name', 'Last Name', 'engagement_score', 'Tags']
        for segment_name, segment_data in segments.items():
            if segment_data.empty:
                continue
            # With the default output_dir ('.') this is just the bare filename.
            filename = out_dir / f'{segment_name}_users_no_all_access.csv'
            export_data = segment_data[export_columns].sort_values(
                by='engagement_score', ascending=False
            )
            export_data.to_csv(filename, index=False)
            print(f"\nExported {len(segment_data)} {segment_name} users to {filename}")
            print(f"\nTop 3 {segment_name} users:")
            print(export_data[['Email', 'engagement_score']].head(3))
    except Exception as e:
        # Top-level boundary of a debugging script: report and carry on.
        print(f"An error occurred: {str(e)}")
        print("Error details:")
        traceback.print_exc(file=sys.stdout)
# Run the segmentation only when this file is executed as a script.
if __name__ == "__main__":
    debug_segmentation()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment