Created
November 9, 2024 22:49
-
-
Save digisavvy/85c965b163bd8e41fc1898297afa6d2a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
import traceback

import pandas as pd
def _split_tags(raw_tags):
    """Return the individual tags from a comma-separated tag cell.

    Surrounding whitespace is stripped from every tag and empty entries are
    dropped, so "a, b" and "a,b" yield the same list. A missing cell
    (NaN/None) yields an empty list.
    """
    if pd.isna(raw_tags):
        return []
    return [tag.strip() for tag in str(raw_tags).split(',') if tag.strip()]


def _score_tags(raw_tags):
    """Compute the engagement score for one user's raw ``Tags`` cell.

    The score is the sum of five components:

    * Activity diversity: 10 points per distinct activity type, capped at 30.
      A tag counts when it contains ``[Action]``, ``[Activity]`` or
      ``[Event]``; its type is the tag text up to and including the first
      ``]``.
    * Action depth: 2.5 points per tag containing ``[Action]``, capped at 25.
    * Membership: 10 points if any tag contains ``Member`` and does not
      mention "all access" (case-insensitive).
    * Login/account: 7.5 points for a ``Has Account`` tag plus 7.5 points
      for a ``First Login`` tag.
    * Content consumption: 1 point per tag containing ``Downloaded`` or
      ``Played``, capped at 10.

    Returns:
        The total score as a number between 0 and 90.
    """
    tags = _split_tags(raw_tags)
    score = 0

    # Activity diversity. Tags are already stripped, so "[Action] x" and
    # " [Action] y" map to the same type instead of being counted twice.
    activity_types = {
        tag.split(']')[0] + ']'
        for tag in tags
        if '[Action]' in tag or '[Activity]' in tag or '[Event]' in tag
    }
    score += min(len(activity_types) * 10, 30)

    # Action depth.
    action_count = sum(1 for tag in tags if '[Action]' in tag)
    score += min(action_count * 2.5, 25)

    # Membership status.
    # NOTE(review): the original section header advertised 0-20 points, but
    # only 10 is ever granted; confirm the intended weighting.
    if any('Member' in tag and 'all access' not in tag.lower() for tag in tags):
        score += 10

    # Login/account status.
    if any('Has Account' in tag for tag in tags):
        score += 7.5
    if any('First Login' in tag for tag in tags):
        score += 7.5

    # Content consumption.
    consumed = sum(1 for tag in tags if 'Downloaded' in tag or 'Played' in tag)
    score += min(consumed, 10)

    return score


def _categorize_engagement(score):
    """Map a numeric engagement score to its engagement level label."""
    if score >= 70:
        return "Highly Engaged"
    if score >= 40:
        return "Moderately Engaged"
    return "Low Engaged"


def debug_segmentation():
    """Segment the users in ``analysis.csv`` by engagement and export them.

    Reads ``analysis.csv`` from the current working directory, drops every
    user whose ``Tags`` mention "all access" (case-insensitive), scores the
    remaining users from their tags, labels each one as Highly / Moderately /
    Low Engaged, and writes one ``<segment>_users_no_all_access.csv`` file per
    non-empty segment, sorted by descending score. Progress and summary
    statistics are printed along the way.

    Any exception (missing file, missing column, ...) is reported on stdout
    together with its traceback instead of being propagated, so the function
    always returns ``None``.
    """
    try:
        # Read the CSV file
        print("Attempting to read CSV file...")
        df = pd.read_csv('analysis.csv')
        print("\nInitial data check:")
        print(f"Number of rows loaded: {len(df)}")

        # Filter out All Access users
        df['has_all_access'] = df['Tags'].fillna('').str.contains('all access', case=False)
        # Work on an explicit copy: new columns are added below, and assigning
        # to a filtered slice of ``df`` triggers SettingWithCopy issues.
        df_filtered = df[~df['has_all_access']].copy()
        print(f"Users removed due to All Access: {len(df) - len(df_filtered)}")
        print(f"Remaining users: {len(df_filtered)}")

        print("\nCalculating engagement scores...")
        # Score from the Tags column directly; a row-wise DataFrame.apply
        # returns a DataFrame (not a Series) when no rows are left.
        df_filtered['engagement_score'] = (
            df_filtered['Tags'].apply(_score_tags).astype(float)
        )
        df_filtered['engagement_level'] = (
            df_filtered['engagement_score'].apply(_categorize_engagement)
        )

        print("\nEngagement Level Distribution:")
        print(df_filtered['engagement_level'].value_counts())
        print("\nEngagement Score Statistics:")
        print(df_filtered['engagement_score'].describe())

        # Create segments
        segments = {
            'highly_engaged': df_filtered[df_filtered['engagement_level'] == 'Highly Engaged'],
            'moderately_engaged': df_filtered[df_filtered['engagement_level'] == 'Moderately Engaged'],
            'low_engaged': df_filtered[df_filtered['engagement_level'] == 'Low Engaged'],
        }

        # Export segments
        for segment_name, segment_data in segments.items():
            if segment_data.empty:
                continue
            filename = f'{segment_name}_users_no_all_access.csv'
            export_data = segment_data[['Email', 'First Name', 'Last Name', 'engagement_score', 'Tags']]
            export_data = export_data.sort_values(by='engagement_score', ascending=False)
            export_data.to_csv(filename, index=False)
            print(f"\nExported {len(segment_data)} {segment_name} users to {filename}")
            print(f"\nTop 3 {segment_name} users:")
            print(export_data[['Email', 'engagement_score']].head(3))
    except Exception as e:
        # Top-level boundary of a debugging script: report the failure and
        # keep going rather than crash. Print the real traceback, since the
        # raw sys.exc_info() tuple only shows an opaque traceback object.
        print(f"An error occurred: {str(e)}")
        print("Error details:")
        traceback.print_exc(file=sys.stdout)
# Run the segmentation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    debug_segmentation()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment