Last active
July 15, 2024 18:54
Copilot Python Excel customer churn dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generate a synthetic customer-churn dataset and save it to an Excel workbook.
#
# The script draws 5000 customers with an age, a tenure (months), a monthly
# charge, a derived total charge and a binary churn flag, then writes them to
# 'customer_churn.xlsx' in the current working directory.
import pandas as pd
import numpy as np
from faker import Faker

# One seed for every random source so repeated runs give the same workbook.
SEED = 1234

# Set random seed for reproducibility (NumPy's legacy global generator, which
# every np.random.* call below draws from).
np.random.seed(SEED)

# Initialize Faker instance with seed.
# Passing `seed=` to the Faker constructor does not seed anything; the
# documented way is the `Faker.seed()` class method (shared generator) or
# `fake.seed_instance()`.
Faker.seed(SEED)
# `fake` is not used below; it is kept seeded and available for adding
# name/address style columns.
fake = Faker()

# Generate customer data
n_customers = 5000
customer_ids = range(1, n_customers + 1)  # 1-based sequential IDs

# Generate normally distributed ages (mean 35, standard deviation 10).
# NOTE: the values are floats and the normal distribution is unbounded, so a
# rare draw can fall outside a realistic age range.
ages = np.random.normal(loc=35, scale=10, size=n_customers)

# Generate uniformly distributed tenures.
# `high` is exclusive, so tenures are integers in 1..72.
tenures = np.random.randint(low=1, high=73, size=n_customers)

# Generate normally distributed monthly charges (mean 70, standard deviation 20).
# NOTE: unbounded as well, so a rare draw can be very small or negative.
monthly_charges = np.random.normal(loc=70, scale=20, size=n_customers)

# Calculate total charges (element-wise: one product per customer)
total_charges = monthly_charges * tenures

# Generate churn (0 or 1) with a probability of 0.2 for churn == 1
churn_prob = 0.2
churn = np.random.choice(
    [0, 1],
    size=n_customers,
    p=[1 - churn_prob, churn_prob],
)

# Create DataFrame: one row per customer, one column per generated attribute
customer_df = pd.DataFrame({
    'Customer ID': customer_ids,
    'Age': ages,
    'Tenure': tenures,
    'Monthly Charges': monthly_charges,
    'Total Charges': total_charges,
    'Churn': churn,
})

# Save to Excel workbook. index=False keeps the DataFrame's positional index
# out of the sheet, since 'Customer ID' already identifies each row.
# Writing .xlsx needs an Excel writer engine (e.g. openpyxl) installed.
customer_df.to_excel('customer_churn.xlsx', index=False)
print("Synthetic dataset saved to 'customer_churn.xlsx'")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment