# Skip to content

# Instantly share code, notes, and snippets.

# Copilot Python Excel customer churn dataset
import pandas as pd
import numpy as np
from faker import Faker
# Build a synthetic customer-churn table and save it as an Excel workbook.
#
# All NumPy draws below come from the global NumPy RNG seeded here, so the
# generated values depend on the order of the np.random calls. Keep that
# order unchanged if the output must stay reproducible.
np.random.seed(1234)

# Passing seed= to the Faker constructor does not seed the generator, so the
# instance is seeded explicitly to make any values drawn from it repeatable.
fake = Faker()
fake.seed_instance(1234)

# Number of synthetic customers; IDs run from 1 to n_customers inclusive.
n_customers = 5000
customer_ids = range(1, n_customers + 1)

# Ages: normal draws (mean 35, std 10). Values are floats and are not
# clipped or rounded, so fractional and out-of-range ages can occur.
ages = np.random.normal(loc=35, scale=10, size=n_customers)

# Tenures: uniform integers from 1 to 72 (the upper bound 73 is exclusive).
# NOTE(review): presumably months, given "Monthly Charges" -- confirm.
tenures = np.random.randint(low=1, high=73, size=n_customers)

# Monthly charges: normal draws (mean 70, std 20); not clipped, so a rare
# negative value is possible.
monthly_charges = np.random.normal(loc=70, scale=20, size=n_customers)

# Total charges: element-wise product of monthly charge and tenure.
total_charges = monthly_charges * tenures

# Churn flag: 1 with probability churn_prob, otherwise 0, drawn
# independently of the other columns.
churn_prob = 0.2
churn = np.random.choice(
    [0, 1],
    size=n_customers,
    p=[1 - churn_prob, churn_prob],
)

# Assemble one row per customer.
customer_df = pd.DataFrame({
    'Customer ID': customer_ids,
    'Age': ages,
    'Tenure': tenures,
    'Monthly Charges': monthly_charges,
    'Total Charges': total_charges,
    'Churn': churn,
})

# Write the table to an Excel workbook in the current working directory,
# leaving out the DataFrame index column.
customer_df.to_excel('customer_churn.xlsx', index=False)
print("Synthetic dataset saved to 'customer_churn.xlsx'")
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment