# summerofgeorge/copilot-python-excel-customer-churn-dataset.py

## copilot-python-excel-customer-churn-dataset.py
import pandas as pd
import numpy as np
from faker import Faker

# Build a synthetic customer-churn table and write it to an Excel workbook.
#
# The script body appears exactly once here: a second, tab-indented copy that
# followed the final print() made the file fail with an IndentationError.

# Single seed shared by NumPy and Faker so repeated runs give the same output.
SEED = 1234

# Workbook written at the end of the script (relative to the working directory).
OUTPUT_PATH = 'customer_churn.xlsx'

# Seed NumPy's global RNG. The draws below happen in a fixed order
# (ages, tenures, monthly charges, churn); reordering them changes the data.
np.random.seed(SEED)

# Faker does not interpret a `seed=` constructor keyword as a seed, so the
# instance is seeded explicitly. `fake` is not used further in this script.
fake = Faker()
fake.seed_instance(SEED)

# One row per customer, with sequential IDs starting at 1.
n_customers = 5000
customer_ids = range(1, n_customers + 1)

# Ages: normal distribution, mean 35, standard deviation 10.
# NOTE(review): values are unrounded floats with no lower bound, so
# implausible ages are possible; left as-is to keep the generated data stable.
ages = np.random.normal(loc=35, scale=10, size=n_customers)

# Tenures: integers drawn uniformly from 1..72 (`high` is exclusive);
# presumably months, given the "monthly charges" multiplication below.
tenures = np.random.randint(low=1, high=73, size=n_customers)

# Monthly charges: normal distribution, mean 70, standard deviation 20.
# NOTE(review): not clipped at zero, so a rare negative charge is possible.
monthly_charges = np.random.normal(loc=70, scale=20, size=n_customers)

# Total charges: element-wise product of monthly charge and tenure.
total_charges = monthly_charges * tenures

# Churn flag: 1 with probability `churn_prob`, otherwise 0, drawn
# independently of every other column.
churn_prob = 0.2
churn = np.random.choice([0, 1], size=n_customers, p=[1 - churn_prob, churn_prob])

# Assemble the columns into a single table; keys become the Excel headers.
customer_df = pd.DataFrame({
    'Customer ID': customer_ids,
    'Age': ages,
    'Tenure': tenures,
    'Monthly Charges': monthly_charges,
    'Total Charges': total_charges,
    'Churn': churn,
})

# Save to the Excel workbook without the DataFrame index column.
customer_df.to_excel(OUTPUT_PATH, index=False)
print(f"Synthetic dataset saved to '{OUTPUT_PATH}'")