Skip to content

Instantly share code, notes, and snippets.

@molly
Created January 12, 2024 20:37
Show Gist options
  • Save molly/712cf40fe1469e03d486b62028dc0fc9 to your computer and use it in GitHub Desktop.
Save molly/712cf40fe1469e03d486b62028dc0fc9 to your computer and use it in GitHub Desktop.
Merge Substack subscriber CSVs
import pandas as pd
from pandas import notna, isna
if __name__ == "__main__":
paid_all_data = pd.read_csv("/Users/molly/Desktop/paid.csv")
paid_with_stripe = pd.read_csv("/Users/molly/Desktop/mollywhite_subscriber_emails_2023-12-24.csv")
all_all_data = pd.read_csv("/Users/molly/Desktop/all.csv")
all_with_misc = pd.read_csv("/Users/molly/Desktop/mollywhite_signup_emails_2023-12-24.csv")
all_combined = pd.merge(all_all_data, all_with_misc, on="email", how="outer", suffixes=('', '_DROP'))
all_combined = pd.merge(all_combined, paid_all_data, on="email", how="outer", suffixes=('', '_DROP'))
all_combined = pd.merge(all_combined, paid_with_stripe, on="email", how="outer", suffixes=('', '_DROP')).filter(regex='^(?!.*_DROP)')
all_combined["note"] = None
all_combined["labels"] = None
for i, row in all_combined.iterrows():
labels = ""
note = ""
all_combined.at[i, "subscribed_to_emails"] = False if row["Receiving emails for"] == "" or isna(row["Receiving emails for"]) else True
all_combined.at[i, "complimentary_plan"] = row["Subscription type"] == "Comp"
if row["Subscription type"] and type(row["Subscription type"]) is str and "Gift" in row["Subscription type"]:
labels = "gift"
all_combined.at[i, "labels"] = labels
if notna(row["Subscription source (free)"]) and row["Subscription source (free)"] is not None:
note += "Subscription source (free): " + row["Subscription source (free)"]
if notna(row["Subscription source (paid)"]) and row["Subscription source (paid)"] is not None:
note += "; Subscription source (paid): " + row["Subscription source (paid)"]
if notna(row["First payment at"]) and row["First payment at"] is not None:
note += "; First payment at: " + row["First payment at"]
if notna(row["Paid upgrade date"]) and row["Paid upgrade date"] is not None:
note += "; Paid upgrade date: " + row["Paid upgrade date"]
if notna(row["Cancel date"]) and row["Cancel date"] is not None:
note += "; Cancel date: " + row["Cancel date"]
if notna(row["expiry"]) and row["expiry"] is not None:
note += "; Expiry: " + row["expiry"]
all_combined.at[i, "note"] = note
all_combined.to_csv("/Users/molly/Desktop/final.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment