Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save guy4261/c55cccaac4c816d287349bfaf4d76e4b to your computer and use it in GitHub Desktop.
Save guy4261/c55cccaac4c816d287349bfaf4d76e4b to your computer and use it in GitHub Desktop.
Turn your LinkedIn messages from a data export into a CSV (and help RIF'd friends)
#!/usr/bin/env python
# coding: utf-8
import os
import webbrowser
import zipfile
from datetime import datetime
from glob import glob
from io import BytesIO
import pandas as pd
# Where to request the export, where to upload the result, and where the
# browser is expected to have saved the downloaded archive.
linkedin_data_export_url = "https://www.linkedin.com/mypreferences/d/download-my-data"
google_sheet_url = "https://drive.google.com/"
default_path_glob = "~/Downloads/Basic_LinkedInDataExport_*.zip"

# Date layout embedded in the export's file name (day-month-year).
# NOTE(review): confirm whether LinkedIn names exports day-first or
# month-first; unparsable names fall back to the file's modification time.
export_name_format = "Basic_LinkedInDataExport_%d-%m-%Y.zip"

# Only messages dated on/after this timestamp are kept.
rif_date = "2023-06-01 00:00:00 UTC"


def export_timestamp(path):
    """Return a naive datetime used to rank the export archive at *path*.

    The date embedded in the file name is preferred.  When the name does not
    match ``export_name_format`` (for example a browser-added " (1)" suffix),
    the file's modification time is used instead of crashing.
    """
    try:
        return datetime.strptime(os.path.basename(path), export_name_format)
    except ValueError:
        return datetime.fromtimestamp(os.path.getmtime(path))


def pick_latest_export(paths):
    """Return the most recent export among *paths*.

    Raises:
        FileNotFoundError: if *paths* is empty.
    """
    if not paths:
        raise FileNotFoundError("No files found!")
    # If you made multiple exports, get the latest
    return max(paths, key=export_timestamp)


def load_messages(path):
    """Load ``messages.csv`` from the export archive at *path* with pandas.

    Raises:
        FileNotFoundError: if the archive has no top-level ``messages.csv``.
    """
    # The context manager closes the archive even if reading fails.
    with zipfile.ZipFile(path) as archive:
        # Ensure your chosen export has your messages.csv in it
        if "messages.csv" not in archive.namelist():
            raise FileNotFoundError(
                f"Did not find messages.csv in your data export! Re-run {linkedin_data_export_url}"
            )
        return pd.read_csv(BytesIO(archive.read("messages.csv")))


def select_job_offers(df, since=rif_date):
    """Return ``(whoami, offers)`` extracted from the messages frame *df*.

    ``whoami`` is the single most frequent value of the ``TO`` column.
    ``offers`` holds, for every sender, their earliest INBOX message addressed
    to ``whoami``, restricted to messages whose ``DATE`` is >= *since*.

    Note the order of operations: the earliest message per sender is taken
    first and the date cutoff is applied afterwards, so a sender whose first
    message predates *since* is dropped even if they wrote again later.

    Raises:
        ValueError: if the most frequent recipient is not unique (or *df* has
            no rows).
    """
    # Get only incoming messages sent to me
    recipients = df["TO"].mode().tolist()
    if len(recipients) != 1:
        raise ValueError(f"Not sure who you are: {recipients}")
    whoami = recipients[0]
    inbox = df[(df["TO"] == whoami) & (df["FOLDER"] == "INBOX")]

    # Get the earliest message sent by each recruiter to ignore nagging.
    # merge() joins on the shared FROM/DATE columns to recover the full rows.
    first_contact = inbox.groupby("FROM")["DATE"].min().reset_index()
    offers = first_contact.merge(inbox)

    # Get only messages sent after the RIF.
    # DATE values are compared as strings here — assumes the export uses a
    # lexicographically sortable "YYYY-MM-DD HH:MM:SS UTC" layout; TODO confirm.
    offers = offers[offers["DATE"] >= since]

    columns = ["DATE", "TO", "FROM", "SENDER PROFILE URL", "SUBJECT", "CONTENT"]
    return whoami, offers[columns]


def main():
    """Find the latest export, write the job-offer CSV, and open Drive."""
    # Find your downloaded LinkedIn data
    print(f"Looking for your export from {linkedin_data_export_url}")
    print(f"under {default_path_glob}")
    path = pick_latest_export(glob(os.path.expanduser(default_path_glob)))

    whoami, offers = select_job_offers(load_messages(path))

    # Order and save
    outfile = f"{whoami}_job_offers.csv"
    offers.to_csv(outfile, index=False)

    # Now go upload the data!
    print(f"Now upload {os.path.realpath(outfile)} to {google_sheet_url}")
    webbrowser.open(google_sheet_url)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment