Skip to content

Instantly share code, notes, and snippets.

@exogen
Last active November 16, 2023 15:55
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save exogen/ea5f3afbb8db888b6690546ab06d24e4 to your computer and use it in GitHub Desktop.
Save exogen/ea5f3afbb8db888b6690546ab06d24e4 to your computer and use it in GitHub Desktop.
Tally your Uber trips from a Gmail export
#!/usr/bin/env python3
#
# Usage: python3 uber.py ~/Downloads/Takeout/Mail/Uber.mbox
#
# Dependencies: Python 3.4+
#
# How to get the .mbox export:
#
# In Gmail, create a filter that applies the label "Uber" to emails matching:
#
# from:(uber.com) subject:("uber ride receipt" OR "trip with uber" OR "ride with uber")
#
# Apply the filter to your past emails. Then, go to:
#
# https://takeout.google.com/settings/takeout
#
# Export your Mail data, selecting just the label "Uber".
# Google will send you an email containing an archive.
# Point this script at its contents.
#
import sys
import re
import mailbox
import email.utils
from datetime import datetime
from collections import defaultdict
def parse_amount(body):
# Uber has changed their email format over the years.
# This should find the amount charged for emails from 2011-2016 at least.
for amount in re.findall(r'([$]\d+(?:[.]\d+)?) has been charged', body):
return amount
for amount in re.findall(r'(?:Billed to Card:?|Amount Charged:) +\(([^)]+)\)', body):
return amount
for amount in re.findall(r'Billed [Tt]o Card: ([$]\d+(?:[.]\d+)?)', body):
return amount
for amount in re.findall(r'Total Fare: +([$]\d+(?:[.]\d+)?)', body):
return amount
for amount in re.findall(r'^\s*Total: +([$]\d+(?:[.]\d+)?)', body):
return amount
for attrs, amount in re.findall(r'<td\s([^>]+)>\s*([$]\d+(?:[.]\d+)?)\s*<', body):
# If you're debugging a newer table-based email format, it may help to uncomment:
# print(attrs, amount)
if 'final-charge' in attrs or 'totalPrice' in attrs:
return amount
def extract_trips(messages):
skipped = 0
for msg in messages:
if 'uber' not in msg['From'] or ':' in msg['Subject']:
continue
body = msg.get_payload()[0].get_payload(decode=True).decode()
if 'you have earned $10 in Uber credit' in body:
continue
date = email.utils.parsedate_to_datetime(msg['Date'])
amount = parse_amount(body)
if amount is None:
skipped += 1
else:
amount = float(amount[1:])
yield (date, amount)
if skipped:
print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped))
# If you want to debug this:
# print(body)
# ...and add a new regex to `parse_amount` above.
def summarize_trips(trips):
trips_by_year = defaultdict(int)
amount_by_year = defaultdict(float)
total_trips = 0
total_amount = 0
for date, amount in trips:
total_trips += 1
total_amount += amount
year = date.astimezone().year
trips_by_year[year] += 1
amount_by_year[year] += amount
print("You've spent a total of ${:.2f} on {} trip(s) with Uber.\n"
.format(total_amount, total_trips))
start_year = min(trips_by_year)
end_year = max(max(trips_by_year), email.utils.localtime().year)
for year in range(start_year, end_year + 1):
count = trips_by_year[year]
amount = amount_by_year[year]
print("{:7} ${:8.2f} {:3} trip(s)".format(year, amount, count))
if __name__ == '__main__':
mbox = mailbox.mbox(sys.argv[1], create=False)
trips = list(extract_trips(mbox))
summarize_trips(trips)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment