Skip to content

Instantly share code, notes, and snippets.

@askedrelic
Created June 23, 2025 05:19
Show Gist options
  • Select an option

  • Save askedrelic/0f605ab5c0923541a4c1df2bc1dcd385 to your computer and use it in GitHub Desktop.

Select an option

Save askedrelic/0f605ab5c0923541a4c1df2bc1dcd385 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
McDonald's Receipt Analyzer
Parses HTML email receipts from McDonald's to extract order data and statistics.
"""
import re
import html
import email
from pathlib import Path
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from datetime import datetime
import json
@dataclass
class OrderItem:
    """One line item parsed from a receipt's products table."""

    # Item name with whitespace collapsed and ®/™ marks removed by the parser.
    name: str
    # Number of units, taken from the "Qty N" text of the row.
    quantity: int
    # Dollar amount shown on the item's row.
    price: float
    # True when the item followed a promotion header and cost $0.00.
    is_promotion: bool = False
@dataclass
class Receipt:
    """All data extracted from a single receipt e-mail.

    String fields are empty and numeric fields are 0.0 when the parser
    could not find the corresponding value in the HTML.
    """

    # Order date as it appears in the e-mail, e.g. "01/04/2025".
    date: str
    # Order time as it appears in the e-mail, e.g. "09:04:30 AM".
    time: str
    # Restaurant address text.
    location: str
    # Identifier following "Check-In#", e.g. "RF88-8227355".
    check_in_number: str
    # Line items found in the products table.
    items: "List[OrderItem]"
    # Dollar amounts read from the receipt's summary lines.
    subtotal: float
    tax: float
    total: float
    total_savings: float
    # Text following "Card Type:".
    payment_method: str
    # Path of the file the receipt was parsed from.
    file_path: str
class McDonaldsReceiptParser:
    """Parse McDonald's e-mail receipts and summarise the parsed orders.

    Receipts parsed by ``process_all_receipts`` accumulate in
    ``self.receipts``; ``generate_statistics`` and ``print_summary_report``
    operate on that list.
    """

    # Layout of the date strings captured by ``extract_date`` (MM/DD/YYYY).
    _DATE_FORMAT = '%m/%d/%Y'

    def __init__(self):
        self.receipts: List[Receipt] = []

    def parse_eml_file(self, file_path: Path) -> "Optional[Receipt]":
        """Parse a single .eml file and extract receipt data.

        Returns the parsed ``Receipt``, or ``None`` when the message has no
        ``text/html`` part or any error occurs (the error is printed).
        """
        try:
            # Parse from bytes so the e-mail package handles transfer
            # encodings and charsets instead of a forced UTF-8 text layer.
            with open(file_path, 'rb') as f:
                msg = email.message_from_binary_file(f)

            html_content = None
            # walk() also yields the message itself, so one loop covers
            # both multipart and single-part messages.
            for part in msg.walk():
                if part.get_content_type() != "text/html":
                    continue
                payload = part.get_payload(decode=True)
                if payload is None:
                    continue
                charset = part.get_content_charset() or 'utf-8'
                try:
                    html_content = payload.decode(charset, errors='replace')
                except LookupError:
                    # Unknown charset label in the message: fall back to UTF-8.
                    html_content = payload.decode('utf-8', errors='replace')
                break

            if not html_content:
                return None
            return self.parse_html_receipt(html_content, str(file_path))
        except Exception as e:
            # Best-effort batch processing: report the failure and move on.
            print(f"Error parsing {file_path}: {e}")
            return None

    def parse_html_receipt(self, html_content: str, file_path: str) -> "Optional[Receipt]":
        """Parse HTML content to extract receipt data.

        Returns a ``Receipt`` built from the individual ``extract_*``
        helpers, or ``None`` if any of them raises (the error is printed).
        """
        try:
            # Undo quoted-printable encoding by hand: drop soft line breaks,
            # then turn "=XX" (upper-case hex) escapes into characters.
            # NOTE(review): when called from parse_eml_file the payload has
            # already been transfer-decoded by the e-mail package; this pass
            # is kept so raw quoted-printable HTML can still be passed in.
            html_content = html_content.replace('=\n', '')
            html_content = re.sub(
                r'=([0-9A-F]{2})',
                lambda m: chr(int(m.group(1), 16)),
                html_content,
            )

            return Receipt(
                date=self.extract_date(html_content),
                time=self.extract_time(html_content),
                location=self.extract_location(html_content),
                check_in_number=self.extract_check_in_number(html_content),
                items=self.extract_items(html_content),
                subtotal=self.extract_subtotal(html_content),
                tax=self.extract_tax(html_content),
                total=self.extract_total(html_content),
                total_savings=self.extract_total_savings(html_content),
                payment_method=self.extract_payment_method(html_content),
                file_path=file_path,
            )
        except Exception as e:
            print(f"Error parsing HTML content from {file_path}: {e}")
            return None

    def extract_date(self, html_content: str) -> str:
        """Return the first "NN/NN/NNNN" date in the HTML, or ""."""
        date_match = re.search(r'(\d{2}/\d{2}/\d{4})', html_content)
        return date_match.group(1) if date_match else ""

    def extract_time(self, html_content: str) -> str:
        """Return the first time like "09:04:30 AM" in the HTML, or ""."""
        time_match = re.search(r'(\d{2}:\d{2}:\d{2}\s*[AP]M)', html_content)
        return time_match.group(1) if time_match else ""

    def extract_location(self, html_content: str) -> str:
        """Return the restaurant address text, or "".

        Only addresses made of a number followed by text containing "BLVD"
        are recognised.
        """
        location_match = re.search(r'(\d+\s+[^<]+BLVD[^<]*)', html_content)
        if location_match:
            return location_match.group(1).strip()
        return ""

    def extract_check_in_number(self, html_content: str) -> str:
        """Return the value after "Check-In#" (e.g. "RF88-8227355"), or ""."""
        checkin_match = re.search(r'Check-In#\s*([A-Z0-9-]+)', html_content)
        return checkin_match.group(1) if checkin_match else ""

    def extract_items(self, html_content: str) -> "List[OrderItem]":
        """Return the items listed in the ``id="products"`` table.

        Rows that look like a promotion header (BOGO, "Buy ... get ... FREE",
        "Buy One ... Add One") are not items themselves; once one has been
        seen, every later $0.00 item is flagged as a promotion.
        """
        items = []
        products_table_match = re.search(
            r'<table[^>]*id="products"[^>]*>(.*?)</table>',
            html_content,
            re.DOTALL,
        )
        if not products_table_match:
            return items
        table_content = products_table_match.group(1)

        # Item and promotion rows are the <tr> elements styled height:15pt.
        item_rows = re.findall(
            r'<tr[^>]*style="height:15pt"[^>]*>(.*?)</tr>',
            table_content,
            re.DOTALL,
        )
        promotion_re = re.compile(
            r'<td[^>]*>([^<]*(?:BOGO|Buy[^<]*get[^<]*FREE|Buy One[^<]*Add One)[^<]*)</td>'
        )
        item_re = re.compile(
            r'<td[^>]*font-weight:bold[^>]*>([^<]+)</td>.*?Qty\s*(\d+).*?\$(\d+\.\d{2})',
            re.DOTALL,
        )

        current_promotion = None
        for row in item_rows:
            promotion_match = promotion_re.search(row)
            if promotion_match:
                current_promotion = promotion_match.group(1).strip()
                continue

            item_match = item_re.search(row)
            if not item_match:
                continue

            # Normalise the name: collapse whitespace, drop trademark marks.
            name = re.sub(r'\s+', ' ', item_match.group(1).strip())
            name = re.sub(r'®|™', '', name)
            price = float(item_match.group(3))
            items.append(OrderItem(
                name=name,
                quantity=int(item_match.group(2)),
                price=price,
                is_promotion=current_promotion is not None and price == 0.0,
            ))
        return items

    @staticmethod
    def _first_amount(pattern: str, html_content: str) -> float:
        """Return the first dollar amount captured by *pattern*, or 0.0."""
        amount_match = re.search(pattern, html_content, re.DOTALL)
        return float(amount_match.group(1)) if amount_match else 0.0

    def extract_subtotal(self, html_content: str) -> float:
        """Return the first dollar amount after "Subtotal", or 0.0."""
        return self._first_amount(r'Subtotal.*?\$(\d+\.\d{2})', html_content)

    def extract_tax(self, html_content: str) -> float:
        """Return the first dollar amount after "Tax Amount", or 0.0."""
        return self._first_amount(r'Tax Amount.*?\$(\d+\.\d{2})', html_content)

    def extract_total(self, html_content: str) -> float:
        """Return the first dollar amount after a "Total" label, or 0.0.

        "Total Savings" is skipped so that a savings line placed before the
        order total is not mistaken for it.
        """
        return self._first_amount(
            r'Total(?!(?:\s|&nbsp;)*Savings).*?\$(\d+\.\d{2})',
            html_content,
        )

    def extract_total_savings(self, html_content: str) -> float:
        """Return the first dollar amount after "Total Savings", or 0.0."""
        return self._first_amount(r'Total Savings.*?\$(\d+\.\d{2})', html_content)

    def extract_payment_method(self, html_content: str) -> str:
        """Return the text following "Card Type:", or ""."""
        payment_match = re.search(r'Card Type:\s*([^<]+)', html_content)
        return payment_match.group(1).strip() if payment_match else ""

    def process_all_receipts(self, receipt_files: List[Path]):
        """Parse every file in *receipt_files*, keeping the successful ones.

        Progress is printed per file; parsed receipts are appended to
        ``self.receipts``.
        """
        print(f"Processing {len(receipt_files)} receipt files...")
        for file_path in receipt_files:
            receipt = self.parse_eml_file(file_path)
            if receipt:
                self.receipts.append(receipt)
                print(f"✓ Processed: {file_path.name} - {receipt.date} - ${receipt.total:.2f}")
            else:
                print(f"✗ Failed to process: {file_path.name}")
        print(f"\nSuccessfully processed {len(self.receipts)} out of {len(receipt_files)} receipts")

    @classmethod
    def _parse_date(cls, date_str: str) -> Optional[datetime]:
        """Return *date_str* parsed as MM/DD/YYYY, or None if not parseable."""
        if not date_str:
            return None
        try:
            return datetime.strptime(date_str, cls._DATE_FORMAT)
        except (TypeError, ValueError):
            return None

    def generate_statistics(self) -> Dict:
        """Generate statistics from all parsed receipts.

        Returns an empty dict when there are no receipts. Otherwise the dict
        holds totals, the average order, the date range, the ten most
        frequent and ten highest-revenue items, spending per month and a
        count per payment method.
        """
        if not self.receipts:
            return {}

        # Dates are MM/DD/YYYY strings, so they must be compared as dates:
        # plain string comparison would sort "12/31/2024" after "01/04/2025".
        dated = []
        for receipt in self.receipts:
            parsed = self._parse_date(receipt.date)
            if parsed is not None:
                dated.append((parsed, receipt.date))
        earliest = min(dated, key=lambda pair: pair[0])[1] if dated else ""
        latest = max(dated, key=lambda pair: pair[0])[1] if dated else ""

        total_spent = sum(r.total for r in self.receipts)
        stats = {
            'total_receipts': len(self.receipts),
            'total_spent': total_spent,
            'total_savings': sum(r.total_savings for r in self.receipts),
            'average_order': total_spent / len(self.receipts),
            'date_range': {
                'earliest': earliest,
                'latest': latest,
            },
        }

        # Units ordered and money spent per item name.
        item_counts = {}
        item_revenue = {}
        for receipt in self.receipts:
            for item in receipt.items:
                item_counts[item.name] = item_counts.get(item.name, 0) + item.quantity
                item_revenue[item.name] = (
                    item_revenue.get(item.name, 0.0) + item.price * item.quantity
                )
        popular_items = sorted(item_counts.items(), key=lambda x: x[1], reverse=True)
        stats['most_popular_items'] = popular_items[:10]
        revenue_items = sorted(item_revenue.items(), key=lambda x: x[1], reverse=True)
        stats['highest_revenue_items'] = revenue_items[:10]

        # Spending per "YYYY-MM"; receipts without a parseable date are skipped.
        monthly_spending = {}
        for receipt in self.receipts:
            parsed = self._parse_date(receipt.date)
            if parsed is None:
                continue
            month_key = parsed.strftime('%Y-%m')
            monthly_spending[month_key] = monthly_spending.get(month_key, 0.0) + receipt.total
        stats['monthly_spending'] = dict(sorted(monthly_spending.items()))

        # Number of receipts per payment method ("Unknown" when missing).
        payment_methods = {}
        for receipt in self.receipts:
            method = receipt.payment_method or 'Unknown'
            payment_methods[method] = payment_methods.get(method, 0) + 1
        stats['payment_methods'] = payment_methods

        return stats

    def print_summary_report(self):
        """Print a formatted summary of ``generate_statistics`` to stdout."""
        if not self.receipts:
            print("No receipts to analyze.")
            return

        stats = self.generate_statistics()
        print("\n" + "="*60)
        print("McDONALD'S RECEIPT ANALYSIS SUMMARY")
        print("="*60)

        print("\n📊 OVERVIEW:")
        print(f" Total Receipts: {stats['total_receipts']}")
        print(f" Date Range: {stats['date_range']['earliest']} to {stats['date_range']['latest']}")
        print(f" Total Spent: ${stats['total_spent']:.2f}")
        print(f" Total Savings: ${stats['total_savings']:.2f}")
        print(f" Average Order: ${stats['average_order']:.2f}")

        print("\n🍔 MOST POPULAR ITEMS:")
        for i, (item, count) in enumerate(stats['most_popular_items'][:5], 1):
            print(f" {i}. {item}: {count} orders")

        print("\n💰 HIGHEST REVENUE ITEMS:")
        for i, (item, revenue) in enumerate(stats['highest_revenue_items'][:5], 1):
            print(f" {i}. {item}: ${revenue:.2f}")

        print("\n📅 MONTHLY SPENDING:")
        for month, total in stats['monthly_spending'].items():
            print(f" {month}: ${total:.2f}")

        print("\n💳 PAYMENT METHODS:")
        for method, count in stats['payment_methods'].items():
            print(f" {method}: {count} times")

        print("\n" + "="*60)
def main():
    """Run the receipt analyzer end to end.

    Parses the configured receipt files, prints the summary report and
    writes the statistics to ``mcdonalds_analysis.json`` in the current
    working directory.
    """
    # Paths (pathlib.Path objects) of the .eml receipts to analyse.
    # The list is empty here; fill it in before running.
    receipt_files = [
    ]

    parser = McDonaldsReceiptParser()
    parser.process_all_receipts(receipt_files)
    parser.print_summary_report()

    # Save the statistics as JSON (an empty object when nothing was parsed).
    stats = parser.generate_statistics()
    output_file = Path("mcdonalds_analysis.json")
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(stats, f, indent=2)
    print(f"\n📁 Detailed analysis saved to: {output_file}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment