Skip to content

Instantly share code, notes, and snippets.

View gjreda's full-sized avatar

Greg Reda gjreda

View GitHub Profile
@gjreda
gjreda / gist:7433f5f70299610d9b6b
Last active April 11, 2023 16:23
pandas' read_csv parse_dates vs explicit date conversion
# When you're sure of the format, it's much quicker to convert your dates explicitly than to use `parse_dates`.
# That makes sense; I was just surprised by how large the time difference is.
import pandas as pd
from datetime import datetime
def to_datetime(d):
    """Parse a timestamp string formatted as ``MM/DD/YYYY HH:MM``.

    Args:
        d: Timestamp text such as ``'06/27/2013 12:11'``.

    Returns:
        The corresponding naive ``datetime`` object.

    Raises:
        ValueError: If ``d`` does not match the ``'%m/%d/%Y %H:%M'`` format.
    """
    # The format is spelled out explicitly, so nothing has to be guessed.
    return datetime.strptime(d, '%m/%d/%Y %H:%M')
# Baseline timing: read the trips CSV and let pandas parse the 'starttime' and
# 'stoptime' columns itself via `parse_dates`.
# NOTE: `%time` is an IPython magic, so this line runs only in IPython/Jupyter.
%time trips = pd.read_csv('data/divvy/Divvy_Trips_2013.csv', parse_dates=['starttime', 'stoptime'])
# Recorded output of the `%time` run above:
# CPU times: user 1min 29s, sys: 331 ms, total: 1min 29s
# Wall time: 1min 30s
@gjreda
gjreda / gist:8611946
Created January 25, 2014 04:37
Weird numpy/pandas groupby behavior when using min() on a np.datetime64 field.
# OSX 10.7.5
# python 2.7.5
# pandas 0.13.0
# numpy 1.8.0
import pandas as pd
import numpy as np
from StringIO import StringIO
d = """row1,'2013-10-01'
@gjreda
gjreda / espn-cbb.py
Created October 26, 2013 22:24
Grabs college basketball play-by-play data for a given date range. Example usage: python espn-cbb.py 2013-01-01 2013-01-07
from bs4 import BeautifulSoup
from urllib2 import urlopen
from datetime import datetime, timedelta
from time import sleep
import sys
import csv
# CONSTANTS
ESPN_URL = "http://scores.espn.go.com"