Skip to content

Instantly share code, notes, and snippets.

@jcrist
Created October 2, 2015 18:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jcrist/b5bfbf3be5ca8cf0c20d to your computer and use it in GitHub Desktop.
Save jcrist/b5bfbf3be5ca8cf0c20d to your computer and use it in GitHub Desktop.
Convert airline data to castra
from __future__ import print_function
import pandas as pd
from castra import Castra
from toolz import peek
def make_frame(year):
    """Load one year of airline data as a DataFrame indexed by departure date.

    Reads ``'<year>.csv'`` (relative to the current working directory),
    discards the file's own header row in favour of the column names
    listed below, and indexes the rows by a ``depart_date`` timestamp
    built from the ``year``, ``month`` and ``day_of_month`` columns.

    Parameters
    ----------
    year
        Value formatted into the file name, e.g. ``1987`` -> ``'1987.csv'``.

    Returns
    -------
    pandas.DataFrame
        The 29 named columns, indexed by ``depart_date`` in ascending order.
    """
    path = '{0}.csv'.format(year)
    cols = ['year', 'month', 'day_of_month', 'day_of_week', 'deptime',
            'crs_deptime', 'arrtime', 'crs_arrtime', 'unique_carrier',
            'flight_num', 'tail_num', 'actual_elapsed_time',
            'crs_elapsed_time', 'air_time', 'arr_delay', 'depdelay', 'origin',
            'dest', 'distance', 'taxi_in', 'taxi_out', 'cancelled',
            'cancellation_code', 'diverted', 'carrier_delay', 'weather_delay',
            'nas_delay', 'security_delay', 'late_aircraft_delay']
    # header=0 together with names=cols replaces the header row in the file
    # with the names above.  The dtypes of these columns are pinned rather
    # than inferred -- presumably so that every yearly file produces the
    # same schema; confirm against the data if this list is changed.
    df = pd.read_csv(path, header=0, names=cols,
                     dtype={'cancellation_code': object, 'taxi_in': float,
                            'taxi_out': float, 'cancelled': bool,
                            'diverted': bool, 'carrier_delay': float,
                            'weather_delay': float, 'nas_delay': float,
                            'security_delay': float,
                            'late_aircraft_delay': float})
    # Add parsed departure date as an index: pack year/month/day into a
    # single YYYYMMDD integer so one vectorised to_datetime call parses it.
    depart_date = pd.to_datetime(
        df['year'] * 10000 + df['month'] * 100 + df['day_of_month'],
        format='%Y%m%d')
    df['depart_date'] = depart_date
    df = df.set_index('depart_date')
    return df.sort_index(ascending=True)
# Lazily build one DataFrame per yearly file (1987 through 2008); nothing is
# read until the generator is consumed.
frames = (make_frame(year) for year in range(1987, 2009))

# peek() pulls the first frame out to serve as the schema template and hands
# back an iterator that still yields every frame, including that first one.
df, frames = peek(frames)

# Create the on-disk store from the template, storing the listed string
# columns as categoricals.
c = Castra(
    'airport.castra',
    template=df,
    categories=['dest', 'origin', 'cancellation_code', 'unique_carrier'],
)

# Stream all yearly frames into the store, passing freq='6M' to castra.
c.extend_sequence(frames, freq='6M')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment