Last active
August 29, 2015 14:21
-
-
Save LNA/a99e9a1a62f2b1ce784e to your computer and use it in GitHub Desktop.
Pandas Workshop
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Launch Python: | |
#cd anaconda | |
#open Launcher.app | |
ridership = pd.read_csv('https://raw.githubusercontent.com/zafia/talks/gh-pages/pandas-class/cta_ridership.csv') | |
ridership.loc[ridership.stationname.unique()] | |
ridership.loc[ridership.stationname.str.contains("Mo”)] | |
ridership.loc[ridership.stationname == "Morse"] | |
stations = ridership.groupby("stationname") => #<pandas.core.groupby.DataFrameGroupBy object at 0x102a3e310> | |
type(stations) => #pandas.core.groupby.DataFrameGroupBy | |
stations.rides.sum().max() | |
daily = ridership.groupby("date") | |
daily.rides.sum().idxmax() # most rides; lollapooloza | |
daily.rides.sum() | |
import matplotlib.pyplot as plt | |
%matplotlib inline # prints graphs for you | |
#a column in pandas is called a series | |
daily_ridership = pd.Series(daily.rides.sum()) | |
daily_ridership.plot() #will plot a graph for you!!! | |
Out[42]: <matplotlib.axes._subplots.AxesSubplot at 0x10d1134d0> | |
# Swap the one-letter day-type codes for readable labels.
daytype_labels = {"A": "saturday", "U": "sunday", "W": "weekday"}
ridership["daytype"] = ridership["daytype"].map(daytype_labels)

# Keep only the rows recorded on a weekday and preview the first ten.
is_weekday = ridership["daytype"] == "weekday"
weekdays = ridership.loc[is_weekday]
weekdays.head(10)
Out[60]: | |
station_id stationname date daytype rides | |
423 41670 Conservatory 2010-01-04 00:00:00 weekday 718 | |
424 41660 Lake/State 2010-01-04 00:00:00 weekday 14813 | |
425 41500 Montrose-Brown 2010-01-04 00:00:00 weekday 1963 | |
426 41490 Harrison 2010-01-04 00:00:00 weekday 2287 | |
427 41480 Western-Brown 2010-01-04 00:00:00 weekday 3398 | |
428 41460 Irving Park-Brown 2010-01-04 00:00:00 weekday 2198 | |
429 41450 Chicago/State 2010-01-04 00:00:00 weekday 11041 | |
430 41440 Addison-Brown 2010-01-04 00:00:00 weekday 2090 | |
431 41430 87th 2010-01-04 00:00:00 weekday 4558 | |
432 41420 Addison-North Main 2010-01-04 00:00:00 weekday 4835 | |
 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert the date column to real datetimes so the .dt accessor is available.
ridership["date"] = pd.to_datetime(ridership["date"])

# Select the rows that fall in August 2011 and preview the first five.
in_2011 = ridership["date"].dt.year == 2011
in_august = ridership["date"].dt.month == 8
lolla = ridership.loc[in_2011 & in_august]
lolla.head(5)
Out[65]: | |
station_id stationname date daytype rides | |
81806 40730 Washington/Wells 2011-08-01 weekday 7420 | |
81807 40720 East 63rd-Cottage Grove 2011-08-01 weekday 1443 | |
81808 40710 Chicago/Franklin 2011-08-01 weekday 6591 | |
81809 40700 Laramie 2011-08-01 weekday 1390 | |
81810 40700 Laramie 2011-08-01 weekday 1381 | |
# Total rides per day within the August 2011 selection, then plot them.
lolla_grouped = lolla.groupby("date")
rides_per_day = lolla_grouped["rides"].sum()
lolla_ridership = pd.Series(rides_per_day)
lolla_ridership.plot()
Out[70]: <matplotlib.axes._subplots.AxesSubplot at 0x10d0cb290> | |
#shows awesome graph |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Five busiest stations by all-time ride total, largest first.
# Series.sort(inplace=False, ...) was deprecated and later removed from
# pandas; sort_values returns a new sorted Series and is its replacement.
# The grouped sum is already a Series, so it needs no pd.Series(...) wrapper.
total_rides_by_station = stations["rides"].sum()
station_ridership = total_rides_by_station.sort_values(ascending=False).head(5)
station_ridership
Out[72]: | |
stationname | |
Lake/State 29458946 | |
Clark/Lake 27744163 | |
Chicago/State 26062944 | |
Belmont-North Main 21712070 | |
Fullerton 21485675 | |
Name: rides, dtype: int64 | |
# Draw the top-station totals as a bar chart.
station_ridership.plot(kind="bar")

# Keep only the stations whose total is non-zero.
has_rides = station_ridership != 0
s = station_ridership.loc[has_rides]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment