Skip to content

Instantly share code, notes, and snippets.

@statcompute
Last active November 24, 2018 06:08
Show Gist options
  • Save statcompute/89791e5cfb6fb256f5b72abf36868952 to your computer and use it in GitHub Desktop.
Save statcompute/89791e5cfb6fb256f5b72abf36868952 to your computer and use it in GitHub Desktop.
An example for xframes
from xframes import XFrame, aggregate
# Load the flights CSV into an XFrame. `header` and `nrows` are passed straight
# to XFrame.read_csv; presumably they mark the first line as column names and
# cap the number of rows read -- confirm against the xframes docs.
df = XFrame.read_csv(
    "Downloads/nycflights.csv",
    header=True,
    nrows=11,
)

### SUBSETTING ###
# Columns used by the filtering / aggregation / join examples below.
sel_cols = [
    "origin",
    "dest",
    "distance",
    "dep_delay",
    "carrier",
]
# Indexing an XFrame with a list of column names selects those columns.
df2 = df[sel_cols]
# OR:
# df.sql("select " + ", ".join(sel_cols) + " from df")
### FILTERING ###
# Build one boolean mask per condition and combine them with `&`; indexing
# df2 with the combined mask keeps the rows where both conditions hold.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
is_ewr = df2["origin"] == "EWR"
is_ua = df2["carrier"] == "UA"
print(df2[is_ewr & is_ua])
# OR:
# print(df2.filterby("EWR", "origin").filterby("UA", "carrier"))
### AGGREGATING ###
from numpy import median

# Group by origin and gather each group's "distance" values into one "dist"
# entry per origin, so a median can be computed per group afterwards.
grp1 = df2.groupby("origin", {"dist": aggregate.CONCAT("distance")})

# A list comprehension (instead of map) produces a real list on both
# Python 2 and Python 3; map() returns a lazy iterator on Python 3.
agg1 = XFrame({
    "origin": grp1["origin"],
    "med_dist": [median(dists) for dists in grp1["dist"]],
})
# OR:
# grp1["med_dist"] = grp1.apply(lambda row: median(row["dist"]))
# agg1 = grp1[["origin", "med_dist"]]
# USING SQL:
# df2.sql("select origin, percentile_approx(distance, 0.5) as med_dist from df2 group by origin")

# Iterating an XFrame yields one dict-like row per record.
for row in agg1:
    print(row)
# Sample output (as recorded under Python 2):
# {'origin': u'LGA', 'med_dist': 747.5}
# {'origin': u'JFK', 'med_dist': 1089.0}
# {'origin': u'EWR', 'med_dist': 1065.0}
# Mean departure delay per origin, computed directly by the built-in
# MEAN aggregator (no post-processing needed, unlike the median above).
agg2 = df2.groupby("origin", {"avg_delay": aggregate.MEAN("dep_delay")})
# USING SQL:
# df2.sql("select origin, mean(dep_delay) as avg_delay from df2 group by origin")

# The loop body must be indented; print(...) works on Python 2 and 3 alike.
for row in agg2:
    print(row)
# Sample output (as recorded under Python 2):
# {'origin': u'LGA', 'avg_delay': -1.75}
# {'origin': u'JFK', 'avg_delay': -0.6666666666666666}
# {'origin': u'EWR', 'avg_delay': -2.3333333333333335}
### JOINING ###
# Inner-join the two per-origin aggregates on their shared "origin" column.
# `on` maps the column name in agg1 to the matching column name in agg2.
# The loop body must be indented; print(...) works on Python 2 and 3 alike.
for row in agg1.join(agg2, on={"origin": "origin"}, how="inner"):
    print(row)
# Sample output (as recorded under Python 2):
# {'origin': u'LGA', 'med_dist': 747.5, 'avg_delay': -1.75}
# {'origin': u'JFK', 'med_dist': 1089.0, 'avg_delay': -0.6666666666666666}
# {'origin': u'EWR', 'med_dist': 1065.0, 'avg_delay': -2.3333333333333335}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment