Created
February 1, 2014 02:47
-
-
Save 8bit-pixies/8747253 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# <nbformat>3.0</nbformat> | |
# <codecell> | |
from stan.transcompile import transcompile | |
import stan_magic | |
from pandas import DataFrame | |
import numpy as np | |
import pkgutil | |
# <codecell> | |
import stan.proc_functions as proc_func | |
# Build one "from stan.proc_functions import <name>" statement for every
# module that pkgutil.iter_modules reports under proc_func.__path__, then
# execute them all in one go so each name is bound in the current namespace.
# NOTE(review): the imports are generated as source text and run through
# exec; the names come from the package's directory listing, not user input.
mod_name = ["from stan.proc_functions import %s" % name for _, name, _ in pkgutil.iter_modules(proc_func.__path__)] | |
exec("\n".join(mod_name)) | |
# <codecell> | |
# create an example data frame | |
df = DataFrame(np.random.randn(10, 5), columns = ['a','b','c','d','e']) | |
df | |
# <codecell> | |
%%stan | |
data test; | |
set df (drop = a); | |
run; | |
# <codecell> | |
exec(_) | |
test | |
# <markdowncell> | |
# `if` statements combined with `do`/`end` statements were difficult to implement. Here is my current
# implementation of if-then-else control flow. (I'll have to revisit `if` and `do`/`end` statements in the future.)
# <codecell> | |
%%stan | |
data df_if; | |
set df; | |
x = if b < 0.3 then 0 else if b < 0.6 then 1 else 2; | |
run; | |
# <codecell> | |
exec(_) | |
df_if | |
# <markdowncell> | |
# --- | |
# <codecell> | |
# Procs can be added manually; they can be thought of as Python functions.
# You can define your own, though I need to work on the parser
# to make it "smooth".
df1 = DataFrame({'a' : [1, 0, 1], 'b' : [0, 1, 1] }, dtype=bool) | |
df1 | |
# <codecell> | |
%%stan | |
proc describe data = df1 out = df2; | |
by a; | |
run; | |
# <codecell> | |
exec(_) | |
df2 | |
# <markdowncell> | |
# The proc actually isn't difficult to write. So for the above code it is actually just this: | |
# | |
# | |
# def describe(data, by): | |
# return data.groupby(by).describe() | |
# | |
# This functionality allows you to handle most of the `by` and `retain` cases. For languages
# like Python and R, the normal way to handle data is through the split-apply-combine methodology.
# | |
# Merges can be achieved in a similar way, by creating a `proc`: | |
# <codecell> | |
%%stan | |
proc merge out = df2; | |
dt_left left; | |
dt_right right; | |
on = 'key'; | |
run; | |
# <codecell> | |
left = DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]}) | |
right = DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]}) | |
exec(_) | |
df2 | |
# <markdowncell> | |
# Here's an example showing how you can define your own function and run it (i.e. not a function
# that came with the package).
# <codecell> | |
def sum_mean_by(data, by):
    """Return the per-group sum and mean of ``data`` grouped by ``by``.

    This is an example of a user-defined proc: a plain function taking
    ``data`` and ``by`` arguments.

    Parameters
    ----------
    data : object with a pandas-style ``groupby`` method (e.g. a DataFrame)
        The table to aggregate.
    by : label or list of labels
        Passed unchanged to ``data.groupby``.

    Returns
    -------
    The result of aggregating every group with both ``sum`` and ``mean``;
    for a DataFrame the columns are labelled ``(column, "sum")`` and
    ``(column, "mean")``.
    """
    # Use the string aliases rather than np.sum / np.mean: recent pandas
    # releases emit a FutureWarning when NumPy reducers are handed to agg,
    # and the aliases produce the same "sum" / "mean" column labels.
    return data.groupby(by).agg(["sum", "mean"])
# <codecell> | |
%%stan | |
proc sum_mean_by data = df_if out = df_sum; | |
by x; | |
run; | |
# <codecell> | |
exec(_) | |
df_sum | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment