8bit-pixies/stan_example.ipynb

## stan_example.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              stan_example.ipynb
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## stan_example.py
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>

# <codecell>

from stan.transcompile import transcompile
import stan_magic
from pandas import DataFrame
import numpy as np
import pkgutil

# <codecell>

import stan.proc_functions as proc_func

# Build one "from stan.proc_functions import <name>" statement for every module that
# pkgutil.iter_modules() reports under the stan.proc_functions package path.
mod_name = ["from stan.proc_functions import %s" % name for _, name, _ in pkgutil.iter_modules(proc_func.__path__)]
# Run all generated import statements in one go so each imported name is bound here.
# NOTE(review): presumably this is what lets code generated from `proc <name> ...` cells
# call a proc by its bare name -- confirm against stan.transcompile.
exec("\n".join(mod_name))

# <codecell>

# Example input for the cells below: 10 rows x 5 columns of standard-normal samples.
df = DataFrame(
    data=np.random.standard_normal(size=(10, 5)),
    columns=['a', 'b', 'c', 'd', 'e'],
)
df  # bare expression so the notebook displays the frame

# <codecell>

%%stan
data test;
set df (drop = a);
run;

# <codecell>

# Execute the string held in `_` as Python source.
# NOTE(review): presumably `_` is IPython's last-result variable and holds the code
# produced by the preceding %%stan cell -- confirm against stan_magic.
exec(_)
# Bare expression so the notebook displays `test`, which the exec above is expected to create.
test

# <markdowncell>

# `if` statements combined with `do` `end` statements were difficult to implement. Here is my current
# implementation of if-then-else control flow (I'll have to revisit `if` and `do` `end` statements in the future).

# <codecell>

%%stan
data df_if;
    set df;
    x = if b < 0.3 then 0 else if b < 0.6 then 1 else 2;
run;

# <codecell>

# Execute the string held in `_` as Python source.
# NOTE(review): presumably `_` is IPython's last-result variable and holds the code
# produced by the preceding %%stan cell -- confirm against stan_magic.
exec(_)
# Bare expression so the notebook displays `df_if`, which the exec above is expected to create.
df_if

# <markdowncell>

# ---

# <codecell>

# Procs can be added manually; think of them as ordinary Python functions.
# You can define your own, though the parser still needs work before that
# feels "smooth".

# Small boolean frame used as input to the proc example that follows.
df1 = DataFrame(
    data={'a': [1, 0, 1], 'b': [0, 1, 1]},
    dtype=bool,
)
df1  # bare expression so the notebook displays the frame

# <codecell>

%%stan
proc describe data = df1 out = df2;
by a;
run;

# <codecell>

# Execute the string held in `_` as Python source.
# NOTE(review): presumably `_` is IPython's last-result variable and holds the code
# produced by the preceding %%stan cell -- confirm against stan_magic.
exec(_)
# Bare expression so the notebook displays `df2`, which the exec above is expected to create.
df2

# <markdowncell>

# The proc actually isn't difficult to write. So for the above code it is actually just this:
#
#
#     def describe(data, by):
#         return data.groupby(by).describe()
#
# This functionality allows you to handle most of the `by` and `retain` cases. For languages
# like Python and R, the normal way to handle data is through the split-apply-combine methodology.
#
# Merges can be achieved in a similar way, by creating a `proc`:

# <codecell>

%%stan
proc merge out = df2;
dt_left left;
dt_right right;
on = 'key';
run;

# <codecell>

# Two small frames sharing a 'key' column; the `proc merge` cell refers to them
# as `left` and `right`.
left = DataFrame(
    data={
        'key': ['foo', 'foo'],
        'lval': [1, 2],
    },
)
right = DataFrame(
    data={
        'key': ['foo', 'foo'],
        'rval': [4, 5],
    },
)

# Execute the string held in `_` as Python source.
# NOTE(review): presumably `_` is IPython's last-result variable and still holds the code
# produced by the `proc merge` %%stan cell; `left` and `right` are only defined just
# above, so they are presumably needed at exec time rather than when the %%stan cell
# ran -- confirm against stan_magic.
exec(_)
# Bare expression so the notebook displays `df2`, which the exec above is expected to create.
df2

# <markdowncell>

# Here's an example showing how you can define your own function and run it (not a function
# that came with the package):

# <codecell>

def sum_mean_by(data, by):
    """Return the per-group sum and mean of the non-key columns of ``data``.

    This is the example of a user-defined proc: a ``%%stan`` cell of the form
    ``proc sum_mean_by data = ... out = ...; by ...; run;`` refers to it by name.

    Args:
        data: A pandas DataFrame (or any object with a compatible ``groupby``).
        by: Column label, or list of labels, to group on.

    Returns:
        The frame produced by ``groupby(by).agg(["sum", "mean"])``: one row per
        group, with a ``sum`` and a ``mean`` column under each aggregated column.
    """
    # Use pandas' string aliases rather than np.sum / np.mean: recent pandas
    # emits a FutureWarning when NumPy callables are passed to agg(), while the
    # aliases yield the same "sum" / "mean" column labels and values.
    return data.groupby(by).agg(["sum", "mean"])

# <codecell>

%%stan
proc sum_mean_by data = df_if out = df_sum;
by x;
run;

# <codecell>

# Execute the string held in `_` as Python source.
# NOTE(review): presumably `_` is IPython's last-result variable and holds the code
# produced by the preceding %%stan cell -- confirm against stan_magic.
exec(_)
# Bare expression so the notebook displays `df_sum`, which the exec above is expected to create.
df_sum
	# -*- coding: utf-8 -*-
	# <nbformat>3.0</nbformat>

	# <codecell>

	from stan.transcompile import transcompile
	import stan_magic
	from pandas import DataFrame
	import numpy as np
	import pkgutil

	# <codecell>

	import stan.proc_functions as proc_func

	mod_name = ["from stan.proc_functions import %s" % name for _, name, _ in pkgutil.iter_modules(proc_func.__path__)]
	exec("\n".join(mod_name))

	# <codecell>

	# create an example data frame
	df = DataFrame(np.random.randn(10, 5), columns = ['a','b','c','d','e'])
	df

	# <codecell>

	%%stan
	data test;
	set df (drop = a);
	run;

	# <codecell>

	exec(_)
	test

	# <markdowncell>

	# `if` statements combined with `do` `end` statements were difficult to implement. Here is my current
	# implementation of if-then-else control flow (I'll have to revisit `if` and `do` `end` statements in the future).

	# <codecell>

	%%stan
	data df_if;
	set df;
	x = if b < 0.3 then 0 else if b < 0.6 then 1 else 2;
	run;

	# <codecell>

	exec(_)
	df_if

	# <markdowncell>

	# ---

	# <codecell>

	# procs can be added manually they can be thought of as python functions
	# you can define your own, though I need to work on the parser
	# to get it "smooth"

	df1 = DataFrame({'a' : [1, 0, 1], 'b' : [0, 1, 1] }, dtype=bool)
	df1

	# <codecell>

	%%stan
	proc describe data = df1 out = df2;
	by a;
	run;

	# <codecell>

	exec(_)
	df2

	# <markdowncell>

	# The proc actually isn't difficult to write. So for the above code it is actually just this:
	#
	#
	#     def describe(data, by):
	#         return data.groupby(by).describe()
	#
	# This functionality allows you to handle most of the `by` and `retain` cases. For languages
	# like Python and R, the normal way to handle data is through the split-apply-combine methodology.
	#
	# Merges can be achieved in a similar way, by creating a `proc`:

	# <codecell>

	%%stan
	proc merge out = df2;
	dt_left left;
	dt_right right;
	on = 'key';
	run;

	# <codecell>

	left = DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})
	right = DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})

	exec(_)
	df2

	# <markdowncell>

	# Here's an example showing how you can define your own function and run it (not a function
	# that came with the package):

	# <codecell>

	def sum_mean_by(data, by):
		"""Return the per-group sum and mean of the non-key columns of ``data``.

		This is the example of a user-defined proc: a ``%%stan`` cell of the form
		``proc sum_mean_by data = ... out = ...; by ...; run;`` refers to it by name.

		Args:
		    data: A pandas DataFrame (or any object with a compatible ``groupby``).
		    by: Column label, or list of labels, to group on.

		Returns:
		    The frame produced by ``groupby(by).agg(["sum", "mean"])``: one row per
		    group, with a ``sum`` and a ``mean`` column under each aggregated column.
		"""
		# The function body must be indented below the ``def`` line; the flattened
		# original was a syntax error.
		# Use pandas' string aliases rather than np.sum / np.mean: recent pandas
		# emits a FutureWarning when NumPy callables are passed to agg(), while the
		# aliases yield the same "sum" / "mean" column labels and values.
		return data.groupby(by).agg(["sum", "mean"])

	# <codecell>

	%%stan
	proc sum_mean_by data = df_if out = df_sum;
	by x;
	run;

	# <codecell>

	exec(_)
	df_sum