Created
July 13, 2012 00:15
-
-
Save nborwankar/3101995 to your computer and use it in GitHub Desktop.
RSessionToMADlibPG
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
=== | |
> rs <- dbSendQuery(con, statement = paste( | |
+ "SELECT * FROM", | |
+ "MADlib.c45_train('infogain','public.golf_data','trained_tree_infogain',null,'temperature,humidity','outlook,temperature,humidity,windy','id','class',100,'explicit', 5,0.001,0.001,0)")); | |
> df <- fetch(rs, n = -1) | |
> df | |
training_set_size tree_nodes tree_depth training_time split_criterion | |
14 8 3 00:00:00.311362 infogain | |
=== | |
> rs <- dbSendQuery(con, statement = "select MADlib.c45_display('trained_tree_infogain');") | |
> df <- fetch(rs, n = -1) | |
> df | |
c45_display | |
1 \nTree 1\n | |
Root Node : class(Play) num_elements(14) predict_prob(0.642857142857143)\n | |
outlook: = overcast : class(Play) num_elements(4) predict_prob(1)\n | |
outlook: = rain : class(Play) num_elements(5) predict_prob(0.6)\n | |
windy: = false : class(Play) num_elements(3) predict_prob(1)\n | |
windy: = true : class(Do not Play) num_elements(2) predict_prob(1)\n | |
outlook: = sunny : class(Do not Play) num_elements(5) predict_prob(0.6)\n | |
humidity: <= 70 : class(Play) num_elements(2) predict_prob(1)\n | |
humidity: > 70 : class(Do not Play) num_elements(3) predict_prob(1)\n | |
=== | |
> rs <- dbSendQuery(con, statement = "select * from MADlib.c45_classify('trained_tree_infogain','golf_data','classification_result')"); | |
> df <- fetch(rs, n = -1); | |
> df | |
input_set_size classification_time | |
14 00:00:00.07043 | |
> | |
=== | |
> rs = dbSendQuery(con, statement=paste( | |
+ "select t.id,t.outlook,t.temperature,t.humidity,t.windy,c.class from", | |
+ "classification_result c,public.golf_data t where t.id=c.id order by id")); | |
> df <- fetch(rs, n = -1); | |
> df | |
id outlook temperature humidity windy class | |
1 1 sunny 85 85 FALSE Do not Play | |
2 2 sunny 80 90 TRUE Do not Play | |
3 3 overcast 83 78 FALSE Play | |
4 4 rain 70 96 FALSE Play | |
5 5 rain 68 80 FALSE Play | |
6 6 rain 65 70 TRUE Do not Play | |
7 7 overcast 64 65 TRUE Play | |
8 8 sunny 72 95 FALSE Do not Play | |
9 9 sunny 69 70 FALSE Play | |
10 10 rain 75 80 FALSE Play | |
11 11 sunny 75 70 TRUE Play | |
12 12 overcast 72 90 TRUE Play | |
13 13 overcast 81 75 FALSE Play | |
14 14 rain 71 80 TRUE Do not Play | |
=== | |
As we don't have a separate validation set, we use the input set as the validation set. Yes, that is lame, I know, but it is not the point here. | |
> rs = dbSendQuery(con, statement="select * from MADlib.c45_score('trained_tree_infogain','public.golf_data',0)"); | |
> df <- fetch(rs, n = -1); | |
> df | |
c45_score | |
1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment