Created
December 7, 2012 18:39
-
-
Save chrishaid/4235380 to your computer and use it in GitHub Desktop.
Kickboard long to wide (2 row per student) format for R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#################################################################################### | |
## Script to transform | |
## Long format Kicboard culture csv | |
## to wide format (by behavoir event), where | |
## each student is represented by 2(!) rows | |
## One row for negative paycheck events (i.e., deductions) | |
## and the other row for postive (though value 0) culture events (Create Points) | |
################################################################################# | |
#Load Libraries | |
library(reshape) | |
library(plyr) | |
#Get Kickboard culture data | |
kb.long<-read.csv("/Users/chaid/Dropbox/Consulting/KIPP Ascend/Data Requests/Random/Kickboard_long_to_wide/culture-analysis_121202.csv") | |
#Discriminate between Create Points and Paycheck Decutions | |
kb.long$CreatePoint<-is.na(kb.long$Dollar.Value) | |
#This functino simply prodives consecutive numbering to a sorted dataframe | |
consecutive<-function(df){ | |
df$OrderID<-c(1:nrow(df)) | |
return(df) | |
} | |
#Data Range to rearrange | |
x.long<-subset(kb.long, ymd(Behavior.Date)>=ymd("2012/11/26")&ymd(Behavior.Date)<=ymd("2012/12/02")) | |
#Change numeric to factors so that values don't get NA's during melt opoeration (see next line) | |
x.long$Dollar.Value<-as.factor(x.long$Dollar.Value) | |
#make what is long even longer | |
x<-melt(x.long, id.vars=c("Student","Group","CreatePoint"), measure.vars=c("Behavior", "Dollar.Value", "Behavior.Date", "Staff", "Comments")) | |
#rename variables to comport with Kate's mail merge input spreadsheet column headers | |
levels(x$variable)<-c("Behavior", "Dollar", "Date", "Staff", "Comments") | |
#add numbering for each event | |
x<-ddply(x, .(Student, CreatePoint, variable), function(df) consecutive(df)) | |
#rename the variables with number, name, number (the first number is later dropped, it is used for ordering (thouhg I think this can be deprecated)) | |
x$variable2<-paste(x$OrderID, x$variable, x$OrderID, sep=" ") | |
#get max number of student events for the period | |
n<-max(x$OrderID) | |
#go from long to wide and rearrange by group and student names | |
x.wide<-arrange(cast(x, Student+Group+CreatePoint~variable2), Group, Student) | |
#strip leading number from column headers | |
names(x.wide)[-(1:3)]<-gsub("(\\d{1,2})(\\s)(Behavior|Date|Comments|Dollar|Staff)(\\s)(\\d{1,2})", "\\3\\4\\5", names(x.wide)[-(1:3)]) | |
#reorder columns to comport with Kate's mail merge input spreadsheet | |
col.order<-vector() | |
cats<-c("Behavior", "Dollar","Date", "Staff", "Comments") | |
for(i in 1:n){ | |
cols.new<-paste(cats, i, sep=" ") | |
col.order<-c(col.order, cols.new) | |
} | |
#Get paycheck totals (50 + the sume of negative decuctions) | |
paycheck<-ddply(subset(x, variable=="Dollar"), .(Student, Group), summarise, Paycheck=50+sum(as.numeric(levels(value))[value],na.rm=T)) | |
#merge paycheck totals with events | |
x.merge<-merge(x=x.wide, y=paycheck, by.x=c("Student", "Group"), by.y=c("Student","Group"), all.x=T) | |
#rearrange and drop unnecessary columns | |
z<-x.merge[,c("Student", "Group","Paycheck", col.order)] | |
#rename Group to Homeroom | |
names(z)[2]<-"Homeroom" | |
#resort order by Group name and then Student name | |
z<-arrange(z,Homeroom, Student) | |
#write the file | |
write.csv(z,"/Users/chaid/Dropbox/Consulting/KIPP Ascend/Data Requests/Random/Kickboard_long_to_wide/KB_121202e.csv", row.names=FALSE,na="") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment