chrishaid/Kickboard_long_to_2_row_wide.R

## Kickboard_long_to_2_row_wide.R
####################################################################################
## Script to transform
## Long format Kicboard culture csv
## to wide format (by behavoir event), where
## each student is represented by 2(!) rows
## One row for negative paycheck events (i.e., deductions)
## and the other row for postive (though value 0) culture events (Create Points)
#################################################################################

#Load Libraries
library(reshape)
library(plyr)

#Get Kickboard culture data
kb.long<-read.csv("/Users/chaid/Dropbox/Consulting/KIPP Ascend/Data Requests/Random/Kickboard_long_to_wide/culture-analysis_121202.csv")


#Discriminate between Create Points and Paycheck Decutions
kb.long$CreatePoint<-is.na(kb.long$Dollar.Value)


#This functino simply prodives consecutive numbering to a sorted dataframe
consecutive<-function(df){
  df$OrderID<-c(1:nrow(df))
  return(df)
}

#Data Range to rearrange
x.long<-subset(kb.long, ymd(Behavior.Date)>=ymd("2012/11/26")&ymd(Behavior.Date)<=ymd("2012/12/02"))

#Change numeric to factors so that values don't get NA's during melt opoeration (see next line)
x.long$Dollar.Value<-as.factor(x.long$Dollar.Value)

#make what is long even longer
x<-melt(x.long, id.vars=c("Student","Group","CreatePoint"), measure.vars=c("Behavior", "Dollar.Value", "Behavior.Date", "Staff", "Comments"))

#rename variables to comport with Kate's mail merge input spreadsheet column headers
levels(x$variable)<-c("Behavior", "Dollar", "Date", "Staff", "Comments")

#add numbering for each event
x<-ddply(x, .(Student, CreatePoint, variable), function(df) consecutive(df))

#rename the variables with number, name, number (the first number is later dropped, it is used for ordering (thouhg I think this can be deprecated))
x$variable2<-paste(x$OrderID, x$variable, x$OrderID, sep=" ")

#get max number of student events for the period
n<-max(x$OrderID)

#go from long to wide and rearrange by group and student names
x.wide<-arrange(cast(x, Student+Group+CreatePoint~variable2), Group, Student)

#strip leading number from column headers
names(x.wide)[-(1:3)]<-gsub("(\\d{1,2})(\\s)(Behavior|Date|Comments|Dollar|Staff)(\\s)(\\d{1,2})", "\\3\\4\\5",   names(x.wide)[-(1:3)])

#reorder columns to comport with Kate's mail merge input spreadsheet
col.order<-vector()
cats<-c("Behavior", "Dollar","Date", "Staff", "Comments")
for(i in 1:n){
  cols.new<-paste(cats, i, sep=" ")
  col.order<-c(col.order, cols.new)
}

#Get paycheck totals (50 +  the sume of negative decuctions)
paycheck<-ddply(subset(x, variable=="Dollar"), .(Student, Group), summarise, Paycheck=50+sum(as.numeric(levels(value))[value],na.rm=T))

#merge paycheck totals with events
x.merge<-merge(x=x.wide, y=paycheck, by.x=c("Student", "Group"), by.y=c("Student","Group"), all.x=T)

#rearrange and drop unnecessary columns
z<-x.merge[,c("Student", "Group","Paycheck", col.order)]

#rename Group to Homeroom
names(z)[2]<-"Homeroom"

#resort order by Group name and then Student name
z<-arrange(z,Homeroom, Student)

#write the file
write.csv(z,"/Users/chaid/Dropbox/Consulting/KIPP Ascend/Data Requests/Random/Kickboard_long_to_wide/KB_121202e.csv", row.names=FALSE,na="")
	####################################################################################
	## Script to transform
	## Long format Kicboard culture csv
	## to wide format (by behavoir event), where
	## each student is represented by 2(!) rows
	## One row for negative paycheck events (i.e., deductions)
	## and the other row for postive (though value 0) culture events (Create Points)
	#################################################################################

	#Load Libraries
	library(reshape)
	library(plyr)

	#Get Kickboard culture data
	kb.long<-read.csv("/Users/chaid/Dropbox/Consulting/KIPP Ascend/Data Requests/Random/Kickboard_long_to_wide/culture-analysis_121202.csv")


	#Discriminate between Create Points and Paycheck Decutions
	kb.long$CreatePoint<-is.na(kb.long$Dollar.Value)


	#This functino simply prodives consecutive numbering to a sorted dataframe
	consecutive<-function(df){
	df$OrderID<-c(1:nrow(df))
	return(df)
	}

	#Data Range to rearrange
	x.long<-subset(kb.long, ymd(Behavior.Date)>=ymd("2012/11/26")&ymd(Behavior.Date)<=ymd("2012/12/02"))

	#Change numeric to factors so that values don't get NA's during melt opoeration (see next line)
	x.long$Dollar.Value<-as.factor(x.long$Dollar.Value)

	#make what is long even longer
	x<-melt(x.long, id.vars=c("Student","Group","CreatePoint"), measure.vars=c("Behavior", "Dollar.Value", "Behavior.Date", "Staff", "Comments"))

	#rename variables to comport with Kate's mail merge input spreadsheet column headers
	levels(x$variable)<-c("Behavior", "Dollar", "Date", "Staff", "Comments")

	#add numbering for each event
	x<-ddply(x, .(Student, CreatePoint, variable), function(df) consecutive(df))

	#rename the variables with number, name, number (the first number is later dropped, it is used for ordering (thouhg I think this can be deprecated))
	x$variable2<-paste(x$OrderID, x$variable, x$OrderID, sep=" ")

	#get max number of student events for the period
	n<-max(x$OrderID)

	#go from long to wide and rearrange by group and student names
	x.wide<-arrange(cast(x, Student+Group+CreatePoint~variable2), Group, Student)

	#strip leading number from column headers
	names(x.wide)[-(1:3)]<-gsub("(\\d{1,2})(\\s)(Behavior\|Date\|Comments\|Dollar\|Staff)(\\s)(\\d{1,2})", "\\3\\4\\5", names(x.wide)[-(1:3)])

	#reorder columns to comport with Kate's mail merge input spreadsheet
	col.order<-vector()
	cats<-c("Behavior", "Dollar","Date", "Staff", "Comments")
	for(i in 1:n){
	cols.new<-paste(cats, i, sep=" ")
	col.order<-c(col.order, cols.new)
	}

	#Get paycheck totals (50 + the sume of negative decuctions)
	paycheck<-ddply(subset(x, variable=="Dollar"), .(Student, Group), summarise, Paycheck=50+sum(as.numeric(levels(value))[value],na.rm=T))

	#merge paycheck totals with events
	x.merge<-merge(x=x.wide, y=paycheck, by.x=c("Student", "Group"), by.y=c("Student","Group"), all.x=T)

	#rearrange and drop unnecessary columns
	z<-x.merge[,c("Student", "Group","Paycheck", col.order)]

	#rename Group to Homeroom
	names(z)[2]<-"Homeroom"

	#resort order by Group name and then Student name
	z<-arrange(z,Homeroom, Student)

	#write the file
	write.csv(z,"/Users/chaid/Dropbox/Consulting/KIPP Ascend/Data Requests/Random/Kickboard_long_to_wide/KB_121202e.csv", row.names=FALSE,na="")