# mbannert/gist:e62079c55a493e7c6668

## gistfile1.r
# Read the SPSS data set into a data.frame, re-encoding strings to UTF-8
library(foreign)
votematch <- read.spss(
  "ioannis.sav",
  to.data.frame = TRUE,
  reencode = "UTF-8"
)

# First inspection of the imported data: leading rows, dimensions,
# column names and internal structure
head(votematch)
dim(votematch)
names(votematch)
str(votematch)

# Inspect the first element of a single column (a vector)
votematch$t1[1]
# Bias number printing towards fixed notation: exponential notation is only
# used when the fixed form would be more than 4 digits wider
options(scipen = 4)


# Clean the "t" columns without explicit loops: vectorised replacement is
# faster, safer and more idiomatic R.
# Work on a copy so the originally loaded data.frame `votematch` is untouched.
votematch2 <- votematch

# Names of all columns whose name starts with "t" (pattern "^t").
relevant_columns <- grep("^t", names(votematch2), value = TRUE)

# Subset those columns. drop = FALSE keeps a data.frame even when only one
# column matches, so the logical-matrix indexing below always applies.
relevant_df <- votematch2[, relevant_columns, drop = FALSE]
# `relevant_df <= 0` is a logical matrix; every non-positive entry becomes NA.
relevant_df[relevant_df <= 0] <- NA
# With the 'bad' values marked as NA, standard NA handling can take over.
# Write the cleaned columns back into the copy; the index must be the very
# same name vector that was used for the subset above.
votematch2[, relevant_columns] <- relevant_df
# Drop the ROWS that contain an NA in ANY of the "t" columns. Only the "t"
# columns are inspected, so rows with missing values in unrelated columns
# are kept (na.omit() on the whole data.frame would discard those as well).
has_missing_t <- rowSums(is.na(relevant_df)) > 0
votematch2 <- votematch2[!has_missing_t, , drop = FALSE]


# The microbenchmark package can time the entire cleaning pipeline:
# select the "t" columns, mark non-positive values as NA, and drop the rows
# that contain an NA in any of those columns.
library(microbenchmark)
microbenchmark({
  relevant_columns <- grep("^t", names(votematch2), value = TRUE)
  # drop = FALSE: stay a data.frame even if only one column matches
  relevant_df <- votematch2[, relevant_columns, drop = FALSE]
  relevant_df[relevant_df <= 0] <- NA
  relevant_df_clean <- na.omit(relevant_df)
})
	# Read the SPSS data set into a data.frame, re-encoding strings to UTF-8
	library(foreign)
	votematch <- read.spss(
	  "ioannis.sav",
	  to.data.frame = TRUE,
	  reencode = "UTF-8"
	)

	# First inspection of the imported data: leading rows, dimensions,
	# column names and internal structure
	head(votematch)
	dim(votematch)
	names(votematch)
	str(votematch)

	# Inspect the first element of a single column (a vector)
	votematch$t1[1]
	# Bias number printing towards fixed notation: exponential notation is only
	# used when the fixed form would be more than 4 digits wider
	options(scipen = 4)


	# Clean the "t" columns without explicit loops: vectorised replacement is
	# faster, safer and more idiomatic R.
	# Work on a copy so the originally loaded data.frame `votematch` is untouched.
	votematch2 <- votematch

	# Names of all columns whose name starts with "t" (pattern "^t").
	relevant_columns <- grep("^t", names(votematch2), value = TRUE)

	# Subset those columns. drop = FALSE keeps a data.frame even when only one
	# column matches, so the logical-matrix indexing below always applies.
	relevant_df <- votematch2[, relevant_columns, drop = FALSE]
	# `relevant_df <= 0` is a logical matrix; every non-positive entry becomes NA.
	relevant_df[relevant_df <= 0] <- NA
	# With the 'bad' values marked as NA, standard NA handling can take over.
	# Write the cleaned columns back into the copy; the index must be the very
	# same name vector that was used for the subset above.
	votematch2[, relevant_columns] <- relevant_df
	# Drop the ROWS that contain an NA in ANY of the "t" columns. Only the "t"
	# columns are inspected, so rows with missing values in unrelated columns
	# are kept (na.omit() on the whole data.frame would discard those as well).
	has_missing_t <- rowSums(is.na(relevant_df)) > 0
	votematch2 <- votematch2[!has_missing_t, , drop = FALSE]




	# The microbenchmark package can time the entire cleaning pipeline:
	# select the "t" columns, mark non-positive values as NA, and drop the rows
	# that contain an NA in any of those columns.
	library(microbenchmark)
	microbenchmark({
	  relevant_columns <- grep("^t", names(votematch2), value = TRUE)
	  # drop = FALSE: stay a data.frame even if only one column matches
	  relevant_df <- votematch2[, relevant_columns, drop = FALSE]
	  relevant_df[relevant_df <= 0] <- NA
	  relevant_df_clean <- na.omit(relevant_df)
	})