Created
October 3, 2014 16:16
-
-
Save mbannert/e62079c55a493e7c6668 to your computer and use it in GitHub Desktop.
Improvement of the R code used in Ioannis' example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load data from an SPSS file ----
# read.spss() comes from the 'foreign' package. to.data.frame = TRUE returns a
# data.frame instead of a list; reencode = "UTF-8" tells read.spss() which
# encoding to assume for the strings stored in the file.
library(foreign)
votematch <- read.spss(
  "ioannis.sav",
  to.data.frame = TRUE,
  reencode = "UTF-8"
)

# Explore the loaded dataset ----
head(votematch)   # first few rows
dim(votematch)    # number of rows and columns
names(votematch)  # column names
str(votematch)    # compact overview of every column's type and values

# Look at the first element of the column t1.
votematch$t1[1]

# Bias printing towards fixed notation: a positive scipen is a penalty applied
# to exponential notation, so it is only used when it is clearly shorter.
options(scipen = 4)
# Clean the t columns ----
# Avoid loops: vectorised operations are faster, safer and more R-ish.

# Copy the data.frame so the original votematch is not affected.
votematch2 <- votematch

# Use string processing to get the names of the relevant columns, i.e. every
# column whose name starts with "t".
relevant_columns <- grep("^t", names(votematch2), value = TRUE)
if (length(relevant_columns) == 0L) {
  stop("no column name in 'votematch' starts with 't'", call. = FALSE)
}

# Subselect the relevant columns. drop = FALSE keeps the result a data.frame
# even when only a single column matches.
relevant_df <- votematch2[, relevant_columns, drop = FALSE]

# relevant_df <= 0 yields a logical matrix with the same shape as relevant_df;
# using it inside the index brackets [] replaces every flagged cell at once.
relevant_df[relevant_df <= 0] <- NA

# Now that the 'bad' values are set to NA, standard R tools can handle them.
# Write the cleaned columns back into the copied data.frame.
votematch2[, relevant_columns] <- relevant_df

# Omit the rows that contain an NA in ANY of the t columns. complete.cases()
# is restricted to the t columns so that rows are not discarded merely because
# some other, unrelated column has a missing value.
votematch2 <- votematch2[complete.cases(relevant_df), , drop = FALSE]
# Benchmark the cleaning process ----
# Using the microbenchmark library you can benchmark the entire process.
# The benchmark reads from the original votematch rather than votematch2:
# votematch2 has already been cleaned above, so timing it would measure a pass
# in which there is nothing left to replace or omit.
# NOTE(review): the assignments inside the braces appear to be evaluated in the
# calling environment, so they would overwrite same-named variables there —
# confirm against the microbenchmark documentation.
library(microbenchmark)
microbenchmark({
  relevant_columns <- grep("^t", names(votematch), value = TRUE)
  relevant_df <- votematch[, relevant_columns]
  relevant_df[relevant_df <= 0] <- NA
  relevant_df_clean <- na.omit(relevant_df)
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment