Skip to content

Instantly share code, notes, and snippets.

@leoluyi
Last active August 29, 2015 14:10
Show Gist options
  • Save leoluyi/e7aafd17b02f692d0274 to your computer and use it in GitHub Desktop.
Save leoluyi/e7aafd17b02f692d0274 to your computer and use it in GitHub Desktop.
## === 第一次用R需安裝套件 ===
list.of.packages <- c("plyr", "dplyr", "sjmisc")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
rm(list.of.packages, new.packages)
combine_pipe_table <- function (
.data, .var_start, .var_end, .MR_num, .step)
{
# .var_start # 表格起始變數名
# .var_end # 表格末端變數名
# .MR_num # 複選題選項數
# .step # piping相同題目數
# 防呆
if(!(.var_start %in% colnames(.data))) stop(c("找不到變數: ",.var_start))
if(!(.var_end %in% colnames(.data))) stop(c("找不到變數: ",.var_end))
startCol <- match(.var_start, colnames(.data)) # 開始欄
endCol <- match(.var_end, colnames(.data)) # 結束欄
# 防呆
if(startCol >= endCol) stop("題目排列順序有誤")
varCountPerMR <- .MR_num * .step
totalVarCount <- endCol - startCol + 1
# 防呆
if(totalVarCount %% varCountPerMR != 0) {
stop("題目排列或輸入參數有誤")
} else Qcount <- totalVarCount / varCountPerMR # 原始表格未重複題目數
for(i in seq(1, totalVarCount - varCountPerMR + 1, by = varCountPerMR)) {
NowCol <- startCol + i -1
for(k in 0:(.MR_num -1)){
cat(i %/% varCountPerMR + 1, ":",
colnames(.data)[NowCol+k],'<<',colnames(.data)[NowCol+k+.MR_num],"\n")
.data[[NowCol+k]] <-
# 如果不是NA就填回前面變數
ifelse(!is.na(.data[[NowCol+k+.MR_num]]),
.data[[NowCol+k+.MR_num]], .data[[NowCol+k]])
}
}
.data
}
fix_pipe_table <- function (
.data, .var_start, .var_end, .MR_num)
{
# .var_start # 表格起始變數名
# .var_end # 表格末端變數名
# .MR_num # 複選題選項數
# .step # piping相同題目數
.step = 1
# 防呆
if(!(.var_start %in% colnames(.data))) stop(c("找不到變數: ",.var_start))
if(!(.var_end %in% colnames(.data))) stop(c("找不到變數: ",.var_end))
startCol <- match(.var_start, colnames(.data)) # 開始欄
endCol <- match(.var_end, colnames(.data)) # 結束欄
# 防呆
if(startCol >= endCol) stop("題目排列順序有誤")
varCountPerMR <- .MR_num * .step
totalVarCount <- endCol - startCol + 1
# 防呆
if(totalVarCount %% varCountPerMR != 0){
stop("題目排列或輸入參數有誤")
} else Qcount <- totalVarCount / varCountPerMR # 原始表格未重複題目數
## Progress bar
# pb <- txtProgressBar(min = 0, max = totalVarCount - varCountPerMR + 1,
# style = 3)
for(i in seq(1, totalVarCount - varCountPerMR + 1, by = .MR_num)) {
NowCol <- startCol + i - 1
names(.data)[NowCol:(NowCol+.MR_num-1)] %>% cat("(複選)", ., "\n")
rows_which_all_zero <- which(rowSums(.data[NowCol:(NowCol+.MR_num-1)]) == 0)
.data[rows_which_all_zero, NowCol:(NowCol+.MR_num-1)] <- NA
# setTxtProgressBar(pb,i) # update Progress bar
}
.data
}
# 有C、R或S的變數轉成數字
as.numeric_RC <- function (.data)
{
library(dplyr)
library(sjmisc)
is_tbl <- inherits(.data, "tbl")
if(!is.data.frame(.data)) {
.data <- dplyr:::as_data_frame(.data)
} else if (is_tbl) {
.data <- dplyr:::tbl_df(.data)
}
seq1 <- grep("C|R|S", names(.data), ignore.case = FALSE)
seq2 <- grep("Q", names(.data), ignore.case = FALSE)
which_criteria <- intersect(seq1, seq2)
.data[,which_criteria] <-
sapply(.data[,which_criteria], to_value)
if(!is.data.frame(.data)) {
.data <- dplyr:::as_data_frame(.data)
} else if (is_tbl) {
.data <- dplyr::tbl_df(.data)
}
.data
}
# 有T, O的變數轉成character
as.character_OT <- function (.data)
{
library(dplyr)
library(sjmisc)
is_tbl <- inherits(.data, "tbl")
if(!is.data.frame(.data)) {
.data <- dplyr:::as_data_frame(.data)
} else if (is_tbl) {
.data <- dplyr:::tbl_df(.data)
}
which_criteria <- grep("^Q[[:alnum:]]+O|^Q[[:alnum:]]+T|ID",
names(.data),
ignore.case = FALSE)
# get temp labels
temp_var_label <- get_var_labels(.data[,which_criteria])
.data[,which_criteria] <-
sapply(.data[,which_criteria], as.character)
# trim whitespace from start and end of string
.data[,which_criteria] <-
sapply(.data[,which_criteria], stringr::str_trim)
# restore variable lables
.data[,which_criteria] <- set_var_labels(.data[,which_criteria],
temp_var_label)
if(!is.data.frame(.data)) {
.data <- dplyr:::as_data_frame(.data)
} else if (is_tbl) {
.data <- dplyr:::tbl_df(.data)
}
.data
}
# 有R或S的變數轉成factor
to_label_RS <- function (.data)
{
library(dplyr)
library(sjmisc)
is_tbl <- inherits(.data, "tbl")
if(!is.data.frame(.data)) {
.data <- dplyr:::as_data_frame(.data)
} else if (is_tbl) {
.data <- dplyr:::tbl_df(.data)
}
which_criteria <- grep("^Q[[:alnum:]]+R|^Q[[:alnum:]]+S",
names(.data),
ignore.case = FALSE)
# get temp labels
temp_var_label <- get_var_labels(.data[,which_criteria])
# .data[,which_criteria] <-
# sapply(.data[,which_criteria], to_label)
for(i in which_criteria) {
temp_val_label <- get_val_labels(.data[[i]])
.data[[i]] <- to_label(.data[[i]]) %>%
set_val_labels(temp_val_label)
}
# restore variable lables
.data[,which_criteria] <- set_var_labels(.data[,which_criteria],
temp_var_label)
if(!is.data.frame(.data)) {
.data <- dplyr:::as_data_frame(.data)
} else if (is_tbl) {
.data <- dplyr:::tbl_df(.data)
}
.data
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment