Skip to content

Instantly share code, notes, and snippets.

@leoluyi
Last active August 29, 2015 14:08
Show Gist options
  • Save leoluyi/729439b6e53a51b3a54e to your computer and use it in GitHub Desktop.
Save leoluyi/729439b6e53a51b3a54e to your computer and use it in GitHub Desktop.
食安變數合併(recode)
library(data.table)
library(xlsx)
library(dplyr)
library(pipeR)
library(car)
## 讀取資料
fileNameA <- '3337_A卷.csv' # (手動更改參數)
fileNameB <- '3371_B卷.csv' # (手動更改參數)
# options(java.parameters = "-Xmx16g") # read.xlsx增加記憶體
raw.dataA <- fread(fileNameA, header=T, colClasses="character") %>%
as.data.frame() %>%
select(-c(Sequence, Identity, 年齡, 性別))
raw.dataB <- fread(fileNameB, header=T, colClasses="character") %>%
as.data.frame() %>%
select(-c(Sequence, Identity, 年齡, 性別))
raw.data <- inner_join(raw.dataA, raw.dataB, by = 'Panel.ID') # inner join
## 有T的變數(開放題)搬到最後面
raw.data %>>%
(~ woT <- select(., -matches(".T.", ignore.case = FALSE))) %>>% # 不含T的變數
select(matches(".T.", ignore.case = FALSE)) %>>% # 包含T的變數
(~ raw.data <- cbind(woT,.)) # 合併
## 有C或R的變數轉成數字
for(i in grep("C|R", names(raw.data))){
raw.data[,i] <-
raw.data[,i] %>% as.numeric()
}
try(
raw.data$共同題目總應答時間 <-
raw.data$共同題目總應答時間 %>% as.numeric()
)
# --------
pipeRecode <- function (
.data, .var,
start = grep(.var, colnames(.data)),
Qcount, MRnum, step, key)
{
# .var # 表格起始變數名
# start # 開始欄
# Qcount # piping來源題數
# MRnum # 複選題數
# step # piping相同題數
# key # 關鍵字
library(dplyr)
# 防呆
is.integer0 <- function(x) {is.integer(x) && length(x) == 0L}
if(is.integer0(start)) stop(c("找不到變數: ",.var))
for(i in seq(1,(MRnum*step)*(Qcount-1)+1, by=step*MRnum)) {
j <- start + i -1
for(k in 0:(MRnum -1)){
cat(i %/% (step*MRnum) + 1,
colnames(.data)[j+k],'<<',colnames(.data)[j+k+MRnum],"\n")
.data[,j+k] <-
ifelse(.data[,j+k+MRnum]==key, .data[,j+k+MRnum], .data[,j+k])
}
}
return(.data)
}
test.data <- raw.data %>%
pipeRecode(.var='Q75C1', Qcount=47, MRnum=6, step=2, key=1) %>%
pipeRecode(.var='Q197C1', Qcount=23, MRnum=6, step=2, key=1) %>%
pipeRecode(.var='Q292C1', Qcount=44, MRnum=6, step=2, key=1) %>%
pipeRecode(.var='Q399C1', Qcount=14, MRnum=6, step=2, key=1) %>%
pipeRecode(.var='Q495C1', Qcount=63, MRnum=6, step=2, key=1) %>%
# B卷
pipeRecode(.var='QB32C1', Qcount=28, MRnum=6, step=2, key=1) %>%
pipeRecode(.var='QB160C1', Qcount=67, MRnum=6, step=2, key=1) %>%
pipeRecode(.var='QB362C1', Qcount=63, MRnum=6, step=2, key=1) %>%
pipeRecode(.var='QB556C1', Qcount=63, MRnum=6, step=2, key=1) %>%
pipeRecode(.var='QB698C1', Qcount=11, MRnum=6, step=2, key=1) %>%
pipeRecode(.var='QB753C1', Qcount=28, MRnum=6, step=2, key=1)
## 測試結果
test.data %>%
select(Q79C3,Q80C3) %>%
subset(Q80C3==1)
## recode
test.data$年齡 <- test.data$Q2R %>%
recode(., "
2:3 = '13-19age';
4 = '20-24age';
5 = '25-29age';
6 = '30-34age';
7 = '35-39age';
8 = '40-44age';
9 = '45-49age';
10:13 = '50-64age'
")
test.data$地區 <- test.data$Q3R %>%
recode("
1:5 = 'N';
6:10 = 'C';
11:14 = 'S';
15:18 = 'E'
")
test.data$地區2 <- test.data$Q3R %>%
recode("
c(11,14) = 'CYI_PYHG';
15:18 = 'EandO';
13 = 'KHH';
c(1,4,5) = 'KLU_TYU_HS';
c(6,8,9,10) = 'MAL_CWH_YUN_NTO';
3 = 'NTPE';
12 = 'TNN';
2 = 'TPE';
7 = 'TXG'
")
test.data$性別 <- test.data$Q1R %>%
recode("
1 = 'male';
2 = 'female'
")
## === 抽樣 ==========================
#-------讀取舊有配額設定 -----#
strataCrossN_flat <- read.csv(
"R實抽樣本.csv", header = TRUE)
xtabs(Freq~年齡, strataCrossN_flat) # 年齡
xtabs(Freq~地區, strataCrossN_flat) # 地區
xtabs(Freq~性別, strataCrossN_flat) # 性別
write.xlsx2(test.data,'食安.xlsx',na = "",row.names =F)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment