# Source: GitHub gist thatseeyou/6ab83f8e8d38bac92436 by @thatseeyou
# (last active March 19, 2016 12:55).
# The gist page's navigation text (star/fork/save prompts) is not R code and
# is kept out of the script so the file can be parsed.
#
# ex 4-1
#
# Build a list holding a number, a string and a logical, then inspect its
# second element. `[[ ]]` extracts the element itself (the string "abc"),
# so both calls below report on a character value, not on a list.
a <- list(1, "abc", FALSE)
mode(a[[2]])
typeof(a[[2]])
#
# ex 4-2
#
# 4-2.1
# Sample of 100 values drawn with replacement from 10..20; its frequencies are
# counted below.
x <- sample(10:20, 100, replace = TRUE)

# Count how often each key occurs in a vector of values.
#
# Args:
#   key_range: vector of keys to count (e.g. 10:20). Keys are compared by
#              their character representation; duplicates are counted once.
#   values:    vector of observed values. Values that are not in key_range
#              are ignored instead of creating malformed entries.
#
# Returns:
#   A list named by as.character(key_range) with one numeric count per key
#   (0 for keys that never occur), in the order of key_range.
find_frequency <- function(key_range, values) {
  keys <- unique(as.character(key_range))
  # match() gives the position of each value's key (NA when the value is not
  # a key); tabulate() counts the positions and silently skips the NAs.
  counts <- tabulate(match(as.character(values), keys), nbins = length(keys))
  # Counts are stored as doubles, matching the original `0 + 1 + ...` tallies.
  setNames(as.list(as.numeric(counts)), keys)
}
# Tally the sample x over the keys 10..20.
freq_list <- find_frequency(10:20, x)
# Using split() this can be done in a single line.
# NOTE(review): the split() variant groups by the values present in x, so a
# key that never occurs would be missing rather than 0 - confirm before use.
#freq_list <- lapply(split(1:length(x), x), length)
# 4-2.2
# Find the entry with the highest count in a frequency list.
#
# Args:
#   freq_list: named list with one numeric count per key, as returned by
#              find_frequency().
#
# Returns:
#   A list with `key` (name of the first entry holding the maximum count) and
#   `frequency` (that count). When the list is empty or has no names there is
#   nothing to pick, so `key` is NA_character_ and `frequency` is 0.
find_max_frequency <- function(freq_list) {
  keys <- names(freq_list)
  if (length(keys) == 0) {
    return(list(key = NA_character_, frequency = 0))
  }
  # Flatten to a numeric vector so which.max() can locate the first maximum;
  # vapply() fails loudly if an entry is not a single number.
  counts <- vapply(freq_list, function(count) as.numeric(count), numeric(1))
  best <- unname(which.max(counts))
  # The key comes from the list itself rather than a hard-coded default, so
  # the result is always one of the keys that were passed in.
  list(key = keys[[best]], frequency = freq_list[[best]])
}
highest_element <- find_max_frequency(freq_list)
# 4-2.3
# Same question without an explicit loop: flatten the list into a named
# numeric vector, sort it in decreasing order and keep the first entry.
freq_unlist <- unlist(freq_list)
sort(freq_unlist, decreasing = TRUE)[1]
# Raw records, one person per line, whitespace-separated. The fields are
# named in step a below.
lines <- "
Jim Hoffer Male 52 1964.03.18 30
Sonya Martin Female 41 1975.09.21 12
Rachel Darwin Female 34 1982.05.15 5
Edward Cruze Male 30 1986.11.02 3
"
# Read the table straight from the string. Passing it via `text` lets
# read.table() open and close its own connection, so nothing stays open if
# parsing fails.
people_info <- read.table(
  text = lines,
  header = FALSE,
  stringsAsFactors = FALSE
)
# a. Name the columns
names(people_info) <- c(
  "Name", "Surname", "Gender", "Age", "DateOfBirth", "WorkingPeriod"
)
# b. Display Name, Gender, Age information of people who have Age < 50
#people_info[people_info$Age < 50, names(people_info) %in% c("Name", "Gender", "Age")]
people_info[people_info$Age < 50, c("Name", "Gender", "Age")]
# c. Change Age of Sonya to 40 and DateOfBirth to 1976.09.21
# people_info[people_info$Name == "Sonya",]$Age = 40
# people_info[people_info$Name == "Sonya",]$DateOfBirth = "1976.09.21"
people_info[people_info$Name == "Sonya", c("Age", "DateOfBirth")] <-
  list(40, "1976.09.21")
# d. Decrease the working period of all people by 1
people_info$WorkingPeriod <- people_info$WorkingPeriod - 1
# Show the storage type of every column after the edits above.
vapply(people_info, typeof, character(1))
# e. Add one more information of Choonmee (Name = Choonmee, Surname = Cha,
#    Gender = Female, Age = 29, DateOfBirth = 1987.04.26, WorkingPeriod = 5)
people_info <- rbind(
  people_info,
  list("Choonmee", "Cha", "Female", 29, "1987.04.26", 5)
)
# f. Calculate the average Age and average WorkingPeriod of all people
colMeans(people_info[, c("Age", "WorkingPeriod")])
# g. Remove the Surname information
#people_info[, -which(names(people_info) %in% c("Surname"))]
people_info$Surname <- NULL
# h. Sort the data frame by increasing WorkingPeriod
#    (displayed only; people_info itself keeps its row order)
#people_info[with(people_info, order(WorkingPeriod)), ]
people_info[order(people_info$WorkingPeriod), ]
# i. Create another data frame (people_hobby) as Chapter5_pic2
lines <- "
Name,Hobby
Jim,Voleyball
Rachel,Billardshe
Edward,Skiing
Dasan,Music
"
# Passing the string via `text` lets read.csv() open and close its own
# connection, so nothing stays open if parsing fails.
people_hobby <- read.csv(text = lines, stringsAsFactors = FALSE)
# j. Merge (people_info) with (people_hobby). The exercise also asks to save
#    the merged data to a CSV file (Chapter5_result.csv).
#    NOTE(review): nothing is written to disk here; something like
#    write.csv(merge(people_info, people_hobby), "Chapter5_result.csv",
#              row.names = FALSE)
#    would complete the step - confirm whether the file is wanted.
# merge() joins on the column names the two data frames have in common; the
# two calls below differ only in which frame is passed first.
merge(people_info, people_hobby)
merge(people_hobby, people_info)
# Survey of public-transport usage, one respondent per row; the header line
# inside the string names the columns.
lines <- "
이름,대중교통_접근시간[분],대중교통_접근수단,대중교통_통행목적,대중교통_유형,대중교통_이용시간[분]
박해영,15,도보,등하교,버스,20
차수현,5,도보,출퇴근,버스,15
이재한,20,자전거,출퇴근,지하철,20
김범주,17,택시,기타,버스,40
안치수,13,승용차,출퇴근,지하철+버스,60
김계철,30,자전거,학원,지하철,15
오윤서,12,도보,학원,버스,25
정헌기,6,자전거,등하교,지하철,20
황의경,9,도보,등하교,지하철+버스,30
장영철,13,승용차,출퇴근,지하철,45
"
# 1. Build a table from the attached CSV data.
# Passing the string via `text` lets read.csv() open and close its own
# connection, so nothing stays open if parsing fails.
# The code below addresses the "[분]" columns as ".분.", i.e. it relies on
# read.csv() rewriting the header into syntactic names.
# NOTE(review): presumably this needs a UTF-8 locale for the Korean names to
# be kept as-is - confirm on the target platform.
transport_usage <- read.csv(
  text = lines,
  header = TRUE,
  stringsAsFactors = TRUE
)
# 2. Compute the mean access time ("대중교통_접근시간") for each access mode
#    ("대중교통_접근수단").
tapply(transport_usage$대중교통_접근시간.분., transport_usage$대중교통_접근수단, mean)
# 3. Group by transit type ("대중교통_유형") and find the largest usage time
#    ("대중교통_이용시간") within each group.
tapply(transport_usage$대중교통_이용시간.분., transport_usage$대중교통_유형, max)
# 4. Sum the usage time of the people whose trip purpose is commuting
#    ("출퇴근"): split the times by purpose and add up the commuting group.
sum(split(transport_usage$대중교통_이용시간.분., transport_usage$대중교통_통행목적)$출퇴근)
# (GitHub page footer, not part of the script:) Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment