# thatseeyou/ex4.R

## ex4.R
#
# ex 4-1: build a list with mixed element types and inspect its 2nd element
#
a <- list(1, "abc", FALSE)
mode(a[[2L]])
typeof(a[[2L]])


#
# ex 4-2
#
# 4-2.1: draw 100 values from 10..20 with replacement
x <- sample(10:20, size = 100, replace = TRUE)
# Count how often each key of `key_range` occurs in `values`.
#
# Args:
#   key_range: vector of keys to report; every key gets an entry, even when
#     its count is 0.
#   values: vector of observations to tally.
#
# Returns:
#   A list named by as.character(key_range) whose elements are numeric
#   counts. Values that match no key are ignored.
find_frequency <- function(key_range, values) {
  keys <- unique(as.character(key_range))
  # match() gives NA for values outside key_range and tabulate() skips NA,
  # so unknown values no longer create stray zero-length entries.
  counts <- tabulate(match(as.character(values), keys), nbins = length(keys))
  setNames(as.list(as.numeric(counts)), keys)
}
freq_list <- find_frequency(key_range = 10:20, values = x)

# With split() this can be done in one line
#freq_list <- lapply(split(1:length(x), x), length)

# 4-2.2
# Find the key with the highest frequency in a named list of counts.
#
# Args:
#   freq_list: named list whose elements are single numeric counts.
#
# Returns:
#   list(key = <name of the winning entry>, frequency = <its count>).
#   On ties the first key wins. When no count is above 0 the first key is
#   returned with frequency 0 (NA key for an empty list).
find_max_frequency <- function(freq_list) {
  keys <- names(freq_list)
  # Start from the first real key rather than a hard-coded '10', so the
  # fallback is always a key that exists in freq_list.
  highest <- list(
    key = if (length(keys) > 0L) keys[[1L]] else NA_character_,
    frequency = 0
  )
  for (key in keys) {
    if (freq_list[[key]] > highest$frequency) {
      highest <- list(key = key, frequency = freq_list[[key]])
    }
  }
  highest
}
highest_element <- find_max_frequency(freq_list)

# 4-2.3
# Flatten the list into a named vector (assumes every entry holds a single
# count), sort it in decreasing order and show the first entry.
freq_unlist <- unlist(freq_list)
sort(freq_unlist, decreasing = TRUE)[1]

## ex5.R
# One record per line, fields separated by whitespace.
lines <- "
Jim Hoffer Male 52 1964.03.18 30
Sonya Martin Female 41 1975.09.21 12
Rachel Darwin Female 34 1982.05.15 5
Edward Cruze Male 30 1986.11.02 3
"

# Read the table from the string through a text connection
con <- textConnection(lines)
people_info <- read.table(con, header = FALSE, stringsAsFactors = FALSE)
close(con)

# a. Assign the column names
names(people_info) <- c("Name", "Surname", "Gender", "Age", "DateOfBirth", "WorkingPeriod")

# b. Display Name, Gender, Age information of people who have Age < 50
#people_info[people_info$Age < 50, names(people_info) %in% c("Name", "Gender", "Age")]
people_info[people_info$Age < 50, c("Name", "Gender", "Age")]

# c. Change Age of Sonya to 40 and DateOfBirth to 1976.09.21
# people_info[people_info$Name == "Sonya",]$Age = 40
# people_info[people_info$Name == "Sonya",]$DateOfBirth = "1976.09.21"
people_info[people_info$Name == "Sonya", c("Age", "DateOfBirth")] <- list(40, "1976.09.21")

# d. Decrease the working period of all people by 1
people_info$WorkingPeriod <- people_info$WorkingPeriod - 1
# Show the storage type of every column; vapply() pins the result to a
# character vector.
vapply(people_info, typeof, character(1))

# e. Add one more information of Choonmee (Name= Choonmee, Surname = Cha, Gender = Female, Age = 29, DateOfBirth = 1987.04.26, WorkingPeriod = 5)
people_info <- rbind(people_info, list("Choonmee", "Cha", "Female", 29, "1987.04.26", 5))

# f. Calculate the average Age and average WorkingPeriod of all people
# Average each column on its own instead of apply(), which first turns the
# data frame into a matrix.
vapply(people_info[c("Age", "WorkingPeriod")], mean, numeric(1))

# g. Remove the Surname information
#people_info[, -which(names(people_info) %in% c("Surname"))]
#people_info$Surname <- NULL
people_info[, c("Surname")] <- NULL

# h. Sort the data frame by increasing WorkingPeriod
#people_info[with(people_info, order(WorkingPeriod)), ]
people_info[order(people_info$WorkingPeriod), ]

# i. Create another data frame (people_hobby) as Chapter5_pic2
lines <- "
Name,Hobby
Jim,Voleyball
Rachel,Billardshe
Edward,Skiing
Dasan,Music
"
con <- textConnection(lines)
people_hobby <- read.csv(con, stringsAsFactors = FALSE)
close(con)

# j. Merge (people_info) with (people_hobby). Save the merged data to a file (Chapter5_result.csv)
# NOTE(review): only the two merge orders are displayed below; nothing is
# written to Chapter5_result.csv yet.
merge(people_info, people_hobby)
merge(people_hobby, people_info)


## ex6.R
# Survey data as CSV text: one header row followed by 10 records.
lines <- "
이름,대중교통_접근시간[분],대중교통_접근수단,대중교통_통행목적,대중교통_유형,대중교통_이용시간[분]
박해영,15,도보,등하교,버스,20
차수현,5,도보,출퇴근,버스,15
이재한,20,자전거,출퇴근,지하철,20
김범주,17,택시,기타,버스,40
안치수,13,승용차,출퇴근,지하철+버스,60
김계철,30,자전거,학원,지하철,15
오윤서,12,도보,학원,버스,25
정헌기,6,자전거,등하교,지하철,20
황의경,9,도보,등하교,지하철+버스,30
장영철,13,승용차,출퇴근,지하철,45
"

# 1. Build a table from the attached CSV data.

# Read the CSV from the string; text columns are loaded as factors.
con <- textConnection(lines)
transport_usage <- read.csv(con, header = TRUE, stringsAsFactors = TRUE)
close(con)

# 2. Mean access time for each access mode.
with(transport_usage, tapply(대중교통_접근시간.분., 대중교통_접근수단, mean))

# 3. Group by transit type and find the largest travel time in each group.
with(transport_usage, tapply(대중교통_이용시간.분., 대중교통_유형, max))

# 4. Total travel time of the people whose trip purpose is commuting.
sum(transport_usage$대중교통_이용시간.분.[transport_usage$대중교통_통행목적 == "출퇴근"])
	#
	# ex 4-1: build a list with mixed element types and inspect its 2nd element
	#
	a <- list(1, "abc", FALSE)
	mode(a[[2L]])
	typeof(a[[2L]])


	#
	# ex 4-2
	#
	# 4-2.1: draw 100 values from 10..20 with replacement
	x <- sample(10:20, size = 100, replace = TRUE)
	# Count how often each key of `key_range` occurs in `values`.
	#
	# Args:
	#   key_range: vector of keys to report; every key gets an entry, even when
	#     its count is 0.
	#   values: vector of observations to tally.
	#
	# Returns:
	#   A list named by as.character(key_range) whose elements are numeric
	#   counts. Values that match no key are ignored.
	find_frequency <- function(key_range, values) {
	  keys <- unique(as.character(key_range))
	  # match() gives NA for values outside key_range and tabulate() skips NA,
	  # so unknown values no longer create stray zero-length entries.
	  counts <- tabulate(match(as.character(values), keys), nbins = length(keys))
	  setNames(as.list(as.numeric(counts)), keys)
	}
	freq_list <- find_frequency(key_range = 10:20, values = x)

	# With split() this can be done in one line
	#freq_list <- lapply(split(1:length(x), x), length)

	# 4-2.2
	# Find the key with the highest frequency in a named list of counts.
	#
	# Args:
	#   freq_list: named list whose elements are single numeric counts.
	#
	# Returns:
	#   list(key = <name of the winning entry>, frequency = <its count>).
	#   On ties the first key wins. When no count is above 0 the first key is
	#   returned with frequency 0 (NA key for an empty list).
	find_max_frequency <- function(freq_list) {
	  keys <- names(freq_list)
	  # Start from the first real key rather than a hard-coded '10', so the
	  # fallback is always a key that exists in freq_list.
	  highest <- list(
	    key = if (length(keys) > 0L) keys[[1L]] else NA_character_,
	    frequency = 0
	  )
	  for (key in keys) {
	    if (freq_list[[key]] > highest$frequency) {
	      highest <- list(key = key, frequency = freq_list[[key]])
	    }
	  }
	  highest
	}
	highest_element <- find_max_frequency(freq_list)

	# 4-2.3
	# Flatten the list into a named vector (assumes every entry holds a single
	# count), sort it in decreasing order and show the first entry.
	freq_unlist <- unlist(freq_list)
	sort(freq_unlist, decreasing = TRUE)[1]
	# One record per line, fields separated by whitespace.
	# The embedded data starts at column 0 on purpose: indentation inside the
	# quotes would become part of the text that gets parsed.
	lines <- "
Jim Hoffer Male 52 1964.03.18 30
Sonya Martin Female 41 1975.09.21 12
Rachel Darwin Female 34 1982.05.15 5
Edward Cruze Male 30 1986.11.02 3
"

	# Read the table from the string through a text connection
	con <- textConnection(lines)
	people_info <- read.table(con, header = FALSE, stringsAsFactors = FALSE)
	close(con)

	# a. Assign the column names
	names(people_info) <- c("Name", "Surname", "Gender", "Age", "DateOfBirth", "WorkingPeriod")

	# b. Display Name, Gender, Age information of people who have Age < 50
	#people_info[people_info$Age < 50, names(people_info) %in% c("Name", "Gender", "Age")]
	people_info[people_info$Age < 50, c("Name", "Gender", "Age")]

	# c. Change Age of Sonya to 40 and DateOfBirth to 1976.09.21
	# people_info[people_info$Name == "Sonya",]$Age = 40
	# people_info[people_info$Name == "Sonya",]$DateOfBirth = "1976.09.21"
	people_info[people_info$Name == "Sonya", c("Age", "DateOfBirth")] <- list(40, "1976.09.21")

	# d. Decrease the working period of all people by 1
	people_info$WorkingPeriod <- people_info$WorkingPeriod - 1
	# Show the storage type of every column; vapply() pins the result to a
	# character vector.
	vapply(people_info, typeof, character(1))

	# e. Add one more information of Choonmee (Name= Choonmee, Surname = Cha, Gender = Female, Age = 29, DateOfBirth = 1987.04.26, WorkingPeriod = 5)
	people_info <- rbind(people_info, list("Choonmee", "Cha", "Female", 29, "1987.04.26", 5))

	# f. Calculate the average Age and average WorkingPeriod of all people
	# Average each column on its own instead of apply(), which first turns the
	# data frame into a matrix.
	vapply(people_info[c("Age", "WorkingPeriod")], mean, numeric(1))

	# g. Remove the Surname information
	#people_info[, -which(names(people_info) %in% c("Surname"))]
	#people_info$Surname <- NULL
	people_info[, c("Surname")] <- NULL

	# h. Sort the data frame by increasing WorkingPeriod
	#people_info[with(people_info, order(WorkingPeriod)), ]
	people_info[order(people_info$WorkingPeriod), ]

	# i. Create another data frame (people_hobby) as Chapter5_pic2
	# read.csv() keeps leading whitespace in data fields, so an indented
	# literal would yield names such as "<tab>Jim" that never match
	# people_info$Name in the merge below.
	lines <- "
Name,Hobby
Jim,Voleyball
Rachel,Billardshe
Edward,Skiing
Dasan,Music
"
	con <- textConnection(lines)
	people_hobby <- read.csv(con, stringsAsFactors = FALSE)
	close(con)

	# j. Merge (people_info) with (people_hobby). Save the merged data to a file (Chapter5_result.csv)
	# NOTE(review): only the two merge orders are displayed below; nothing is
	# written to Chapter5_result.csv yet.
	merge(people_info, people_hobby)
	merge(people_hobby, people_info)
	# Survey data as CSV text: one header row followed by 10 records.
	# The embedded data starts at column 0 on purpose: indentation inside the
	# quotes would become part of the first field of every record and leave a
	# whitespace-only last line in the input.
	lines <- "
이름,대중교통_접근시간[분],대중교통_접근수단,대중교통_통행목적,대중교통_유형,대중교통_이용시간[분]
박해영,15,도보,등하교,버스,20
차수현,5,도보,출퇴근,버스,15
이재한,20,자전거,출퇴근,지하철,20
김범주,17,택시,기타,버스,40
안치수,13,승용차,출퇴근,지하철+버스,60
김계철,30,자전거,학원,지하철,15
오윤서,12,도보,학원,버스,25
정헌기,6,자전거,등하교,지하철,20
황의경,9,도보,등하교,지하철+버스,30
장영철,13,승용차,출퇴근,지하철,45
"

	# 1. Build a table from the attached CSV data.

	# Read the CSV from the string; text columns are loaded as factors.
	con <- textConnection(lines)
	transport_usage <- read.csv(con, header = TRUE, stringsAsFactors = TRUE)
	close(con)

	# 2. Mean access time for each access mode.
	tapply(transport_usage$대중교통_접근시간.분., transport_usage$대중교통_접근수단, mean)

	# 3. Group by transit type and find the largest travel time in each group.
	tapply(transport_usage$대중교통_이용시간.분., transport_usage$대중교통_유형, max)

	# 4. Total travel time of the people whose trip purpose is commuting.
	sum(split(transport_usage$대중교통_이용시간.분., transport_usage$대중교통_통행목적)$출퇴근)