# irudnyts/students.R

## students.R
library("ggplot2")

# Load the raw activity log. Character columns are kept as strings so the
# timestamp column can be parsed below.
log_alm <- read.csv(file = "/Users/irudnyts/Documents/projects/data/alm_logs.csv",
                    stringsAsFactors = FALSE)

# Reference dates used further down to label days in the daily summary:
# the exercise dates, the dates exactly 7 days before each of them, and the
# midterm date.
exercises <- as.Date(c("2016-10-18", "2016-11-02", "2016-11-16", "2016-12-07"))
before_exercises <- exercises - 7
midterm <- as.Date("2016-11-23")

colnames(log_alm) <- tolower(colnames(log_alm))

# Timestamps are parsed with the pattern "dd/mm/yy, HH:MM" in the session's
# local time zone.
parsed_time <- strptime(log_alm$time, "%d/%m/%y, %H:%M")
log_alm$time <- as.POSIXct(parsed_time)
# Take the calendar date from the broken-down local time. Calling as.Date()
# on the POSIXct value converts through UTC by default, which moves events
# logged shortly after local midnight to the previous day whenever the
# session time zone is ahead of UTC.
log_alm$date <- as.Date(parsed_time)

# Report rows whose timestamp did not match the pattern instead of letting
# them silently turn into all-NA rows in the filter below.
n_unparsed <- sum(is.na(log_alm$date))
if (n_unparsed > 0) {
    warning(n_unparsed, " log row(s) have an unparseable time and are dropped",
            call. = FALSE)
}

# Keep events strictly before 2016-12-23. The is.na() guard is needed because
# a logical index containing NA yields rows filled with NA.
log_alm <- log_alm[!is.na(log_alm$date) &
                       log_alm$date < as.Date("2016-12-23"), ]

# Bar chart of the number of log records per calendar day.
ggplot(log_alm, aes(x = date)) +
    geom_bar() +
    theme_bw() +
    theme(text = element_text(size = 24))

# Daily summary: one row per calendar day present in the log (in order of
# first appearance), with the number of distinct users recorded on that day.
# De-duplicating (date, user) pairs once and tabulating them replaces the
# per-date loop, which re-scanned the whole log for every day and whose
# `smr[smr$date == date, ...] <-` assignment fails if any date is NA.
seen <- unique(log_alm[!is.na(log_alm$date), c("date", "user.full.name")])
days <- unique(seen$date)
smr <- data.frame(date = days,
                  n_logins = tabulate(match(seen$date, days),
                                      nbins = length(days)))

# Label each day. Later assignments overwrite earlier ones, so the precedence
# is: midterm > before_exercise > exercise > lecture > no_class.
# rep() keeps the assignment valid even when the summary has zero rows.
smr$class <- rep("no_class", nrow(smr))
# POSIXlt's wday counts from 0 = Sunday, so 2 and 3 are Tuesday and Wednesday.
# Unlike matching weekdays() against English day names, this does not depend
# on the session locale.
smr$class[as.POSIXlt(smr$date)$wday %in% c(2, 3)] <- "lecture"
smr$class[smr$date %in% exercises] <- "exercise"
smr$class[smr$date %in% before_exercises] <- "before_exercise"
smr$class[smr$date %in% midterm] <- "midterm"


# Distinct users per day as bars, filled by the day's label.
ggplot(smr, aes(x = date, y = n_logins, fill = class)) +
    geom_bar(stat = "identity") +
    theme_bw() +
    theme(text = element_text(size = 24))

# Same series as a line, with a smoothed trend and its 95% confidence band.
ggplot(smr) +
    aes(x = date, y = n_logins) +
    geom_line() +
    stat_smooth(method = "auto", span = 0.4, level = 0.95) +
    theme_bw() +
    theme(text = element_text(size = 24))

# students' info
# Count log records per user (sorted ascending by count) and key the counts
# by the lower-cased full name.
records_per_user <- sort(table(log_alm$user.full.name))
std <- data.frame(name = tolower(names(records_per_user)),
                  logins = as.vector(records_per_user),
                  stringsAsFactors = FALSE)

# Grades file: rename its five columns, then build the same lower-cased
# "<name> <surname>" key so both tables can be joined on `name`.
grades <- read.csv("/Users/irudnyts/Documents/projects/data/alm_grades.csv")
names(grades) <- c("id", "surname", "name", "grade", "pres")
grades$name <- tolower(paste(grades[["name"]], grades[["surname"]]))

# Full outer join on `name`, then keep only rows without missing values and
# with a non-zero grade.
info <- merge(grades[, c("grade", "name")], std, by = "name", all = TRUE)
info <- info[complete.cases(info) & info$grade != 0, ]

# Ordinary least squares fit of grade on the per-student record count.
model <- lm(formula = grade ~ logins, data = info)
summary(model)


# clustering analysis
# kmeans() picks its initial centres at random, so without a fixed seed the
# partition and the cluster numbering (hence the plot colours) change from run
# to run. Seed the RNG and use several random starts, keeping the best fit.
set.seed(2016)
# Select the input columns by name rather than by position so the clustering
# input stays the same if columns are added to or reordered in `info`.
cl <- kmeans(x = info[, c("grade", "logins")], centers = 3, nstart = 25)
info$cl <- cl$cluster
ggplot(data = info,
       mapping = aes(x = grade, y = logins, color = as.factor(cl))) +
    geom_point(size = 3) +
    theme_bw() +
    theme(text = element_text(size = 24))

# Repeat the clustering on z-scores so that grade and logins contribute on the
# same scale; on raw values the variable with the larger spread dominates the
# Euclidean distance used by k-means.
info$grade_std <- (info$grade - mean(info$grade)) / sd(info$grade)
info$logins_std <- (info$logins - mean(info$logins)) / sd(info$logins)

cl2 <- kmeans(x = info[, c("logins_std", "grade_std")], centers = 3,
              nstart = 25)

info$cl2 <- cl2$cluster

# Plot on the original axes, coloured by the clusters found on z-scores.
ggplot(data = info,
       mapping = aes(x = grade, y = logins, color = as.factor(cl2))) +
    geom_point(size = 3) +
    theme_bw() +
    theme(text = element_text(size = 24))
	library("ggplot2")

	# NOTE(review): from here on the file repeats the analysis that appears
	# earlier in it, statement for statement — confirm whether this second
	# copy is intentional.
	# Load the raw activity log. Character columns are kept as strings so the
	# timestamp column can be parsed below.
	log_alm <- read.csv(file = "/Users/irudnyts/Documents/projects/data/alm_logs.csv",
	                    stringsAsFactors = FALSE)

	# Reference dates used further down to label days in the daily summary:
	# the exercise dates, the dates exactly 7 days before each of them, and
	# the midterm date.
	exercises <- as.Date(c("2016-10-18", "2016-11-02", "2016-11-16", "2016-12-07"))
	before_exercises <- exercises - 7
	midterm <- as.Date("2016-11-23")

	colnames(log_alm) <- tolower(colnames(log_alm))

	# Timestamps are parsed with the pattern "dd/mm/yy, HH:MM" in the
	# session's local time zone.
	parsed_time <- strptime(log_alm$time, "%d/%m/%y, %H:%M")
	log_alm$time <- as.POSIXct(parsed_time)
	# Take the calendar date from the broken-down local time. Calling
	# as.Date() on the POSIXct value converts through UTC by default, which
	# moves events logged shortly after local midnight to the previous day
	# whenever the session time zone is ahead of UTC.
	log_alm$date <- as.Date(parsed_time)

	# Report rows whose timestamp did not match the pattern instead of letting
	# them silently turn into all-NA rows in the filter below.
	n_unparsed <- sum(is.na(log_alm$date))
	if (n_unparsed > 0) {
	    warning(n_unparsed, " log row(s) have an unparseable time and are dropped",
	            call. = FALSE)
	}

	# Keep events strictly before 2016-12-23. The is.na() guard is needed
	# because a logical index containing NA yields rows filled with NA.
	log_alm <- log_alm[!is.na(log_alm$date) &
	                       log_alm$date < as.Date("2016-12-23"), ]

	# Bar chart of the number of log records per calendar day.
	ggplot(log_alm, aes(x = date)) +
	    geom_bar() +
	    theme_bw() +
	    theme(text = element_text(size = 24))

	# Daily summary: one row per calendar day present in the log (in order of
	# first appearance), with the number of distinct users recorded that day.
	# De-duplicating (date, user) pairs once and tabulating them replaces the
	# per-date loop, which re-scanned the whole log for every day and whose
	# `smr[smr$date == date, ...] <-` assignment fails if any date is NA.
	seen <- unique(log_alm[!is.na(log_alm$date), c("date", "user.full.name")])
	days <- unique(seen$date)
	smr <- data.frame(date = days,
	                  n_logins = tabulate(match(seen$date, days),
	                                      nbins = length(days)))

	# Label each day. Later assignments overwrite earlier ones, so the
	# precedence is: midterm > before_exercise > exercise > lecture > no_class.
	# rep() keeps the assignment valid even when the summary has zero rows.
	smr$class <- rep("no_class", nrow(smr))
	# POSIXlt's wday counts from 0 = Sunday, so 2 and 3 are Tuesday and
	# Wednesday. Unlike matching weekdays() against English day names, this
	# does not depend on the session locale.
	smr$class[as.POSIXlt(smr$date)$wday %in% c(2, 3)] <- "lecture"
	smr$class[smr$date %in% exercises] <- "exercise"
	smr$class[smr$date %in% before_exercises] <- "before_exercise"
	smr$class[smr$date %in% midterm] <- "midterm"


	# Distinct users per day as bars, filled by the day's label.
	ggplot(smr, aes(x = date, y = n_logins, fill = class)) +
	    geom_bar(stat = "identity") +
	    theme_bw() +
	    theme(text = element_text(size = 24))

	# Same series as a line, with a smoothed trend and its 95% confidence band.
	ggplot(smr) +
	    aes(x = date, y = n_logins) +
	    geom_line() +
	    stat_smooth(method = "auto", span = 0.4, level = 0.95) +
	    theme_bw() +
	    theme(text = element_text(size = 24))

	# students' info
	# Count log records per user (sorted ascending by count) and key the
	# counts by the lower-cased full name.
	records_per_user <- sort(table(log_alm$user.full.name))
	std <- data.frame(name = tolower(names(records_per_user)),
	                  logins = as.vector(records_per_user),
	                  stringsAsFactors = FALSE)

	# Grades file: rename its five columns, then build the same lower-cased
	# "<name> <surname>" key so both tables can be joined on `name`.
	grades <- read.csv("/Users/irudnyts/Documents/projects/data/alm_grades.csv")
	names(grades) <- c("id", "surname", "name", "grade", "pres")
	grades$name <- tolower(paste(grades[["name"]], grades[["surname"]]))

	# Full outer join on `name`, then keep only rows without missing values
	# and with a non-zero grade.
	info <- merge(grades[, c("grade", "name")], std, by = "name", all = TRUE)
	info <- info[complete.cases(info) & info$grade != 0, ]

	# Ordinary least squares fit of grade on the per-student record count.
	model <- lm(formula = grade ~ logins, data = info)
	summary(model)


	# clustering analysis
	# kmeans() picks its initial centres at random, so without a fixed seed
	# the partition and the cluster numbering (hence the plot colours) change
	# from run to run. Seed the RNG and use several random starts, keeping the
	# best fit.
	set.seed(2016)
	# Select the input columns by name rather than by position so the
	# clustering input stays the same if columns are added to or reordered in
	# `info`.
	cl <- kmeans(x = info[, c("grade", "logins")], centers = 3, nstart = 25)
	info$cl <- cl$cluster
	ggplot(data = info,
	       mapping = aes(x = grade, y = logins, color = as.factor(cl))) +
	    geom_point(size = 3) +
	    theme_bw() +
	    theme(text = element_text(size = 24))

	# Repeat the clustering on z-scores so that grade and logins contribute on
	# the same scale; on raw values the variable with the larger spread
	# dominates the Euclidean distance used by k-means.
	info$grade_std <- (info$grade - mean(info$grade)) / sd(info$grade)
	info$logins_std <- (info$logins - mean(info$logins)) / sd(info$logins)

	cl2 <- kmeans(x = info[, c("logins_std", "grade_std")], centers = 3,
	              nstart = 25)

	info$cl2 <- cl2$cluster

	# Plot on the original axes, coloured by the clusters found on z-scores.
	ggplot(data = info,
	       mapping = aes(x = grade, y = logins, color = as.factor(cl2))) +
	    geom_point(size = 3) +
	    theme_bw() +
	    theme(text = element_text(size = 24))