Skip to content

Instantly share code, notes, and snippets.

@irudnyts
Last active September 5, 2017 09:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save irudnyts/53b25b8405ac3fb2289e20eb15d17860 to your computer and use it in GitHub Desktop.
Save irudnyts/53b25b8405ac3fb2289e20eb15d17860 to your computer and use it in GitHub Desktop.
The code for the blog post about students' data.
library("ggplot2")
# load the raw access log; strings stay character so timestamps can be parsed
log_alm <- read.csv(
  file = "/Users/irudnyts/Documents/projects/data/alm_logs.csv",
  stringsAsFactors = FALSE
)

# key course dates, used later to label individual days
exercises <- as.Date(c("2016-10-18", "2016-11-02", "2016-11-16", "2016-12-07"))
before_exercises <- exercises - 7  # the day one week before each exercise
midterm <- as.Date("2016-11-23")

colnames(log_alm) <- tolower(colnames(log_alm))

# Parse the wall-clock timestamps and derive the calendar date in the SAME
# fixed time zone. as.Date() on a POSIXct converts via UTC by default, so
# parsing in the local zone would shift events shortly after local midnight
# onto the previous day.
log_alm$time <- as.POSIXct(
  strptime(log_alm$time, "%d/%m/%y, %H:%M", tz = "UTC"),
  tz = "UTC"
)
log_alm$date <- as.Date(log_alm$time, tz = "UTC")

# keep only events before 2016-12-23; which() also drops rows whose timestamp
# failed to parse (NA date) instead of turning them into all-NA rows
log_alm <- log_alm[which(log_alm$date < as.Date("2016-12-23")), ]

# number of log events per day
ggplot(data = log_alm) +
  geom_bar(aes(x = date)) +
  theme_bw() +
  theme(text = element_text(size = 24))
# number of distinct users seen on each day
# (split() groups user names by date; looping over a Date vector with for()
# would strip the Date class from the loop variable)
users_by_day <- split(log_alm$user.full.name, log_alm$date)
smr <- data.frame(
  date = as.Date(names(users_by_day)),
  n_logins = unname(
    vapply(users_by_day, function(users) length(unique(users)), integer(1))
  ),
  row.names = NULL
)

# Label each day; later assignments deliberately override earlier ones.
smr$class <- "no_class"
# POSIXlt$wday is 0-6 starting on Sunday, so 2 = Tuesday and 3 = Wednesday.
# Unlike weekdays(), it does not depend on the session locale.
smr$class[as.POSIXlt(smr$date)$wday %in% c(2L, 3L)] <- "lecture"
smr$class[smr$date %in% exercises] <- "exercise"
smr$class[smr$date %in% before_exercises] <- "before_exercise"
smr$class[smr$date %in% midterm] <- "midterm"

# daily unique users, coloured by the kind of day
ggplot(data = smr, mapping = aes(x = date, y = n_logins)) +
  geom_bar(aes(fill = class), stat = "identity") +
  theme_bw() +
  theme(text = element_text(size = 24))

# daily unique users as a line with a smoothed trend and 95% band
ggplot(data = smr, mapping = aes(x = date, y = n_logins)) +
  geom_line() +
  stat_smooth(method = "auto", level = 0.95, span = 0.4) +
  theme_bw() +
  theme(text = element_text(size = 24))
# students' info

# Number of log events per student. Names are lower-cased BEFORE counting so
# that case variants of the same name collapse into one row; otherwise they
# would become duplicate merge keys and fan out the join below.
std <- sort(table(tolower(log_alm$user.full.name)))
std <- data.frame(
  name = names(std),
  logins = as.vector(std),
  stringsAsFactors = FALSE
)

grades <- read.csv(
  file = "/Users/irudnyts/Documents/projects/data/alm_grades.csv",
  stringsAsFactors = FALSE
)
colnames(grades) <- c("id", "surname", "name", "grade", "pres")
# build the same "<name> <surname>" lower-case key that the log data uses
grades$name <- tolower(paste(grades$name, grades$surname))

# inner join on the student name: only students present in both tables
info <- merge(grades[, c("grade", "name")], std, by = "name")
info <- info[complete.cases(info), ]  # drop rows with a missing grade
# NOTE(review): a grade of 0 presumably marks students without a real grade
# (e.g. absent from the exam) - confirm against the grading data
info <- info[info$grade != 0, ]

# linear regression of grade on activity
model <- lm(grade ~ logins, data = info)
summary(model)
# clustering analysis

# k-means starts from random centres: fix the seed so the clusters (and the
# plots) are reproducible, and use several starts to avoid a poor local optimum
set.seed(2016)

# clustering on the raw scale; features are selected by name rather than by
# position so the result does not depend on the column order of `info`
cl <- kmeans(x = info[, c("grade", "logins")], centers = 3, nstart = 25)
info$cl <- cl$cluster

ggplot(
  data = info,
  mapping = aes(x = grade, y = logins, color = as.factor(cl))
) +
  geom_point(size = 3) +
  theme_bw() +
  theme(text = element_text(size = 24))

# standardize both variables (z-scores) so neither dominates the distance
info$grade_std <- as.vector(scale(info$grade))
info$logins_std <- as.vector(scale(info$logins))

cl2 <- kmeans(
  x = info[, c("logins_std", "grade_std")],
  centers = 3,
  nstart = 25
)
info$cl2 <- cl2$cluster

# clusters from the standardized data, shown on the original axes
ggplot(
  data = info,
  mapping = aes(x = grade, y = logins, color = as.factor(cl2))
) +
  geom_point(size = 3) +
  theme_bw() +
  theme(text = element_text(size = 24))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment