Skip to content

Instantly share code, notes, and snippets.

@sfrechette
Last active August 29, 2015 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sfrechette/585e4ca9cdab7eacc92b to your computer and use it in GitHub Desktop.
Save sfrechette/585e4ca9cdab7eacc92b to your computer and use it in GitHub Desktop.
Ottawa weather vs. Ottawa JavaScript Meetup attendance: an R analysis
library(weatherData)
library(dplyr)
library(ggplot2)
library(gridExtra)
# Ottawa International Airport (YOW) weather data
#' Download one calendar year of weather observations for a station.
#'
#' Wrapper around `getWeatherForDate()` that requests 1 January through
#' 31 December of `year`, passing `opt_detailed = FALSE` and
#' `opt_all_columns = TRUE`.
#'
#' @param year Four-digit year (numeric or character); it is pasted in front
#'   of "-01-01" and "-12-31" to build the date range.
#' @param station_id Station code handed to `getWeatherForDate()`. Defaults
#'   to "YOW" (Ottawa International Airport), the value that used to be
#'   hard-coded, so existing one-argument calls behave as before.
#' @return Whatever `getWeatherForDate()` returns for the requested range.
getWeatherForYear <- function(year, station_id = "YOW") {
  getWeatherForDate(
    station_id,
    start_date = paste0(year, "-01-01"),
    end_date = paste0(year, "-12-31"),
    opt_detailed = FALSE,
    opt_all_columns = TRUE
  )
}
# Fetch the data: the three complete years 2012-2014 plus the slice of 2015
# running from 1 January to 11 February, stacked row-wise in that order.
df_weather <- do.call(
  rbind,
  c(
    lapply(2012:2014, getWeatherForYear),
    list(
      getWeatherForDate(
        "YOW",
        start_date = "2015-01-01",
        end_date = "2015-02-11",
        opt_detailed = FALSE,
        opt_all_columns = TRUE
      )
    )
  )
)
# Persist the downloaded observations to CSV, then load that file back so the
# remainder of the script works from the on-disk copy.
write.csv(df_weather, file = "ottawa_weatherdata.csv", row.names = TRUE)
weather <- read.csv(file = "ottawa_weatherdata.csv")

# The CSV round trip leaves Date as text; convert it back to date-time.
weather[["Date"]] <- as.POSIXct(weather[["Date"]])

# Preview: ten random rows with their minimum, mean and maximum temperature.
weather %>%
  sample_n(10) %>%
  select(Date, Min_TemperatureC, Mean_TemperatureC, Max_TemperatureC)
# Load the Meetup event export and turn its date column into date-time values.
events <- read.csv(file = "ottawajs_events.csv")
events[["date"]] <- as.POSIXct(events[["date"]])

# Preview: ten random events with their RSVP count and date.
events %>%
  sample_n(10) %>%
  select(event, rsvps, date)
# Attendance per calendar month: number of events, total RSVPs, and the
# average RSVPs per event, sorted from the best-attended month downwards.
# NOTE: format(date, "%B") produces month names in the session locale, while
# month.name is English; in a non-English locale the factor would be all NA.
by_month <- events %>%
  mutate(
    month = factor(format(date, "%B"), levels = month.name)
  ) %>%
  group_by(month) %>%
  summarise(
    events = n(),
    count = sum(rsvps)
  ) %>%
  mutate(avg = count / events) %>%
  arrange(desc(avg))
# Mean of the daily mean temperature (degrees C) for each calendar month,
# pooled across every year in the weather data.
# NOTE: format(Date, "%B") produces month names in the session locale, while
# month.name is English; in a non-English locale the factor would be all NA.
averagetemperature_bymonth <- weather %>%
  mutate(
    month = factor(format(Date, "%B"), levels = month.name)
  ) %>%
  group_by(month) %>%
  summarise(
    avg_temperature = mean(Mean_TemperatureC)
  )
# Line chart of the average temperature for each calendar month.
# group = 1 puts every month in a single group so geom_line() connects them
# even though month is a factor.
plot_temperature <- ggplot(
  averagetemperature_bymonth,
  aes(x = month, y = avg_temperature, group = 1)
) +
  geom_line() +
  ggtitle("Average Temperature by Month")

# Bar chart of the average RSVPs per event for each calendar month.
# The y aesthetic is `avg` (count / events) so the bars match the
# "Average Attendance" title; `count` would plot the monthly RSVP total.
plot_attendance <- ggplot(
  by_month,
  aes(x = month, y = avg, group = 1)
) +
  geom_bar(stat = "identity", fill = "grey50") +
  ggtitle("Average Attendance by Month")

# Show both charts in a single column, temperature above attendance.
grid.arrange(plot_temperature, plot_attendance, ncol = 1)
# Attendance per day: number of events, total RSVPs, and the average RSVPs
# per event, sorted from the best-attended day downwards.
by_day <- events %>%
  # Use the piped `date` column rather than reaching back to the global
  # `events$date`, so the values stay aligned with the rows actually flowing
  # through the pipeline.
  mutate(day = as.POSIXct(date)) %>%
  group_by(day) %>%
  summarise(
    events = n(),
    count = sum(rsvps)
  ) %>%
  mutate(avg = count / events) %>%
  arrange(desc(avg))
# Give the weather table a `day` key that matches the one in by_day.
weather <- mutate(weather, day = Date)

# Join weather and attendance on `day`; merge() keeps only the days that
# appear in both tables.
merged <- merge(x = weather, y = by_day, by = "day")

# Scatter plot: RSVP total (x) against mean temperature (y), one point per day.
ggplot(
  merged,
  aes(x = count, y = Mean_TemperatureC, group = day)
) +
  geom_point()

# Any correlation between the temperature and attendance?
with(merged, cor(count, Mean_TemperatureC))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment