Skip to content

Instantly share code, notes, and snippets.

@ko-lem
Last active August 29, 2015 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ko-lem/9071364 to your computer and use it in GitHub Desktop.
Save ko-lem/9071364 to your computer and use it in GitHub Desktop.
Analysis of Swertres lotto results.
# Exploratory analysis of the processed results CSV. Written to be run
# statement by statement; bare expressions rely on the console printing them.

# Relative paths below are resolved against this directory.
setwd("~/Dropbox/Projects/suertres-analysis/")
# Read the first six columns as factors and the seventh as an integer.
data <- read.csv("data/processed/suertres.csv", header=TRUE,
colClasses=c(rep("factor", 6), "integer"))
# Dates arrive as month/day/year text; convert them to Date values.
data$date <- as.Date(data$date, format= "%m/%d/%Y")
# length() of a data frame is its number of columns ...
length(data)
# ... while the length of a single column is the number of rows.
length(data[,1])
# Total winners per calendar date, summed over all rows sharing that date.
winners.per.date <- tapply(data$num.winners, data$date, sum)
# Line chart of the daily totals; the tapply names are the dates as text.
plot(as.Date(names(winners.per.date)), winners.per.date, type="l",
xlab="Date", ylab="Number of winners",
main="Number of winners over time")
# Earliest date present in the data for each time slot.
min(data$date[data$time.slot == "11am"])
min(data$date[data$time.slot == "4pm"])
min(data$date[data$time.slot == "9pm"])
# Total number of winners across the whole data set.
sum(winners.per.date)
# NOTE(review): 4500 is presumably the payout per winner -- confirm.
sum(winners.per.date) * 4500
# How often each value appears in each of the three digit columns.
digit1.counts <- tapply(data$digit1, data$digit1, length)
digit2.counts <- tapply(data$digit2, data$digit2, length)
digit3.counts <- tapply(data$digit3, data$digit3, length)
# One row per digit column, one column per digit value, so that barplot
# draws a group of three bars for every digit value.
digit.counts <- t(cbind(digit1.counts, digit2.counts, digit3.counts))
barplot(digit.counts, main="Digit occurences by place",
xlab="Digit", col=c("darkblue","red", "green"),
legend = c("1st", "2nd", "3rd"), beside=TRUE)
# How often each distinct value of the result column appears.
result.counts <- tapply(data$result, data$result, length)
barplot(result.counts, main="Number of times a winning result has occurred",
xlab="Winning Result")
# Smallest and largest counts ...
min.count <- result.counts[which.min(result.counts)]
max.count <- result.counts[which.max(result.counts)]
# ... and every result tied at those counts.
result.counts[result.counts == min.count]
result.counts[result.counts == max.count]
mean(result.counts)
median(result.counts)
# First date on which each result appears. tapply hands back plain day
# numbers rather than Date values, hence the conversion with an origin.
earliest.occurences <- as.Date(tapply(data$date, data$result, min), origin="1970-01-01")
# The result whose first appearance is the latest, and the date it happened.
which.max(earliest.occurences)
max(earliest.occurences)
require 'nokogiri'
# Raw HTML pages to parse, keyed by the time slot label written to the CSV.
input_filenames = {
  '11am' => 'data/raw/11am.html',
  '4pm'  => 'data/raw/4pm.html',
  '9pm'  => 'data/raw/9pm.html'
}

# Where the combined CSV is written.
output_filename = 'data/processed/suertres.csv'
# Writes the combined CSV: a fixed header line followed by every line
# produced by get_lines_to_write for the given input files.
#
# input_filenames - passed straight through to get_lines_to_write
# output_filename - path of the file to create or overwrite
def process(input_filenames, output_filename)
  header = "time.slot,result,digit1,digit2,digit3,date,num.winners"

  File.open(output_filename, "w") do |csv|
    csv.puts(header)
    get_lines_to_write(input_filenames) { |line| csv.puts(line) }
  end
end
# Yields one comma-joined CSV line per row of every input file, with the
# file's time slot prepended as the first field.
# eg: "11am,143,1,4,3,11/16/2006,218"
#
# input_filenames - pairs of time slot => path to the file holding its rows
def get_lines_to_write(input_filenames)
  input_filenames.each do |slot, path|
    row_data(path) do |fields|
      csv_line = [slot].concat(fields).join(",")
      yield csv_line
    end
  end
end
# Parses one saved results page and yields an array of strings per data row:
# the result with its dashes removed, each part of the result, the date and
# the number of winners.
# eg: ['535', '5', '3', '5', '11/8/2006', '155']
#
# input_filename - path to an HTML file containing the
#                  table#ctl00_cphContent_gridLotto grid
#
# Yields nothing when the grid is missing or holds only its header row.
# Rows that lack the cells read below are skipped.
def row_data(input_filename)
  # File.read, not IO.read: IO.read runs a path starting with "|" as a command.
  html = Nokogiri::HTML(File.read(input_filename))
  grid_rows = html.css('table#ctl00_cphContent_gridLotto tr')

  # drop(1) skips the header row and, unlike [1..-1], gives an empty list
  # rather than nil when no rows matched at all.
  grid_rows.drop(1).each do |row|
    columns = row.css('td')
    # cells 1, 2 and 4 are needed; a shorter row cannot be a data row
    next if columns.length < 5

    result_digits = columns[1].text.strip.split("-")
    date = columns[2].text.strip
    num_winners = columns[4].text.strip
    yield [result_digits.join] + result_digits + [date, num_winners]
  end
end
# Script entry point: build the CSV at output_filename from input_filenames.
process(input_filenames, output_filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment