-
-
Save NunoSempere/45b16dcb6c9e240a698beb001cb1f266 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Calculating the Laplace probability for unsolved mathematical conjectures | |
## install.packages('ggplot2') | |
library(ggplot2) | |
library(ggthemes) | |
## import data | |
dir <- "/home/loki/Blog/wip/laplace-in-practice/.source/" ## <- change. | |
data <- read.csv(paste(dir, "unsolved-mathematical-theorems-with-date.csv", sep=""), header=TRUE) | |
View(data) | |
## define Laplace function | |
laplace = function (year_conjectured, year_now, num_years){ | |
# year_conjectured = 1970 | |
# year_now = 2022 | |
# num_years = 10 | |
years = c(0:(num_years-1)) | |
conditional_laplace_probabilities = sapply(years, function (n){ return (1/((year_now - year_conjectured) + 2 + n))}) | |
probabilities = rep(NA, num_years) | |
for(i in c(1:num_years)){ | |
if(i == 1){ | |
probabilities[1] = conditional_laplace_probabilities[1] | |
}else{ | |
probability_has_happened_before = sum(probabilities[1:(i-1)]) | |
# this (^) is the sum, because the probabilities are unconditional, | |
# i.e., consider if p1=0.5 and p2=0.5 | |
probabilities[i] = (1 - probability_has_happened_before) * conditional_laplace_probabilities[i] | |
} | |
} | |
result = sum(probabilities) | |
return(result) | |
} | |
## Example | |
result1 = laplace(1970, 2022, 3) | |
p1 = 1/(2022 - 1970 + 2 ) | |
p2 = (1 - p1) * 1/(2022 - 1970 + 2 +1 ) | |
p3 = (1 - p1 - p2) * 1/(2022 - 1970 + 2 + 2) | |
result2 = p1 + p2 + p3 | |
result1 == result2 ## Should be TRUE | |
## Process data | |
colnames(data) | |
laplace3 = sapply(data$Approximate.formulation.year, function(n){ return(laplace(n, (2022 + 11/12), 3))}) | |
laplace5 = sapply(data$Approximate.formulation.year, function(n){ return(laplace(n, (2022 + 11/12), 5))}) | |
laplace10 = sapply(data$Approximate.formulation.year, function(n){ return(laplace(n, (2022 + 11/12), 10))}) | |
data$"Laplace 3 years from 2022-11-1" = laplace3 | |
data$"Laplace 5 years from 2022-11-1" = laplace5 | |
data$"Laplace 10 years from 2022-11-1" = laplace10 | |
colnames(data) | |
View(data) | |
data <- write.csv(data, file=paste(dir, "unsolved-mathematical-theorems-with-date-with-laplace-probability.csv", sep="")) | |
## Calculate distribution of probabilities | |
l = length(laplace5) | |
l | |
getSample = function(probs){ | |
l = length(probs) | |
xs = runif(l) | |
num_true = 0 | |
for(i in c(1:l)){ | |
if(xs[i] < probs[i]){ | |
num_true = num_true + 1 | |
} | |
} | |
return(num_true) | |
} | |
set.seed(0) | |
getSample5 = function(n){ getSample(laplace5) } | |
samples5 = sapply(c(1:10000), getSample5) | |
sum(samples5) # as a check, 171863 | |
getSample10 = function(n){ getSample(laplace10) } | |
samples10 = sapply(c(1:10000), getSample10) | |
sum(samples10) # as a check, 312202 | |
getSample3 = function(n){ getSample(laplace3) } | |
samples3 = sapply(c(1:10000), getSample3) | |
sum(samples3) # as a check, 107275 | |
## Get confidence intervals | |
getCdf = function(sampleset, density){ | |
l = length(sampleset) | |
point = round(l * density) | |
sortedSampleSet = sort(sampleset) | |
result = sortedSampleSet[point] | |
return(result) | |
} | |
getCdf(samples3, 0.05) | |
getCdf(samples3, 0.95) | |
getCdf(samples3, 0.01) | |
getCdf(samples3, 0.99) | |
getCdf(samples5, 0.05) | |
getCdf(samples5, 0.95) | |
getCdf(samples5, 0.01) | |
getCdf(samples5, 0.99) | |
getCdf(samples10, 0.05) | |
getCdf(samples10, 0.95) | |
getCdf(samples10, 0.01) | |
getCdf(samples10, 0.99) | |
## Create histogram | |
samples = list() | |
samples$by_3 = samples3 | |
samples$by_5 = samples5 | |
samples$by_10 = samples10 | |
samples = as.data.frame(samples) | |
ggplot(samples, aes(by_5, after_stat(density))) + | |
geom_histogram(binwidth = 1, fill="navyblue", color="white") | |
ggplot(samples, aes(by_10, after_stat(density))) + | |
geom_histogram(binwidth = 1, fill="navyblue", color="white") | |
## Display histograms nicely | |
### For 3 years | |
title_text = "How many of these conjectures will be solved in 3 years" | |
subtitle_text = "according to Laplace's law" | |
label_x_axis = element_blank() | |
label_y_axis = "density" | |
ggplot(samples, aes(by_3, after_stat(density))) + | |
geom_histogram(binwidth = 1, fill="navyblue", color="white")+ | |
labs( | |
title=title_text, | |
subtitle=subtitle_text, | |
x=label_x_axis, | |
y=label_y_axis | |
) + | |
# scale_y_continuous(labels = scales::percent, breaks=seq(0,1,by=0.01), limits=c(0,0.11)) + | |
scale_x_continuous(breaks=seq(2,40,by=2), limits=c(1,22)) + | |
theme_tufte() + | |
theme( | |
axis.ticks.x = element_blank(), | |
legend.title = element_blank(), | |
plot.title = element_text(hjust = 0.5), | |
plot.subtitle = element_text(hjust = 0.5), | |
legend.position="bottom", | |
legend.box="vertical", | |
axis.text.x=element_text(angle=0, hjust=0.5), | |
plot.background=element_rect(fill = "white",colour = NA) | |
) | |
setwd(dir) | |
height = 5 | |
width = floor(height*(1+sqrt(5))/3) | |
ggsave("sample3.png", width=width, height=height) | |
### For 5 years | |
title_text = "How many of these conjectures will be solved in 5 years" | |
subtitle_text = "according to Laplace's law" | |
label_x_axis = element_blank() | |
label_y_axis = "density" | |
ggplot(samples, aes(by_5, after_stat(density))) + | |
geom_histogram(binwidth = 1, fill="navyblue", color="white")+ | |
labs( | |
title=title_text, | |
subtitle=subtitle_text, | |
x=label_x_axis, | |
y=label_y_axis | |
) + | |
# scale_y_continuous(labels = scales::percent, breaks=seq(0,1,by=0.01), limits=c(0,0.11)) + | |
scale_x_continuous(breaks=seq(1,40,by=2), limits=c(7,29)) + | |
theme_tufte() + | |
theme( | |
axis.ticks.x = element_blank(), | |
legend.title = element_blank(), | |
plot.title = element_text(hjust = 0.5), | |
plot.subtitle = element_text(hjust = 0.5), | |
legend.position="bottom", | |
legend.box="vertical", | |
axis.text.x=element_text(angle=0, hjust=0.5), | |
plot.background=element_rect(fill = "white",colour = NA) | |
) | |
setwd(dir) | |
height = 5 | |
width = floor(height*(1+sqrt(5))/3) | |
ggsave("sample5.png", width=width, height=height) | |
### For 10 years | |
title_text = "How many of these conjectures will be solved in 10 years" | |
subtitle_text = "according to Laplace's law" | |
label_x_axis = element_blank() | |
label_y_axis = "density" | |
ggplot(samples, aes(by_10, after_stat(density))) + | |
geom_histogram(binwidth = 1, fill="navyblue", color="white")+ | |
labs( | |
title=title_text, | |
subtitle=subtitle_text, | |
x=label_x_axis, | |
y=label_y_axis | |
) + | |
# scale_y_continuous(labels = scales::percent, breaks=seq(0,1,by=0.01), limits=c(0,0.11)) + | |
scale_x_continuous(breaks=seq(1,100,by=2), limits=c(17,47)) + | |
theme_tufte() + | |
theme( | |
axis.ticks.x = element_blank(), | |
legend.title = element_blank(), | |
plot.title = element_text(hjust = 0.5), | |
plot.subtitle = element_text(hjust = 0.5), | |
legend.position="bottom", | |
legend.box="vertical", | |
axis.text.x=element_text(angle=0, hjust=0.5), | |
plot.background=element_rect(fill = "white",colour = NA) | |
) | |
setwd(dir) | |
height = 5 | |
width = floor(height*(1+sqrt(5))/3) | |
ggsave("sample10.png", width=width, height=height) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment