/laplace-processing.R Secret
Created
January 30, 2023 16:37
Star
You must be signed in to star a gist
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Calculating the Laplace probability for unsolved mathematical conjectures | |
## install.packages('ggplot2') | |
library(ggplot2) | |
library(ggthemes) | |
## import data | |
dir <- "/home/loki/Blog/wip/laplace-in-practice/.source/" ## <- change. | |
data <- read.csv(paste(dir, "unsolved-mathematical-theorems-with-date.csv", sep=""), header=TRUE) | |
View(data) | |
## define Laplace function | |
laplace = function (year_conjectured, year_now, num_years){ | |
# year_conjectured = 1970 | |
# year_now = 2022 | |
# num_years = 10 | |
years = c(0:(num_years-1)) | |
conditional_laplace_probabilities = sapply(years, function (n){ return (1/((year_now - year_conjectured) + 2 + n))}) | |
probabilities = rep(NA, num_years) | |
for(i in c(1:num_years)){ | |
if(i == 1){ | |
probabilities[1] = conditional_laplace_probabilities[1] | |
}else{ | |
probability_has_happened_before = sum(probabilities[1:(i-1)]) | |
# this (^) is the sum, because the probabilities are unconditional, | |
# i.e., consider if p1=0.5 and p2=0.5 | |
probabilities[i] = (1 - probability_has_happened_before) * conditional_laplace_probabilities[i] | |
} | |
} | |
result = sum(probabilities) | |
return(result) | |
} | |
## Example | |
result1 = laplace(1970, 2022, 3) | |
p1 = 1/(2022 - 1970 + 2 ) | |
p2 = (1 - p1) * 1/(2022 - 1970 + 2 +1 ) | |
p3 = (1 - p1 - p2) * 1/(2022 - 1970 + 2 + 2) | |
result2 = p1 + p2 + p3 | |
result1 == result2 ## Should be TRUE | |
## Process data | |
colnames(data) | |
laplace3 = sapply(data$Approximate.formulation.year, function(n){ return(laplace(n, (2022 + 11/12), 3))}) | |
laplace5 = sapply(data$Approximate.formulation.year, function(n){ return(laplace(n, (2022 + 11/12), 5))}) | |
laplace10 = sapply(data$Approximate.formulation.year, function(n){ return(laplace(n, (2022 + 11/12), 10))}) | |
data$"Laplace 3 years from 2022-11-1" = laplace3 | |
data$"Laplace 5 years from 2022-11-1" = laplace5 | |
data$"Laplace 10 years from 2022-11-1" = laplace10 | |
colnames(data) | |
View(data) | |
data <- write.csv(data, file=paste(dir, "unsolved-mathematical-theorems-with-date-with-laplace-probability.csv", sep="")) | |
## Calculate distribution of probabilities | |
l = length(laplace5) | |
l | |
getSample = function(probs){ | |
l = length(probs) | |
xs = runif(l) | |
num_true = 0 | |
for(i in c(1:l)){ | |
if(xs[i] < probs[i]){ | |
num_true = num_true + 1 | |
} | |
} | |
return(num_true) | |
} | |
set.seed(0) | |
getSample5 = function(n){ getSample(laplace5) } | |
samples5 = sapply(c(1:10000), getSample5) | |
sum(samples5) # as a check, 171863 | |
getSample10 = function(n){ getSample(laplace10) } | |
samples10 = sapply(c(1:10000), getSample10) | |
sum(samples10) # as a check, 312202 | |
getSample3 = function(n){ getSample(laplace3) } | |
samples3 = sapply(c(1:10000), getSample3) | |
sum(samples3) # as a check, 107275 | |
## Get confidence intervals | |
getCdf = function(sampleset, density){ | |
l = length(sampleset) | |
point = round(l * density) | |
sortedSampleSet = sort(sampleset) | |
result = sortedSampleSet[point] | |
return(result) | |
} | |
getCdf(samples3, 0.05) | |
getCdf(samples3, 0.95) | |
getCdf(samples3, 0.01) | |
getCdf(samples3, 0.99) | |
getCdf(samples5, 0.05) | |
getCdf(samples5, 0.95) | |
getCdf(samples5, 0.01) | |
getCdf(samples5, 0.99) | |
getCdf(samples10, 0.05) | |
getCdf(samples10, 0.95) | |
getCdf(samples10, 0.01) | |
getCdf(samples10, 0.99) | |
## Create histogram | |
samples = list() | |
samples$by_3 = samples3 | |
samples$by_5 = samples5 | |
samples$by_10 = samples10 | |
samples = as.data.frame(samples) | |
ggplot(samples, aes(by_5, after_stat(density))) + | |
geom_histogram(binwidth = 1, fill="navyblue", color="white") | |
ggplot(samples, aes(by_10, after_stat(density))) + | |
geom_histogram(binwidth = 1, fill="navyblue", color="white") | |
## Display histograms nicely | |
### For 3 years | |
title_text = "How many of these conjectures will be solved in 3 years" | |
subtitle_text = "according to Laplace's law" | |
label_x_axis = element_blank() | |
label_y_axis = "density" | |
ggplot(samples, aes(by_3, after_stat(density))) + | |
geom_histogram(binwidth = 1, fill="navyblue", color="white")+ | |
labs( | |
title=title_text, | |
subtitle=subtitle_text, | |
x=label_x_axis, | |
y=label_y_axis | |
) + | |
# scale_y_continuous(labels = scales::percent, breaks=seq(0,1,by=0.01), limits=c(0,0.11)) + | |
scale_x_continuous(breaks=seq(2,40,by=2), limits=c(1,22)) + | |
theme_tufte() + | |
theme( | |
axis.ticks.x = element_blank(), | |
legend.title = element_blank(), | |
plot.title = element_text(hjust = 0.5), | |
plot.subtitle = element_text(hjust = 0.5), | |
legend.position="bottom", | |
legend.box="vertical", | |
axis.text.x=element_text(angle=0, hjust=0.5), | |
plot.background=element_rect(fill = "white",colour = NA) | |
) | |
setwd(dir) | |
height = 5 | |
width = floor(height*(1+sqrt(5))/3) | |
ggsave("sample3.png", width=width, height=height) | |
### For 5 years | |
title_text = "How many of these conjectures will be solved in 5 years" | |
subtitle_text = "according to Laplace's law" | |
label_x_axis = element_blank() | |
label_y_axis = "density" | |
ggplot(samples, aes(by_5, after_stat(density))) + | |
geom_histogram(binwidth = 1, fill="navyblue", color="white")+ | |
labs( | |
title=title_text, | |
subtitle=subtitle_text, | |
x=label_x_axis, | |
y=label_y_axis | |
) + | |
# scale_y_continuous(labels = scales::percent, breaks=seq(0,1,by=0.01), limits=c(0,0.11)) + | |
scale_x_continuous(breaks=seq(1,40,by=2), limits=c(7,29)) + | |
theme_tufte() + | |
theme( | |
axis.ticks.x = element_blank(), | |
legend.title = element_blank(), | |
plot.title = element_text(hjust = 0.5), | |
plot.subtitle = element_text(hjust = 0.5), | |
legend.position="bottom", | |
legend.box="vertical", | |
axis.text.x=element_text(angle=0, hjust=0.5), | |
plot.background=element_rect(fill = "white",colour = NA) | |
) | |
setwd(dir) | |
height = 5 | |
width = floor(height*(1+sqrt(5))/3) | |
ggsave("sample5.png", width=width, height=height) | |
### For 10 years | |
title_text = "How many of these conjectures will be solved in 10 years" | |
subtitle_text = "according to Laplace's law" | |
label_x_axis = element_blank() | |
label_y_axis = "density" | |
ggplot(samples, aes(by_10, after_stat(density))) + | |
geom_histogram(binwidth = 1, fill="navyblue", color="white")+ | |
labs( | |
title=title_text, | |
subtitle=subtitle_text, | |
x=label_x_axis, | |
y=label_y_axis | |
) + | |
# scale_y_continuous(labels = scales::percent, breaks=seq(0,1,by=0.01), limits=c(0,0.11)) + | |
scale_x_continuous(breaks=seq(1,100,by=2), limits=c(17,47)) + | |
theme_tufte() + | |
theme( | |
axis.ticks.x = element_blank(), | |
legend.title = element_blank(), | |
plot.title = element_text(hjust = 0.5), | |
plot.subtitle = element_text(hjust = 0.5), | |
legend.position="bottom", | |
legend.box="vertical", | |
axis.text.x=element_text(angle=0, hjust=0.5), | |
plot.background=element_rect(fill = "white",colour = NA) | |
) | |
setwd(dir) | |
height = 5 | |
width = floor(height*(1+sqrt(5))/3) | |
ggsave("sample10.png", width=width, height=height) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment