Skip to content

Instantly share code, notes, and snippets.

@dggoldst
Created November 6, 2017 20:47
Show Gist options
  • Save dggoldst/134d34bef365c393f7ebbc70e0b0fe39 to your computer and use it in GitHub Desktop.
Save dggoldst/134d34bef365c393f7ebbc70e0b0fe39 to your computer and use it in GitHub Desktop.
flips_streaks.R
library(stringr)
library(ggplot2)
library(dplyr)
library(scales)
library(markovchain)
MAXSTREAKLEN=16
#H/T https://math.stackexchange.com/questions/383704/probability-of-streaks for this soln
get_prob = function(streaklen,sequencelen)
{
trans=matrix(0,nrow=streaklen,ncol=streaklen)
for(i in 1:streaklen)
{
if(1==i) {trans[i,1:2]=.5}
else if(i<=(streaklen-1)) {trans[i,c(1,i+1)]=.5}
else if(i==streaklen) {trans[i,streaklen]=1}
else {stop("badness")}
}
mcFlips <- new("markovchain",
states = as.character(1:streaklen),
transitionMatrix = trans)
initialState <- c(1, rep(0,streaklen-1))
(initialState * (mcFlips ^ (sequencelen-1)))[streaklen]
}
#MORE LIKELY THAN NOT (>50% likely)
answers50 = rep(0,MAXSTREAKLEN)
currseqlen = 2
system.time({
for(streaklen in 2:MAXSTREAKLEN)
{
while(get_prob(streaklen,currseqlen)<=.5) { currseqlen = currseqlen+1 }
answers50[streaklen]=currseqlen
}
})
#HIGHLY LIKELY (>99% likely)
answers99 = rep(0,MAXSTREAKLEN)
currseqlen = 2
for(streaklen in 2:MAXSTREAKLEN)
{
while(get_prob(streaklen,currseqlen)<=.99) { currseqlen = currseqlen+1 }
answers99[streaklen]=currseqlen
}
plot_data = data.frame(
streak_length = 1:16,
sequence_length_50 = answers50,
sequence_length_99 = answers99
)[2:10,]
p=ggplot(plot_data,aes(streak_length,sequence_length_50,label=sequence_length_50))
p=p+theme_bw() +
geom_bar(stat="identity",fill="lightblue") +
geom_text(aes(y=sequence_length_50+20)) +
scale_x_continuous(breaks=1:(nrow(plot_data)+1)) +
scale_y_continuous(breaks=seq(0,800,by=100)) +
labs(
x="Streak Length",
y="Number of Flips",
title="Minimum Number of Flips Needed for a Streak\nTo be More Likely Than Not",
subtitle="That is, streak probability greater than 50%"
)
p
ggsave(plot=p,file="flips_50.png",height=4,width=5,dpi=600)
###
plot_data = data.frame(
streak_length = 1:16,
sequence_length_50 = answers50,
sequence_length_99 = answers99
)[2:10,]
p=ggplot(plot_data,aes(streak_length,sequence_length_99,label=comma(sequence_length_99)))
p=p+theme_bw() +
geom_line(alpha=.5,color="blue",aes(y=sequence_length_50)) +
geom_line(alpha=.5) +
geom_text(aes(y=sequence_length_99+400)) +
geom_text(aes(y=sequence_length_50-200,label=comma(sequence_length_50))) +
scale_x_continuous(breaks=1:(nrow(plot_data)+1)) +
labs(
x="Streak Length",
y="Number of Flips",
title="Minimum Number of Flips Needed for Streaks\nTo be Likely or Highly Likely",
subtitle="That is, streak probabilities greater than 50% and 99%"
)
p
ggsave(plot=p,file="flips_50_99.png",height=4,width=5,dpi=600)
#Log version
plot_data = data.frame(
streak_length = 1:16,
sequence_length_50 = answers50,
sequence_length_99 = answers99
)[2:16,]
p=ggplot(plot_data,aes(streak_length,sequence_length_99,label=comma(sequence_length_99)))
p=p+theme_bw() +
geom_line(alpha=.5,color="blue",aes(y=sequence_length_50)) +
geom_line(alpha=.5) +
geom_text(aes(y=sequence_length_99*1.5),size=2.5) +
geom_text(aes(y=sequence_length_50*.5,label=comma(sequence_length_50)),size=2.5) +
scale_x_continuous(breaks=1:(nrow(plot_data)+1)) +
scale_y_log10(breaks=c(3,10,30,100,300,1000,3000,10000,30000,100000,300000),label=comma) +
#scale_y_continuous(breaks=seq(0,800,by=100)) +
labs(
x="Streak Length",
y="Number of Flips",
title="Minimum Number of Flips Needed for Streaks\nTo be Likely or Highly Likely",
subtitle="That is, streak probabilities greater than 50% and 99%"
)
p
ggsave(plot=p,file="flips_log.png",height=4,width=6,dpi=600)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment