Skip to content

Instantly share code, notes, and snippets.

@rundel
Created September 3, 2017 02:12
Show Gist options
  • Save rundel/5585a28862480da1bf43bd7997260b71 to your computer and use it in GitHub Desktop.
Save rundel/5585a28862480da1bf43bd7997260b71 to your computer and use it in GitHub Desktop.
library(shiny)
library(tidyverse)
library(gridExtra)
# Page layout for the frequency-decryption demo.
#
# Sidebar: a text area ("ins") where the user pastes the cipher text.
# Main panel: the decoded text ("outs"), a short explanation of the
# algorithm, the reference chart of English letter frequencies ("c3") and
# the letter-frequency charts for the user's text ("c1c2").
#
# The explanatory paragraphs are assembled with paste(sep = "\n") so the
# source can be indented without changing the text that is sent to the
# browser.
ui <- fluidPage(
  titlePanel("Frequency Decryption: A Broken Algorithm"),
  tags$br(),
  tags$br(),
  sidebarLayout(
    sidebarPanel(
      textAreaInput(
        inputId = "ins",
        label = "insert cipher text here",
        width = "400px",
        height = "500px",
        placeholder = "ciphertext"
      )
    ),
    mainPanel(
      tags$div(
        # inline = TRUE makes textOutput() emit a <span>. Its default
        # container is a <div>, which is not valid inside a <p>.
        tags$p(textOutput(outputId = "outs", inline = TRUE)),
        tags$div(
          class = "header",
          tags$h3("How does this algorithm work?"),
          tags$p("And why doesn't it work?"),
          tags$br(),
          tags$p(paste(
            "The decryption algorithm is based on the frequency",
            "with which letters appear in English speech and prose.",
            "Letters like E, T, A, and O are much more common than",
            "J, X, Q, and Z, and their frequencies follow a distribution.",
            "This is actually true for every language, and is one of the",
            "ways in which linguists determine whether an unknown language",
            "is real or a hoax. What this algorithm does is take your",
            "ciphertext and re-sort it based on letter frequency, then",
            "transposes those letters based on the frequency in the",
            "English language.",
            sep = "\n"
          )),
          tags$p(paste(
            "So if the most common letter in your ciphertext is Q, then",
            "it will be replaced with the most common letter in English, E.",
            sep = "\n"
          )),
          tags$p(paste(
            "So why doesn't it work? Because the frequencies given are",
            "based on standard English speech overall-- even encrypted text",
            "with tens of thousands of words still doesn't always approximate",
            "the actual frequencies of letters in English speech.",
            sep = "\n"
          )),
          tags$p(paste(
            "If you use this tool with a big enough sample, you'll find",
            "whispers of real words start to become apparent, but even",
            "common words like 'the' will still end up looking silly, like",
            "'tde'.",
            sep = "\n"
          ))
        ),
        plotOutput("c3"),
        plotOutput("c1c2")
      )
    )
  )
)
# English letters ordered from most to least frequent. Shared by the decoder
# and by the reference chart so the two can never disagree.
english_by_frequency <- "etaoinshrdlcumwfgypbvkjxqz"

# Count the ASCII letters a-z in a single string, ignoring case.
#
# x: a length-one character vector. Anything else (NULL, NA, longer vectors)
#    is treated as containing no letters.
# Returns a named integer vector, names = letters, sorted by decreasing
# count with ties broken alphabetically. Empty if `x` has no letters.
letter_counts <- function(x) {
  no_letters <- stats::setNames(integer(0), character(0))
  if (length(x) != 1L || is.na(x)) {
    return(no_letters)
  }
  chars <- strsplit(tolower(x), "")[[1L]]
  # Compare against `letters` rather than a regex class: "\\W" keeps "_" and
  # non-ASCII letters, which the 26-letter substitution cannot handle.
  chars <- chars[chars %in% letters]
  if (length(chars) == 0L) {
    return(no_letters)
  }
  tab <- table(chars)
  counts <- stats::setNames(as.integer(tab), names(tab))
  counts[order(-counts, names(counts))]
}

# Decode a substitution cipher by naive frequency matching: the most common
# letter in `x` becomes "e", the second most common "t", and so on.
#
# x: a length-one character vector holding the cipher text.
# Returns the decoded text as a length-one character vector. Letter case is
# preserved and every non-letter character is left as is. Returns "" for
# NULL, NA or non-scalar input.
freq.sort <- function(x) {
  if (length(x) != 1L || is.na(x)) {
    return("")
  }
  observed <- paste(names(letter_counts(x)), collapse = "")
  n_seen <- nchar(observed)
  if (n_seen == 0L) {
    return(x)
  }
  # Only the letters that actually occur need a mapping, so no padding of
  # the alphabet is required.
  target <- substr(english_by_frequency, 1L, n_seen)
  # Counts are case-insensitive, so map both cases or upper-case cipher text
  # would pass through untouched.
  chartr(
    paste0(observed, toupper(observed)),
    paste0(target, toupper(target)),
    x
  )
}

# Build the data frame of letter frequencies used by the bar charts.
#
# x: a length-one character vector.
# Returns a data frame with one row per distinct letter: `let.freqs`, a
# factor whose levels follow decreasing frequency (this fixes the bar
# order), and `Freq`, the integer count. Zero rows if `x` has no letters.
create_data_frame <- function(x) {
  counts <- letter_counts(x)
  data.frame(
    let.freqs = factor(names(counts), levels = names(counts)),
    Freq = unname(counts)
  )
}

# Bar chart of absolute letter counts for one create_data_frame() result.
#
# dat: data frame with columns `let.freqs` and `Freq`.
# title: plot title. low, high: end colours of the fill gradient.
plot_letter_counts <- function(dat, title, low, high) {
  ggplot(dat, aes(let.freqs, Freq, fill = Freq)) +
    geom_col() +
    scale_fill_gradient(low = low, high = high) +
    labs(
      x = "letter",
      y = "frequency",
      title = title,
      subtitle = "By Absolute Count"
    )
}

# Shiny server function.
#
# Reads input$ins (the cipher text) and fills three outputs:
#   outs - the frequency-decoded text,
#   c3   - reference chart of English letter frequencies,
#   c1c2 - letter counts of the cipher text and of the decoded text.
server <- function(input, output) {
  # Decoded once and shared by the text output and the second chart; an
  # output slot cannot be read back from inside the server.
  decoded <- reactive(freq.sort(input$ins))

  output$outs <- renderText(decoded())

  # Reference data: percentage of each letter in standard English, listed in
  # the same order as `english_by_frequency`.
  eng <- data.frame(
    lets = strsplit(english_by_frequency, "")[[1L]],
    percs = c(
      12.702, 9.056, 8.167, 7.507, 6.966, 6.749, 6.327, 6.094, 5.987,
      4.253, 4.025, 2.782, 2.758, 2.406, 2.36, 2.228, 2.015, 1.974,
      1.929, 1.492, 0.978, 0.772, 0.153, 0.15, 0.095, 0.074
    )
  )

  output$c3 <- renderPlot({
    ggplot(eng, aes(x = reorder(lets, -percs), y = percs, fill = percs)) +
      geom_col() +
      scale_fill_gradient(low = "#DCCD59", high = "#2F3316") +
      labs(
        x = "letter",
        y = "frequency",
        title = "Frequency of Letters in Standard English",
        subtitle = "By Percent"
      )
  })

  dat1 <- reactive(create_data_frame(input$ins))
  dat2 <- reactive(create_data_frame(decoded()))

  output$c1c2 <- renderPlot({
    # Nothing to chart until the text contains at least one letter.
    req(nrow(dat1()) > 0)
    grid.arrange(
      plot_letter_counts(
        dat1(),
        title = "Frequency of Letters in your Encrypted text",
        low = "#C35521",
        high = "#3F1F11"
      ),
      plot_letter_counts(
        dat2(),
        title = "Frequency of Letters in your Decrypted text",
        low = "#1E120F",
        high = "#A04445"
      )
    )
  })
}
# Bundle the page layout and the server logic into a Shiny app object. This
# must stay the last expression in the file so the app object is its value.
shinyApp(ui, server)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment