Created
September 3, 2017 02:12
-
-
Save rundel/5585a28862480da1bf43bd7997260b71 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(shiny)
library(tidyverse)
library(gridExtra)
# User interface -------------------------------------------------------------
#
# Layout: a sidebar holding the ciphertext input ("ins") and a main panel
# holding, in order, the decrypted text ("outs"), an explanation of the
# algorithm, the English reference chart ("c3") and the ciphertext charts
# ("c1c2"). The ids used here must match the `input$...` / `output$...` names
# used by the server function.
ui <- fluidPage(
  titlePanel("Frequency Decryption: A Broken Algorithm"),
  tags$br(),
  tags$br(),
  sidebarLayout(
    sidebarPanel(
      textAreaInput(
        inputId = "ins",
        label = "insert cipher text here",
        width = "400px",
        height = "500px",
        placeholder = "ciphertext"
      )
    ),
    mainPanel(
      tags$div(
        # textOutput() emits a <div>; it is placed directly in the container
        # because a <div> nested inside a <p> is invalid HTML.
        textOutput(outputId = "outs"),
        tags$div(
          class = "header",
          tags$h3("How does this algorithm work?"),
          tags$p("And why doesn't it work?"),
          tags$br(),
          tags$p("The decryption algorithm is based on the frequency
                 with which letters appear in English speech and prose.
                 Letters like E, T, A, and O are much more common than
                 J, X, Q, and Z, and their frequencies follow a distribution.
                 This is actually true for every language, and is one of the
                 ways in which linguists determine whether an unknown language
                 is real or a hoax. What this algorithm does is take your
                 ciphertext and re-sort it based on letter frequency, then
                 transposes those letters based on the frequency in the
                 English language."),
          tags$p("So if the most common letter in your ciphertext is Q, then
                 it will be replaced with the most common letter in English, E."),
          tags$p("So why doesn't it work? Because the frequencies given are
                 based on standard English speech overall-- even encrypted text
                 with tens of thousands of words still doesn't always approximate
                 the actual frequencies of letters in English speech."),
          tags$p("If you use this tool with a big enough sample, you'll find
                 whispers of real words start to become apparent, but even
                 common words like 'the' will still end up looking silly, like
                 'tde'.")
        ),
        plotOutput("c3"),
        plotOutput("c1c2")
      )
    )
  )
)
# Helpers --------------------------------------------------------------------
# These are pure functions, so they are defined once at file level rather
# than being re-created inside `server` for every session.

# Count the letters a-z in `x`, ignoring case.
#
# @param x Character vector (or NULL); elements are joined before counting.
# @return Named integer vector with one entry per letter that occurs at least
#   once, sorted by decreasing count. Ties keep alphabetical order.
count_letters <- function(x) {
  chars <- strsplit(tolower(paste(x, collapse = "\n")), "")[[1]]
  # match() gives NA for anything that is not one of the 26 lower-case ASCII
  # letters (digits, punctuation, underscores, accented letters, ...), so
  # only real letters are counted.
  idx <- match(chars, letters)
  counts <- tabulate(idx[!is.na(idx)], nbins = length(letters))
  names(counts) <- letters
  counts <- counts[counts > 0L]
  # The second key makes the tie-break explicit instead of relying on the
  # sort method being stable.
  counts[order(-counts, seq_along(counts))]
}

# Naive frequency-analysis "decryption".
#
# The most frequent letter of `x` is replaced by "e", the second most
# frequent by "t", and so on down `english.by.frequency`. Upper-case letters
# receive the same substitution in upper case; every other character is left
# as it is.
#
# @param x Ciphertext (character; NULL is treated as empty text).
# @return A single string with the substitution applied. Text containing no
#   letters is returned unchanged.
freq.sort <- function(x) {
  english.by.frequency <- "etaoinshrdlcumwfgypbvkjxqz"
  text <- paste(x, collapse = "\n")
  counts <- count_letters(text)
  if (length(counts) == 0L) {
    return(text)
  }
  observed <- paste(names(counts), collapse = "")
  # Use only as many target letters as were observed, so chartr() always gets
  # `old` and `new` of equal length and no padding characters are needed.
  target <- substr(english.by.frequency, 1L, length(counts))
  chartr(
    paste0(observed, toupper(observed)),
    paste0(target, toupper(target)),
    text
  )
}

# Letter counts of `x` as a data frame for plotting.
#
# @param x Text to analyse (character or NULL).
# @return Data frame with columns `let.freqs` (factor whose levels are in
#   decreasing-count order, so bars are drawn in that order) and `Freq`
#   (integer count). Has zero rows when `x` contains no letters.
create_data_frame <- function(x) {
  counts <- count_letters(x)
  data.frame(
    let.freqs = factor(names(counts), levels = names(counts)),
    # unname() so the letter names do not become the row names.
    Freq = unname(counts)
  )
}

# Bar chart of absolute letter counts.
#
# @param dat Data frame as returned by create_data_frame().
# @param low,high Colours for the two ends of the fill gradient.
# @param title Plot title.
# @return A ggplot object.
plot_letter_counts <- function(dat, low, high, title) {
  ggplot(dat, aes(let.freqs, Freq, fill = Freq)) +
    geom_col() +
    scale_fill_gradient(low = low, high = high) +
    labs(
      x = "letter",
      y = "frequency",
      title = title,
      subtitle = "By Absolute Count"
    )
}

# Server ---------------------------------------------------------------------
#
# Reads `input$ins` (the ciphertext) and fills three outputs:
#   outs - the ciphertext after frequency substitution
#   c3   - reference chart of English letter frequencies
#   c1c2 - letter counts of the ciphertext (top) and of the decrypted text
#          (bottom)
# shiny, tidyverse and gridExtra are attached by the library() calls at the
# top of the file, so they are not attached again here.
server <- function(input, output) {
  # Shared by the text output and the second chart.
  decrypted <- reactive({
    freq.sort(input$ins)
  })

  # renderText() displays the returned string; the function must return the
  # text rather than cat() it, because cat() returns NULL.
  output$outs <- renderText({
    decrypted()
  })

  # Reference data: relative frequency (percent) of each letter in English,
  # listed from most to least frequent.
  lets <- c("e", "t", "a", "o", "i", "n", "s", "h", "r", "d", "l", "c", "u",
            "m", "w", "f", "g", "y", "p", "b", "v", "k", "j", "x", "q", "z")
  percs <- c(12.702, 9.056, 8.167, 7.507, 6.966, 6.749, 6.327, 6.094, 5.987,
             4.253, 4.025, 2.782, 2.758, 2.406, 2.36, 2.228, 2.015, 1.974,
             1.929, 1.492, 0.978, 0.772, 0.153, 0.15, 0.095, 0.074)
  eng <- data.frame(lets, percs)

  output$c3 <- renderPlot({
    ggplot(eng, aes(x = reorder(lets, -percs), y = percs, fill = percs)) +
      geom_col() +
      scale_fill_gradient(low = "#DCCD59", high = "#2F3316") +
      labs(
        x = "letter",
        y = "frequency",
        title = "Frequency of Letters in Standard English",
        subtitle = "By Percent"
      )
  })

  # dat1: counts in the text as typed; dat2: counts after substitution.
  dat1 <- reactive({
    create_data_frame(input$ins)
  })
  dat2 <- reactive({
    create_data_frame(decrypted())
  })

  output$c1c2 <- renderPlot({
    # Show a hint instead of empty charts until some letters are entered.
    validate(need(
      nrow(dat1()) > 0,
      "Enter some ciphertext to see its letter frequencies."
    ))
    grid.arrange(
      plot_letter_counts(
        dat1(),
        low = "#C35521", high = "#3F1F11",
        title = "Frequency of Letters in your Encrypted text"
      ),
      plot_letter_counts(
        dat2(),
        low = "#1E120F", high = "#A04445",
        title = "Frequency of Letters in your Decrypted text"
      )
    )
  })
}
# Build the app object from the UI definition and the server function; it is
# the last expression in the file.
shinyApp(ui, server)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment