Skip to content

Instantly share code, notes, and snippets.

@aureliennicosia
Last active August 13, 2016 16:05
Show Gist options
  • Save aureliennicosia/d94a3e5925b53eb3ad4761ad844dfc54 to your computer and use it in GitHub Desktop.
Save aureliennicosia/d94a3e5925b53eb3ad4761ad844dfc54 to your computer and use it in GitHub Desktop.
Corrected Ngram Viewer
list.of.packages <- c("ggplot2", "devtools", "lubridate", "nlme",
"plotly", "shinydashboard", "shiny", "git2r")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,
"Package"])]
if (length(new.packages)) install.packages(new.packages)
library(shiny)
library(shinydashboard)
library(devtools)
install_github("ngramr", "seancarmody")
require(ngramr)
library(ggplot2)
library(lubridate)
library(nlme)
library(plotly)
library(shinydashboard)
library(shiny)
library(git2r)
# Setup Shiny app back-end components
# -------------------------------------
server <- function(input, output) {
x.ng <- reactive({
ng <- ngram(input$searchButton2, year_start = input$daterange[1],
year_end = input$daterange[2], count = TRUE, corpus = input$checkbox,
smoothing = 0)
})
x.etall <- reactive({
ngram("et al", year_start = input$daterange[1], year_end = input$daterange[2],
count = TRUE, corpus = input$checkbox, smoothing = 0)
})
x.ofthe <- reactive({
ngram("of the", year_start = input$daterange[1], year_end = input$daterange[2],
count = TRUE, corpus = input$checkbox, smoothing = 0)
})
x.the <- reactive({
ngram("the", year_start = input$daterange[1], year_end = input$daterange[2],
count = TRUE, corpus = input$checkbox, smoothing = 0)
})
x <- reactive({
df = data.frame(p.t = x.ng()$Count/x.the()$Count, q.t = x.etall()$Count/x.ofthe()$Count,
time = x.ng()$Year)
})
x.final <- reactive({
fit = gls(p.t ~ time * q.t, data = x(), corr = corAR1(form = ~1))
# ## simple examples using gamm as alternative to gam
beta <- coef(fit)
beta.trans <- c(beta[3] + beta[1], beta[4] + beta[2],
beta[1], beta[2])
p.t_q.t <- pmax((beta.trans[1] + beta.trans[2] * x()$time) *
x()$q.t, 0)
p.t_Nq.t <- pmax((beta.trans[3] + beta.trans[4] * x()$time) *
(1 - x()$q.t), 0)
prop = c(p.t_q.t, p.t_Nq.t, x()$p.t)
corpus = c(rep("Scientific literature", length(p.t_q.t)),
rep("Non scientific literature", length(p.t_Nq.t)),
rep("Google Viewer", length(x()$p.t)))
df2 <- data.frame(proportion = prop * 100, Trends = corpus,
Year = x()$time)
df2
})
plot.choice = reactive(input$plotChoice)
output$plot.corrected <- renderPlotly({
if (plot.choice() == "all") {
df = x.final()
title.choix <- "Google Viewer and corrected trend"
}
if (plot.choice() == "google") {
df = subset(x.final(), Trends == "Google Viewer")
title.choix <- "Google Viewer trend"
}
if (plot.choice() == "corrected") {
df = subset(x.final(), Trends != "Google Viewer")
title.choix <- "Corrected trend"
}
p2 <- ggplot(df, aes(x = Year, y = proportion, colour = Trends)) +
geom_line() + ggtitle(paste0(title.choix, " of ",
input$searchButton2, sep = " ")) + ylab("Proportion (%)")
ggplotly(p2)
})
}
# This is the user-interface definition of a Shiny web application.
# You can find out more about building applications with Shiny here:
#
# http://shiny.rstudio.com
#
sidebar <- dashboardSidebar(
textInput("searchButton2", h3("Word"), value= "Figure"),
sliderInput("daterange", h3("Year range"),
min = 1800, max = 2008, value = c(1900,2000)),
# dateRangeInput("daterange", h3("Year range:"),format = "yyyy", start="1900-01-01", end="2000-01-01", startview = "decade"),
selectInput("checkbox",
label = h3("Choose the corpus"),
choices = list("American English 2012" = "eng_us_2012",
"American English 2009" = "eng_us_2009",
"British English 2012" = "eng_gb_2012",
"British English 2009" = "eng_gb_2009",
"English 2012" = "eng_2012",
"English 2009" = "eng_2009",
"English Fiction 2012" = "eng_fiction_2012",
"English Fiction 2009" = "eng_fiction_2009",
"Google One Million" = "eng_1m_2009",
"French 2012" = "fre_2012",
"French 2009" = "fre_2009"
),
selected = "eng_us_2009")
)
# Simple header -----------------------------------------------------------
dashboardPage(
dashboardHeader(title = "Corrected Ngrams Viewer by Aurélien Nicosia", titleWidth = 450),
dashboardSidebar(sidebar ,titleWidth = 350),
dashboardBody(
# Boxes need to be put in a row (or column)
fluidRow(
box(
title = "What do you want to plot?",width= 8,collapsible= TRUE,status = "warning", solidHeader = TRUE, br(),
selectInput("plotChoice",
label = NULL,
choices = list("Google Viewer and corrected trend" = "all",
"Google Viewer" = "google", "Corrected trend" = "corrected"),
selected = "all"
)),
box(title= "Extracting useful information from the Google Ngram
dataset: A general method to take the growth of the scientific literature into account" ,statut = "primary",collapsible= TRUE,solidHeader = TRUE, br(),
plotlyOutput("plot.corrected"), width =8)
)
)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment