Skip to content

Instantly share code, notes, and snippets.

@iMissile
Created February 2, 2018 07:06
Show Gist options
  • Save iMissile/be801189199532e68677d4f0b059141d to your computer and use it in GitHub Desktop.
Save iMissile/be801189199532e68677d4f0b059141d to your computer and use it in GitHub Desktop.
Shiny приложение для аналитики squid
library(tidyverse)
library(lubridate)
library(glue)
library(scales)
library(forcats)
library(readxl)
library(magrittr)
library(stringi)
library(futile.logger)
library(jsonlite)
library(Cairo)
library(RColorBrewer)
library(extrafont)
library(hrbrthemes)
library(shiny)
library(shinyjqui)
library(shinythemes)
library(shinyBS)
library(shinyjs)
library(shinyWidgets)
library(shinycssloaders)
library(anytime)
library(tictoc)
library(assertr)
library(checkmate)
options(shiny.reactlog=TRUE)
options(spinner.type=4)
eval(base::parse("funcs.R", encoding="UTF-8"))
# ================================================================
ui <-
navbarPage(
title="Squid статистика",
tabPanel("Статистика", value="general_panel"),
tabPanel("Настройки", value="config_panel"),
id="tsp",
theme=shinytheme("yeti"),
tags$head(tags$style(".rightAlign{float:right;}")),
conditionalPanel(
# general panel -----------------------
condition="input.tsp=='general_panel'",
fluidRow(
column(10, {}),
column(2, actionButton("process_btn", "Загрузить лог", class='rightAlign'))
),
tabsetPanel(
id="main_panel",
selected="graph_tab",
tabPanel("Таблица", value="table_tab",
p(),
mainPanel(
fluidRow(
column(6, div(withSpinner(DT::dataTableOutput("url_volume_table"))),
style="font-size: 90%"),
column(6, div(withSpinner(DT::dataTableOutput("url_host_volume_table"))),
style="font-size: 90%")
), width=10),
# ----------------
sidebarPanel(
selectInput("depth_filter", "Глубина данных",
choices=c('10 минут'=10,
'1 час'=60,
'24 часа'=24*60)
),
width=2)
),
tabPanel("График", value="graph_tab",
fluidRow(
p(),
jqui_sortabled(
div(id='top10_plots',
column(4, div(withSpinner(plotOutput('top10_left_plot', height="400px")))),
column(4, div(withSpinner(plotOutput('top10_center_plot', height="400px")))),
column(4, div(withSpinner(plotOutput('top10_right_plot', height="400px"))))
))
),
fluidRow(
column(12, div(withSpinner(plotOutput('timeline_plot', height="400px"))))
)
)
)
),
conditionalPanel(
# config panel -----------------------
condition = "input.tsp=='config_panel'",
fluidRow(
column(2, actionButton("set_test_dates_btn", "На демо дату", class='rightAlign'))
),
fluidRow(
column(12, verbatimTextOutput("info_text"))
)
),
shinyjs::useShinyjs() # Include shinyjs
)
# ================================================================
server <- function(input, output, session) {
# статические переменные ------------------------------------------------
log_name <- "app.log"
flog.appender(appender.tee(log_name))
flog.threshold(TRACE)
# реактивные переменные -------------------
raw_df <- reactive({
input$process_btn # обновлять будем вручную
# загрузим лог squid -------
# loadSquidLog("./data/acc.log")
tic()
data <- httr::content(httr::GET("http://10.0.0.246/access.log"))
flog.info(glue("Downloading log file from server: {capture.output(toc())}"))
tic()
df <- loadSquidLog(data)
flog.info(glue("Parsing log file from server: {capture.output(toc())}"))
df
})
squid_df <- reactive({
req(raw_df()) %>%
filter(timestamp>now()-days(2))
})
url_df <- reactive({
df <- req(squid_df()) %>%
filter(timestamp>now()-minutes(as.numeric(input$depth_filter))) %>%
select(host, bytes, url) %>%
group_by(url) %>%
summarise(volume=round(sum(bytes)/1024/1024, 1)) %>% # Перевели в Мб
arrange(desc(volume))
df
})
url_host_df <- reactive(({
# посчитаем сводку по отдельным HOST
req(url_df())
ids <- req(input$url_volume_table_rows_selected) # проводим анализ при выборе строки в таблице
flog.info(glue("Selected row num is {ids}. Data row: {capture.output(str(url_df()[ids, ]))}"))
url <- url_df()[[ids, "url"]]
url_val <- enquo(url) # превратили в строку
isolate({
df <- squid_df() %>%
filter(timestamp>now()-minutes(as.numeric(input$depth_filter))) %>%
filter(url==!!url_val) %>%
select(bytes, host) %>%
group_by(host) %>%
summarise(volume=round(sum(bytes)/1024/1024, 1)) %>% # Перевели в Мб
arrange(desc(volume))
})
df
}))
msg <- reactiveVal("")
# таблица с выборкой по каналам ----------------------------
output$url_volume_table <- DT::renderDataTable({
df <- req(url_df())
DT::datatable(df,
class='cell-border stripe',
rownames=FALSE,
filter='bottom',
selection=list(mode="single", target="row"),
options=list(dom='fltip',
pageLength=7,
lengthMenu=c(5, 7, 10, 15, 50),
order=list(list(1, 'desc')))) # нумерация с 0
})
# таблица-детализация по URL в разрезе ----------------------------
output$url_host_volume_table <- DT::renderDataTable({
df <- req(url_host_df())
DT::datatable(df,
class='cell-border stripe',
rownames=FALSE,
filter='bottom',
options=list(dom='fltip', #autoWidth=TRUE,
pageLength=7, lengthMenu=c(5, 7, 10, 15)))
})
# график Топ10 -------------
output$top10_left_plot <- renderPlot({
df <- req(squid_df()) %>%
filter(timestamp>now()-days(1))
plotTopHostDownload(df, subtitle="за последние сутки")
})
output$top10_center_plot <- renderPlot({
df <- req(squid_df()) %>%
filter(timestamp>now()-minutes(60))
plotTopHostDownload(df, subtitle="за последний час")
})
output$top10_right_plot <- renderPlot({
df <- req(squid_df()) %>%
filter(timestamp>now()-minutes(10))
plotTopHostDownload(df, subtitle="за последние 10 минут")
})
output$timeline_plot <- renderPlot({
df <- req(squid_df()) %>%
filter(timestamp>now()-days(1)) %>%
mutate(timegroup=hgroup.enum(timestamp, mins_bin=10)) %>%
select(timegroup, host, bytes) %>%
group_by(timegroup, host) %>%
summarise(volume=sum(bytes)/1024/1024*8/(10*60)) %>% # Перевели в Мбит/с
top_n(10, volume)
gp <- ggplot(df, aes(timegroup, volume)) +
geom_area(aes(fill=host), alpha=0.5, position="stack") +
scale_color_brewer(palette="Set1") +
scale_x_datetime(labels=date_format_tz("%d.%m\n%H:%M", tz="Europe/Moscow"),
breaks=date_breaks("4 hours"),
minor_breaks=date_breaks("1 hours")) +
theme_ipsum_rc(base_size=16, axis_title_size=14) +
xlab("Дата, время") +
ylab("Скорость, Mbit/s") +
ggtitle("Динамика трафика за последние 24 часа")
gp
})
# служебный вывод ---------------------
output$info_text <- renderText({
msg()
})
}
shinyApp(ui=ui, server=server)
#' Arrange time vector according equidistant time intervals
#'
#' There are two possible ways of arrangement: by hour's intervals OR by
#' minute's interval. Hour's arrangement has high priority. Hour arrangement
#' must be integer (1, 2, ...) or fractional part of hour (0.25 or 0.5 only)
#'
#' @param date POSIXct vector to arrange
#' @param hours_bin Duration (in hours) between arrangement points
#' @param mins_bin Duration (in minutes) between arrangement points
#' @export
hgroup.enum <- function(date, hours_bin=NULL, mins_bin=5){
# привязываем все измерения, которые попали в промежуток [0, t] к точке измерения.
# точки измерения могут быть кратны 1, 2, 3, 4, 6, 12 часам, определяется hour.bin
# отсчет измерений идет с 0:00
# поправка для лаборатории. для группировки меньше часа допускается указывать числа меньше 1
# 0.5 -- раз в полчаса.0.25 -- раз в 15 минут
# если hour.bin=NULL, то идет привязка к интервалам min.bin, заданном в минутах
# необходим пакет lubridate
tick_time <- date
if (is.null(hours_bin)){
# привязываем к минутным интервалам
n <- floor(lubridate::minute(tick_time)/mins_bin)
dt <- lubridate::floor_date(tick_time, unit="hour") + lubridate::minutes(n * mins_bin)
}else{
# привязываем к часовым интервалам
if (hours_bin < 1 & !(hours_bin %in% c(0.25, 0.5))) hours_bin=1
n <- floor((lubridate::hour(tick_time)*60 + lubridate::minute(tick_time))/ (hours_bin*60))
dt <- lubridate::floor_date(tick_time, unit="day") + lubridate::minutes(n * hours_bin*60)
}
dt
}
date_format_tz <- function(format = "%Y-%m-%d", tz = "UTC") {
function(x) format(x, format, tz=tz)
}
loadSquidLog <- function(fname){
checkmate::qassert(fname, "S=1")
raw_df <- read_table2(fname,
col_names=c("timestamp", "duration", "client_address", "result_codes",
"bytes", "request_method", "url", "user", "hierarcy_code", "type"),
col_types=("nicciccccc")
)
# browser()
df0 <- raw_df %>%
mutate_at(vars(timestamp), anytime, tz="Europe/Moscow") %>%
mutate(url=stri_replace_all_regex(url,
pattern=c("^([a-z]*)://", "^www\\.", "([^/]+).+", ":\\d+"),
replacement=c("", "", "$1", ""),
vectorize_all=FALSE)) %>%
mutate_at(vars(client_address), as.factor) %>%
mutate(ip=client_address, host=client_address)
df0
}
#' Title
#'
#' @param df raw traffic dataframe limited by time
#' @param subtitle gplot subtitle
#'
#' @return
#' @export
#'
#' @examples
plotTopHostDownload <- function(df, subtitle) {
flog.info(paste0("Top10 HOST download plot: nrow = ", nrow(df)))
if(nrow(df)==0) return(NULL)
# -------------- нарисуем Top10 по HOST за последние N минут ----
df0 <- df %>%
select(host, bytes, url) %>%
group_by(host) %>%
summarise(volume=round(sum(bytes)/1024/1024, 1)) %>% # Перевели в Мб
top_n(10, volume) %>%
# может возникнуть ситуация, когда все значения top_n одинаковы. тогда надо брать выборку
arrange(desc(volume)) %>%
filter(row_number()<=10) %>%
# уберем ненужный дребезг, все кто скачали менее 10Мб -- в топку
filter(!(volume<10 & row_number()>2)) %>%
mutate(label=format(volume, big.mark=" ")) %>%
mutate(hostname=glue("{host}\n ФИО")) %>%
mutate(hostname=fct_reorder(hostname, volume))
gp <- ggplot(df0, aes(hostname, volume)) +
geom_bar(fill=brewer.pal(n=9, name="Blues")[4],
alpha=0.5, stat="identity") +
geom_label(aes(label=label), fill="white", colour="black", fontface="bold", hjust=+1.1) +
theme_ipsum_rc(base_size=16, axis_title_size=14, subtitle_size=13) +
xlab("HOST") +
ylab("Суммарный Downlink, Мб") +
ggtitle("ТОП 10 скачивающих", subtitle=subtitle) +
coord_flip()
gp
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment