Created
February 2, 2018 07:06
-
-
Save iMissile/be801189199532e68677d4f0b059141d to your computer and use it in GitHub Desktop.
Shiny приложение для аналитики squid
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages ----
# NOTE: the attach order is kept as-is because it determines which package
# masks which when function names collide.
library(tidyverse)
library(lubridate)
library(glue)
library(scales)
library(forcats)
library(readxl)
library(magrittr)
library(stringi)
library(futile.logger)
library(jsonlite)
library(Cairo)
library(RColorBrewer)
library(extrafont)
library(hrbrthemes)
library(shiny)
library(shinyjqui)
library(shinythemes)
library(shinyBS)
library(shinyjs)
library(shinyWidgets)
library(shinycssloaders)
library(anytime)
library(tictoc)
library(assertr)
library(checkmate)

# Global options ----
# Record the reactive graph so it can be inspected with Shiny's reactlog.
options(shiny.reactlog = TRUE)
# Spinner style used by shinycssloaders::withSpinner().
options(spinner.type = 4)

# Helper functions ----
# Parse funcs.R explicitly as UTF-8 and evaluate it in the current
# environment.
# NOTE(review): presumably used instead of source() so that Cyrillic string
# literals survive on non-UTF-8 locales, and presumably defines the helpers
# called by the server (loadSquidLog, plotTopHostDownload, hgroup.enum,
# date_format_tz) -- confirm.
eval(base::parse("funcs.R", encoding = "UTF-8"))
# ================================================================ | |
# UI ----
# A navbar with two entries; the selected entry (`input.tsp`) decides which of
# the two conditional panels is shown. The pieces are assembled inside local()
# so the intermediate objects do not leak into the enclosing environment.
ui <- local({
  # Fixed-height plot output wrapped in a loading spinner.
  spinner_plot <- function(output_id) {
    div(withSpinner(plotOutput(output_id, height = "400px")))
  }

  # Half-width column holding a DT table wrapped in a loading spinner.
  table_column <- function(output_id) {
    column(
      6,
      div(withSpinner(DT::dataTableOutput(output_id))),
      style = "font-size: 90%"
    )
  }

  # "Table" tab: per-URL volumes, per-host drill-down and the depth selector.
  table_tab <- tabPanel(
    "Таблица",
    value = "table_tab",
    p(),
    mainPanel(
      fluidRow(
        table_column("url_volume_table"),
        table_column("url_host_volume_table")
      ),
      width = 10
    ),
    sidebarPanel(
      selectInput(
        "depth_filter",
        "Глубина данных",
        choices = c("10 минут" = 10, "1 час" = 60, "24 часа" = 24 * 60)
      ),
      width = 2
    )
  )

  # "Graph" tab: three re-orderable Top-10 plots above a 24 h timeline.
  graph_tab <- tabPanel(
    "График",
    value = "graph_tab",
    fluidRow(
      p(),
      jqui_sortabled(
        div(
          id = "top10_plots",
          column(4, spinner_plot("top10_left_plot")),
          column(4, spinner_plot("top10_center_plot")),
          column(4, spinner_plot("top10_right_plot"))
        )
      )
    ),
    fluidRow(
      column(12, spinner_plot("timeline_plot"))
    )
  )

  # general panel -----------------------
  general_panel <- conditionalPanel(
    condition = "input.tsp=='general_panel'",
    fluidRow(
      column(10, NULL),
      column(
        2,
        actionButton("process_btn", "Загрузить лог", class = "rightAlign")
      )
    ),
    tabsetPanel(
      id = "main_panel",
      selected = "graph_tab",
      table_tab,
      graph_tab
    )
  )

  # config panel -----------------------
  config_panel <- conditionalPanel(
    condition = "input.tsp=='config_panel'",
    fluidRow(
      column(
        2,
        actionButton("set_test_dates_btn", "На демо дату", class = "rightAlign")
      )
    ),
    fluidRow(
      column(12, verbatimTextOutput("info_text"))
    )
  )

  navbarPage(
    title = "Squid статистика",
    tabPanel("Статистика", value = "general_panel"),
    tabPanel("Настройки", value = "config_panel"),
    id = "tsp",
    theme = shinytheme("yeti"),
    tags$head(tags$style(".rightAlign{float:right;}")),
    general_panel,
    config_panel,
    shinyjs::useShinyjs() # Include shinyjs
  )
})
# ================================================================ | |
# Server ----
# Shiny server function: downloads and parses the Squid access log, derives
# the per-URL / per-host summaries and renders the tables and plots.
#
# input, output, session: the standard Shiny server arguments.
server <- function(input, output, session) {
  # Static settings ------------------------------------------------
  log_name <- "app.log"
  # Log both to the console and to `log_name`.
  flog.appender(appender.tee(log_name))
  flog.threshold(TRACE)
  # Location the Squid access log is downloaded from.
  squid_log_url <- "http://10.0.0.246/access.log"
  # Width, in minutes, of one time bin on the timeline plot.
  timeline_bin_mins <- 10

  # Reactive values -------------------
  # Parsed log. Reading the button value makes a click force a fresh download.
  raw_df <- reactive({
    input$process_btn # refreshed manually by the user
    # Load the Squid log -------
    # loadSquidLog("./data/acc.log")
    tic()
    resp <- httr::GET(squid_log_url)
    # Fail loudly on HTTP errors instead of parsing an error page as a log.
    httr::stop_for_status(resp)
    # Ask for text explicitly: loadSquidLog() requires a single string, while
    # the default auto-parsing depends on the server's Content-Type header.
    data <- httr::content(resp, as = "text", encoding = "UTF-8")
    flog.info(glue("Downloading log file from server: {capture.output(toc())}"))

    tic()
    df <- loadSquidLog(data)
    flog.info(glue("Parsing log file from server: {capture.output(toc())}"))
    df
  })

  # Working set: records from the last two days only.
  squid_df <- reactive({
    req(raw_df()) %>%
      filter(timestamp > now() - days(2))
  })

  # Traffic volume per URL within the selected depth, largest first.
  url_df <- reactive({
    df <- req(squid_df()) %>%
      filter(timestamp > now() - minutes(as.numeric(input$depth_filter))) %>%
      select(host, bytes, url) %>%
      group_by(url) %>%
      summarise(volume = round(sum(bytes) / 1024 / 1024, 1)) %>% # bytes -> MB
      arrange(desc(volume))
    df
  })

  # Per-host breakdown for the URL currently selected in the URL table.
  url_host_df <- reactive({
    req(url_df())
    # The analysis runs only once a table row has been selected.
    ids <- req(input$url_volume_table_rows_selected)
    flog.info(glue("Selected row num is {ids}. Data row: {capture.output(str(url_df()[ids, ]))}"))
    selected_url <- url_df()[[ids, "url"]]

    isolate({
      df <- squid_df() %>%
        filter(timestamp > now() - minutes(as.numeric(input$depth_filter))) %>%
        # `!!` inlines the selected value, so the `url` column is compared
        # against it rather than against itself.
        filter(url == !!selected_url) %>%
        select(bytes, host) %>%
        group_by(host) %>%
        summarise(volume = round(sum(bytes) / 1024 / 1024, 1)) %>% # bytes -> MB
        arrange(desc(volume))
    })
    df
  })

  # Text shown on the settings panel.
  msg <- reactiveVal("")

  # Table: volume per URL ----------------------------
  output$url_volume_table <- DT::renderDataTable({
    df <- req(url_df())
    DT::datatable(
      df,
      class = "cell-border stripe",
      rownames = FALSE,
      filter = "bottom",
      selection = list(mode = "single", target = "row"),
      options = list(
        dom = "fltip",
        pageLength = 7,
        lengthMenu = c(5, 7, 10, 15, 50),
        order = list(list(1, "desc")) # column indices are 0-based
      )
    )
  })

  # Table: per-host detail for the selected URL ----------------------------
  output$url_host_volume_table <- DT::renderDataTable({
    df <- req(url_host_df())
    DT::datatable(
      df,
      class = "cell-border stripe",
      rownames = FALSE,
      filter = "bottom",
      options = list(
        dom = "fltip", # autoWidth = TRUE,
        pageLength = 7,
        lengthMenu = c(5, 7, 10, 15)
      )
    )
  })

  # Top-10 plots -------------
  output$top10_left_plot <- renderPlot({
    df <- req(squid_df()) %>%
      filter(timestamp > now() - days(1))
    plotTopHostDownload(df, subtitle = "за последние сутки")
  })

  output$top10_center_plot <- renderPlot({
    df <- req(squid_df()) %>%
      filter(timestamp > now() - minutes(60))
    plotTopHostDownload(df, subtitle = "за последний час")
  })

  output$top10_right_plot <- renderPlot({
    df <- req(squid_df()) %>%
      filter(timestamp > now() - minutes(10))
    plotTopHostDownload(df, subtitle = "за последние 10 минут")
  })

  # Timeline of traffic speed over the last 24 hours -------------
  output$timeline_plot <- renderPlot({
    df <- req(squid_df()) %>%
      filter(timestamp > now() - days(1)) %>%
      mutate(timegroup = hgroup.enum(timestamp, mins_bin = timeline_bin_mins)) %>%
      select(timegroup, host, bytes) %>%
      group_by(timegroup, host) %>%
      # bytes -> Mbit, divided by the bin length in seconds -> Mbit/s
      summarise(volume = sum(bytes) / 1024 / 1024 * 8 / (timeline_bin_mins * 60)) %>%
      # summarise() leaves the data grouped by timegroup, so this keeps the
      # top 10 hosts within each time bin.
      top_n(10, volume)

    gp <- ggplot(df, aes(timegroup, volume)) +
      geom_area(aes(fill = host), alpha = 0.5, position = "stack") +
      # NOTE(review): only `fill` is mapped, so this colour scale has no
      # visible effect. It is kept because switching to scale_fill_brewer()
      # would change the plot (Set1 provides at most 9 colours).
      scale_color_brewer(palette = "Set1") +
      scale_x_datetime(
        labels = date_format_tz("%d.%m\n%H:%M", tz = "Europe/Moscow"),
        breaks = date_breaks("4 hours"),
        minor_breaks = date_breaks("1 hours")
      ) +
      theme_ipsum_rc(base_size = 16, axis_title_size = 14) +
      xlab("Дата, время") +
      ylab("Скорость, Mbit/s") +
      ggtitle("Динамика трафика за последние 24 часа")
    gp
  })

  # Service output ---------------------
  output$info_text <- renderText({
    msg()
  })
}
# Build the app object from the UI definition and the server function.
shinyApp(ui = ui, server = server)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Arrange time vector according equidistant time intervals
#'
#' Each timestamp is moved back to the start of the bin it falls into.
#' There are two possible ways of arrangement: by hour's intervals OR by
#' minute's interval. Hour's arrangement has high priority. Hour arrangement
#' must be integer (1, 2, ...) or fractional part of hour (0.25 or 0.5 only);
#' any other value below 1 is replaced by 1.
#'
#' Hour bins are counted from the start of the day, minute bins from the start
#' of the hour, so a `mins_bin` that does not divide 60 yields a shorter last
#' bin in every hour.
#'
#' @param date POSIXct vector to arrange
#' @param hours_bin Duration (in hours) between arrangement points, or `NULL`
#'   to use `mins_bin` instead
#' @param mins_bin Duration (in minutes) between arrangement points; used only
#'   when `hours_bin` is `NULL`
#' @return Vector of the same length as `date` holding the start of the bin
#'   each timestamp belongs to
#' @export
hgroup.enum <- function(date, hours_bin = NULL, mins_bin = 5) {
  if (is.null(hours_bin)) {
    # Minute bins: index of the bin within the hour, counted from hh:00.
    bin_index <- floor(lubridate::minute(date) / mins_bin)
    hour_start <- lubridate::floor_date(date, unit = "hour")
    return(hour_start + lubridate::minutes(bin_index * mins_bin))
  }

  # Hour bins: the only sub-hour widths allowed are 15 and 30 minutes.
  if (hours_bin < 1 && !(hours_bin %in% c(0.25, 0.5))) {
    hours_bin <- 1
  }
  bin_mins <- hours_bin * 60
  # Index of the bin within the day, counted from 00:00.
  minute_of_day <- lubridate::hour(date) * 60 + lubridate::minute(date)
  bin_index <- floor(minute_of_day / bin_mins)
  day_start <- lubridate::floor_date(date, unit = "day")
  day_start + lubridate::minutes(bin_index * bin_mins)
}
#' Build a date-time label formatter bound to a time zone
#'
#' @param format Format string passed on to `format()`
#' @param tz Time zone name passed on to `format()`
#' @return A function of one argument `x` that returns
#'   `format(x, format, tz = tz)`
date_format_tz <- function(format = "%Y-%m-%d", tz = "UTC") {
  # Arguments are evaluated lazily; force them now so the returned formatter
  # keeps the values given at creation time even if the caller's variables
  # change before the formatter is first used.
  force(format)
  force(tz)
  # In call position `format` resolves to the function, as an argument it is
  # the format string captured above.
  function(x) format(x, format, tz = tz)
}
#' Read a Squid access log into a data frame
#'
#' @param fname Single non-missing string handed to `read_table2()`.
#'   NOTE(review): despite the name, the caller in this app passes the
#'   downloaded log text rather than a file name -- confirm both are intended.
#' @return Data frame with the ten log columns, where `timestamp` is converted
#'   with `anytime()` in the Europe/Moscow time zone, `url` is reduced to the
#'   host name (scheme, leading "www.", path and port removed),
#'   `client_address` is a factor, and `ip` and `host` are copies of
#'   `client_address`
loadSquidLog <- function(fname) {
  checkmate::qassert(fname, "S=1")

  raw_df <- read_table2(
    fname,
    col_names = c(
      "timestamp", "duration", "client_address", "result_codes",
      "bytes", "request_method", "url", "user", "hierarcy_code", "type"
    ),
    col_types = "nicciccccc"
  )

  raw_df %>%
    mutate(
      timestamp = anytime(timestamp, tz = "Europe/Moscow"),
      # Patterns are applied one after another: drop the scheme, a leading
      # "www.", everything from the first "/" on, and finally a trailing
      # port. Stripping the path explicitly (rather than matching "host plus
      # at least one more character") keeps values without a path or port,
      # e.g. "error:invalid-request", intact instead of losing their last
      # character.
      url = stri_replace_all_regex(
        url,
        pattern = c("^[a-z]*://", "^www\\.", "/.*$", ":\\d+$"),
        replacement = c("", "", "", ""),
        vectorize_all = FALSE
      ),
      client_address = as.factor(client_address),
      ip = client_address,
      host = client_address
    )
}
#' Bar chart of the hosts with the largest download volume
#'
#' Sums `bytes` per `host`, converts the totals to megabytes and plots at most
#' ten hosts as horizontal bars, largest on top.
#'
#' @param df raw traffic dataframe limited by time; the columns `host`,
#'   `bytes` and `url` are selected from it
#' @param subtitle gplot subtitle
#'
#' @return A ggplot object, or `NULL` when `df` has no rows
#' @export
plotTopHostDownload <- function(df, subtitle) {
  flog.info(paste0("Top10 HOST download plot: nrow = ", nrow(df)))
  if (nrow(df) == 0) {
    return(NULL)
  }

  # Total download per host, in MB.
  volume_by_host <- df %>%
    select(host, bytes, url) %>%
    group_by(host) %>%
    summarise(volume = round(sum(bytes) / 1024 / 1024, 1))

  top_hosts <- volume_by_host %>%
    top_n(10, volume) %>%
    # top_n() keeps ties, so more than ten rows may survive; cut explicitly.
    arrange(desc(volume)) %>%
    head(10) %>%
    # Drop the noise: hosts below 10 MB are kept only in the first two rows.
    filter(volume >= 10 | row_number() <= 2)

  plot_data <- top_hosts %>%
    mutate(label = format(volume, big.mark = " ")) %>%
    mutate(hostname = glue("{host}\n ФИО")) %>%
    # Order the bars by volume.
    mutate(hostname = fct_reorder(hostname, volume))

  bar_fill <- brewer.pal(n = 9, name = "Blues")[4]

  ggplot(plot_data, aes(hostname, volume)) +
    geom_bar(fill = bar_fill, alpha = 0.5, stat = "identity") +
    geom_label(
      aes(label = label),
      fill = "white",
      colour = "black",
      fontface = "bold",
      hjust = +1.1
    ) +
    theme_ipsum_rc(base_size = 16, axis_title_size = 14, subtitle_size = 13) +
    labs(
      x = "HOST",
      y = "Суммарный Downlink, Мб",
      title = "ТОП 10 скачивающих",
      subtitle = subtitle
    ) +
    coord_flip()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment