Created
July 11, 2014 06:45
-
-
Save ogibayashi/5cd9937fe134def925b2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##
## Visualize the execution time of Hadoop job task attempts
##
## The input file must have one line per task attempt, in the format
## <attempt ID>,<start>,<end>, for example:
##
## attempt_201405161748_0029_m_000074_0,1400232465,1400232468
## attempt_201405161748_0029_m_000072_0,1400232468,1400232474
## attempt_201405161748_0029_m_000047_0,1400232468,1400232477
## attempt_201405161748_0029_m_000048_0,1400232468,1400232482
## attempt_201405161748_0029_m_000061_0,1400232468,1400232483
library(ggplot2) | |
library(reshape2) | |
# Constants
#
# Number of task attempts drawn on a single page of the output.
numAttemptsPerPage <- 100
# Build the chart for one page of task attempts.
#
# Each attempt is drawn as a horizontal line from its start time to its end
# time, coloured by task type (map / reduce).
#
# Args:
#   df:       data frame with the columns `attempt` (task attempt ID),
#             `start` and `end`. `start`/`end` are numeric offsets from
#             1970-01-01 as accepted by as.POSIXct() (epoch seconds in the
#             sample input at the top of this file).
#   x_limits: limits handed to scale_x_datetime(), so that every page can
#             share the same time axis.
#
# Returns:
#   A ggplot object. Nothing is drawn here; the caller print()s it.
generateTaskChart <- function(df, x_limits) {
  # Attempt IDs ordered by start time, reversed. They are used as the limits
  # of the discrete y axis, which places the first limit at the bottom, so the
  # earliest attempt ends up at the top of the chart.
  attempts <- rev(unique(df$attempt[order(df$start)]))

  # Long format: one row per (attempt, start|end) so geom_line() can connect
  # the two time points of each attempt. Only start/end are melted so that an
  # unexpected extra column cannot end up on the time axis.
  tasks <- melt(df, id.vars = "attempt", measure.vars = c("start", "end"))
  tasks$value <- as.POSIXct(tasks$value, origin = "1970-01-01")

  # Attempt IDs containing "_m_" are map tasks; everything else is treated as
  # a reduce task.
  tasks$type <- ifelse(grepl("_m_", tasks$attempt), "map", "reduce")

  # The colour aesthetic must name the column (`type`), not `tasks$type`:
  # the latter is looked up outside the plot data and fails with
  # "object 'tasks' not found". `group` is set explicitly so each attempt
  # stays one line regardless of the colour mapping.
  ggplot(tasks) +
    geom_line(aes(value, attempt, colour = type, group = attempt)) +
    theme(text = element_text(size = 6)) +
    scale_y_discrete(limits = attempts) +
    scale_x_datetime(limits = x_limits) +
    theme(axis.text.x = element_text(angle = 90), legend.position = "right")
}
# ---- Script entry point ----
#
# Usage: Rscript <this script> <input.csv> [output.pdf]
#
# Reads a header-less CSV of <task attempt ID>,<start time>,<end time> rows,
# orders the attempts by start time and writes them to a multi-page PDF,
# `numAttemptsPerPage` attempts per page, all pages sharing one time axis.

# Render `df` (already in paging order) to `outputFile`, one chart per
# `perPage` rows. Returns the number of pages written, invisibly.
writeTaskChartPdf <- function(df, x_limits, outputFile, perPage) {
  pdf(outputFile)
  # Close the device even when a page fails to render, so the PDF is
  # finalised and no graphics device is left open.
  on.exit(dev.off(), add = TRUE)

  # ceiling() yields exactly the number of pages needed. The previous
  # 0:round(n / perPage) loop produced an extra page built from a reversed,
  # out-of-range row index (e.g. df[101:100, ] for n == 100, or
  # df[201:150, ] for n == 150), i.e. a page containing NA rows.
  numPages <- ceiling(nrow(df) / perPage)
  for (page in seq_len(numPages)) {
    row_start <- (page - 1) * perPage + 1
    row_end <- min(page * perPage, nrow(df))
    df_sub <- df[row_start:row_end, , drop = FALSE]
    # ggplot objects are only drawn when printed explicitly inside a loop.
    print(generateTaskChart(df_sub, x_limits))
  }
  invisible(numPages)
}

arg <- commandArgs(trailingOnly = TRUE)
if (length(arg) < 1L || is.na(arg[1]) || !nzchar(arg[1])) {
  stop("usage: Rscript <script> <input.csv> [output.pdf]", call. = FALSE)
}
filename <- arg[1]

# Output file name; the default is used when no second argument is given.
outputFileName <- if (is.na(arg[2])) "Hadoop_task_chart.pdf" else arg[2]

# Input file format: <task attempt ID>,<start time>,<end time>
df <- read.csv(filename, stringsAsFactors = FALSE, header = FALSE)
if (ncol(df) < 3L) {
  stop("input must have 3 columns: <attempt ID>,<start>,<end>", call. = FALSE)
}
names(df) <- c("attempt", "start", "end")
df <- df[order(df$start), ]

# Common x-axis limits for every page: earliest start to latest end.
limits <- as.POSIXct(c(min(df$start), max(df$end)), origin = "1970-01-01")
print(limits)

# Generate one chart for every numAttemptsPerPage task attempts.
writeTaskChartPdf(df, limits, outputFileName, numAttemptsPerPage)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment