Last active
July 25, 2019 23:27
-
-
Save dmi3kno/c09d6d57acd19b2ec09c9d97b1a40256 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(magick) | |
library(tidyverse) | |
library(tesseract) | |
library(hocr) | |
################ helpers #################### | |
image_plot<-function(img, x, y, col="green", pointsize=5){ | |
res <- image_draw(img, pointsize = pointsize) | |
points(x, y, col=col) | |
dev.off() | |
res | |
} | |
image_getpixelcol <- function(img, x, y){ | |
image_raster(image_crop(img, geometry_area(1,1,x,y)))$col | |
} | |
ll_intersection <- function(p1, p2, p3, p4){ | |
t_num <- (p1[1]-p3[1])*(p3[2]-p4[2])-(p1[2]-p3[2])*(p3[1]-p4[1]) | |
t_denom <- (p1[1]-p2[1])*(p3[2]-p4[2])-(p1[2]-p2[2])*(p3[1]-p4[1]) | |
tibble(xsec_x = p1[1]+t_num/t_denom*(p2[1]-p1[1]), | |
xsec_y = p1[2]+t_num/t_denom*(p2[2]-p1[2])) | |
} | |
pad_word_bbox <- function(bbox, word, n=1, side="both"){ | |
bb_lst <- lapply(strsplit(bbox, "\\s|,"), function(x) as.numeric(trimws(x))) | |
p <- mapply(function(x, y) {(x[3]-x[1])/nchar(y)}, bb_lst, word, SIMPLIFY = FALSE) | |
if(side=="left"|side=="both") | |
bb_lst[] <- mapply(function(x,y){x[1] <- x[1]-y*n; x}, bb_lst, p, SIMPLIFY = FALSE) | |
if(side=="right"|side=="both") | |
bb_lst[] <- mapply(function(x,y){x[3] <- x[3]+y*n; x}, bb_lst, p, SIMPLIFY = FALSE) | |
sapply(bb_lst, paste, collapse = " ") | |
} | |
############################################## | |
img <- image_read("~/Downloads/dl/2019.png") #%>% | |
#image_resize("700") | |
crop_rectangle <- function(img, hgeom){ | |
ii <- image_info(img) | |
line_df <- img %>% | |
image_canny() %>% | |
#image_hough_draw("50x50+500", overlay = T) %>% | |
image_hough_txt(hgeom) %>% | |
read_lines() %>% | |
enframe() %>% | |
filter(str_detect(value,"^line")) %>% | |
separate(value, into=c("geom", "coord1", "coord2", "len"), sep="\\s", extra = "merge") %>% | |
mutate(line_id=paste(geom, name, sep="_")) %>% | |
select(-name, -geom, -len) %>% | |
mutate_at(vars(starts_with("coord")), str_split,",") %>% | |
mutate_at(vars(starts_with("coord")), map, as.numeric) %>% | |
mutate_at(vars(starts_with("coord")), map, ceiling) | |
geom_coord_df <- as.data.frame(t(combn(line_df$line_id, 2)),stringsAsFactors = FALSE) %>% | |
as_tibble() %>% | |
left_join(line_df, by=c("V1"="line_id")) %>% | |
left_join(line_df, by=c("V2"="line_id")) %>% | |
mutate(xsect = pmap(list(coord1.x, coord2.x, coord1.y, coord2.y), | |
~ll_intersection(..1, ..2, ..3, ..4))) %>% | |
unnest(xsect) %>% | |
filter(between(xsec_x, 0, ii$width), | |
between(xsec_y, 0, ii$height)) %>% | |
arrange(xsec_x, xsec_y) %>% | |
slice(1,n()) | |
geom <- geometry_area(width = diff(geom_coord_df$xsec_x), | |
height = diff(geom_coord_df$xsec_y), | |
x_off = geom_coord_df$xsec_x[1], | |
y_off = geom_coord_df$xsec_y[1]) | |
image_crop(img, geom) | |
} | |
img_cropped <- crop_rectangle(img, "50x50+800") | |
centroids_df <- img_cropped %>% | |
image_convert(type="Grayscale", antialias = TRUE) %>% | |
image_threshold("white", "60%") %>% | |
image_threshold("black", "75%") %>% | |
image_negate() %>% | |
image_morphology("Open", "Disk:5") %>% | |
image_morphology("Thinning", "Skeleton", -1) %>% | |
image_morphology("Thinning", "LineEnds", -1) %>% | |
image_morphology("Thinning", "Corners", -1) %>% | |
image_morphology("Thinning", "Edges", -1) %>% | |
image_morphology("Thinning", "LineEnds", -1) %>% | |
image_raster() %>% | |
dplyr::filter(col!="#000000ff") %>% | |
mutate(current_col=map2_chr(x,y, | |
~image_getpixelcol(img_cropped, .x, .y))) | |
ref_col <- centroids_df %>% | |
count(current_col) %>% | |
filter(n==max(n)) %>% | |
pull(current_col) | |
img_debobbled <- img_cropped | |
for (i in seq_along(rownames(centroids_df))){ | |
img_debobbled <- image_fill(img_debobbled, "red", | |
geometry_point(centroids_df$x[i], | |
centroids_df$y[i]), | |
fuzz=50, refcolor = ref_col) | |
} | |
centroids_bbs <- centroids_df %>% | |
mutate(centroid_id=1:n(), | |
centroid_bbox=map2_chr(x,y, | |
~paste(c(.x-1,.y-1, .x+1, .y+1), | |
collapse = " "))) | |
txt_df <- img_debobbled %>% | |
image_transparent("red") %>% | |
image_background("white") %>% | |
image_convert(type="Grayscale", antialias = TRUE, matte=FALSE) %>% | |
image_threshold("white", "60%") %>% | |
image_threshold("black", "75%") %>% | |
image_ocr(HOCR=TRUE) %>% | |
hocr::hocr_parse() %>% | |
tidy_tesseract() | |
plot_df <- txt_df %>% | |
mutate(ocrx_word_bbox_padded = pad_word_bbox(ocrx_word_bbox, ocrx_word_value, 2)) %>% | |
group_by(ocr_line_id) %>% | |
mutate(word_bb_xsect=hocr::bbox_intersect(ocrx_word_bbox_padded), | |
ocrx_line_id_corr=ifelse(is.na(word_bb_xsect), | |
paste(ocr_line_id, ocrx_word_id, sep="_"), | |
as.character(ocr_line_id))) %>% | |
group_by(ocrx_line_id_corr) %>% | |
summarise(ocrx_word_value=paste(ocrx_word_value, collapse = " "), | |
ocrx_phrase_bbox=hocr::bbox_union(ocrx_word_bbox)) %>% | |
mutate(ocrx_phrase_bbox_padded=pad_word_bbox(ocrx_phrase_bbox, | |
ocrx_word_value, 5)) %>% | |
mutate(centroid_id=map_int(ocrx_phrase_bbox_padded, | |
~ which(!is.na(map_chr(centroids_bbs$centroid_bbox, | |
function(x)hocr::bbox_intersect(c(x, .x)))))) | |
) %>% | |
left_join(centroids_bbs) %>% | |
mutate(y_inv=image_info(img_debobbled)$height-y) | |
library(ggrepel) | |
library(hrbrthemes) | |
plot_df %>% | |
ggplot()+ | |
geom_point(aes(x=x, y=y_inv), size=3, color=ref_col)+ | |
geom_text_repel(aes(x=x, y=y_inv, label=ocrx_word_value),color=ref_col, size=3) + | |
theme_ipsum_rc(grid_col = "grey95")+ | |
labs(title="Gartner Magic Quadrant", | |
subtitle="Data Science and Machine Learning Platforms", | |
caption="Source:Gartner 2019", | |
x="Completeness of Vision", y="Ability to Execute")+ | |
coord_cartesian(xlim=c(0, image_info(img_debobbled)$width), | |
ylim=c(0, image_info(img_debobbled)$height), expand = T)+ | |
theme(axis.line = element_blank(), | |
axis.text = element_blank(), | |
panel.background = element_blank(), | |
panel.border = element_rect(colour = "grey30", fill=NA, size=3)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment