Created
October 26, 2018 20:49
-
-
Save voltek62/04ec0836f1922a9d6802aabd068f0298 to your computer and use it in GitHub Desktop.
Visualizing your website’s Internal Linking
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Auto-install any required package that is not installed yet.
packages <- c("igraph", "dplyr", "ggplot2", "magrittr")
## scan the installed library once; installed.packages() is slow
missingPackages <- setdiff(packages, rownames(installed.packages()))
if (length(missingPackages) > 0) {
  install.packages(missingPackages)
}
# Enjoy learning ? https://dataseolabs.com
library(igraph)
library(dplyr)
library(ggplot2)
library(magrittr)
# Linearly rescale the values of `x` into the interval given by `range`.
#
# Arguments:
#   x          numeric vector or matrix to rescale.
#   range      length-2 numeric target interval. When range[1] > range[2]
#              the mapping is reversed (the largest input gets the smallest
#              output).
#   from.range length-2 numeric source interval. When it contains NA (the
#              default) the observed range of `x` (ignoring NA) is used.
#
# Returns an object shaped like `x`. When every value is identical the
# result is filled with the midpoint of `range`. Values that land outside
# `range` after mapping (possible with a user-supplied `from.range`)
# become NA.
map <- function(x, range = c(0, 1), from.range = NA) {
  if (any(is.na(from.range))) {
    from.range <- base::range(x, na.rm = TRUE)
  }
  span <- diff(from.range)

  ## all values are the same: nothing to spread, use the middle of the target
  if (span == 0) {
    middle <- mean(range)
    if (is.null(dim(x))) {
      ## a plain vector has no dim, so matrix() cannot be used for it
      filled <- rep(middle, length(x))
      names(filled) <- names(x)
      return(filled)
    }
    return(matrix(middle, ncol = ncol(x), nrow = nrow(x),
                  dimnames = dimnames(x)))
  }

  ## map to [0,1]
  x <- (x - from.range[1]) / span
  ## a descending target interval flips the direction of the mapping
  if (range[1] > range[2]) {
    x <- 1 - x
  }
  ## map from [0,1] to [range]
  x <- x * abs(diff(range)) + min(range)
  ## anything outside the target interval is not representable
  x[x < min(range) | x > max(range)] <- NA
  x
}
# import ScreamingFrog : “Bulk Export” then “All Outlinks”
## skip the first line of the file; the header row is read from the second
DF <- read.csv("all_outlinks.csv", header = TRUE,
               stringsAsFactors = FALSE, skip = 1)
## we keep only links, and only the two URL columns;
## %in% never returns NA, so rows with a missing Type are dropped
## instead of turning into all-NA rows
DF <- DF[DF$Type %in% "AHREF", c("Source", "Destination")]
## adapt colnames and rownames
colnames(DF) <- c("From", "To")
rownames(DF) <- NULL
# generate the directed link graph from the data.frame (From -> To)
linkGraph <- graph_from_data_frame(DF, directed = TRUE)
# calculate pagerank on the directed graph, with loops and duplicate links
# removed; link direction is what PageRank measures, and `directed = TRUE`
# is ignored once the graph has been made undirected
pr <- page_rank(simplify(linkGraph), directed = TRUE, damping = 0.85)
# simple, undirected version of the graph, used for drawing; it keeps the
# same vertices in the same order, so pr$vector lines up with it
graphObject <- simplify(as.undirected(linkGraph))
# draw the whole graph: each node's size is its pagerank rescaled to 1..20,
# and labels are hidden
plot(
  graphObject,
  layout             = layout.fruchterman.reingold,
  vertex.size        = map(pr$vector, c(1, 20)),
  vertex.label       = NA,
  vertex.label.color = "black",
  edge.arrow.size    = .2
)
# store the pagerank of every URL in a data.frame, highest score first
urls_pagerank <- data.frame(
  raw.internal.pagerank = sort(pr$vector, decreasing = TRUE),
  stringsAsFactors = FALSE
)
## the vector names (the URLs) arrive as row names: move them to a column
urls_pagerank$Address <- rownames(urls_pagerank)
rownames(urls_pagerank) <- NULL
## same score rescaled to 1..10
urls_pagerank$internal.pagerank <-
  map(urls_pagerank$raw.internal.pagerank, c(1, 10))
# print only your top URLs
nbUrl <- 800
## vertex ids of the nbUrl URLs with the highest pagerank; head() simply
## returns fewer ids when the graph has fewer than nbUrl vertices
topIds <- head(order(pr$vector, decreasing = TRUE), nbUrl)
## keep those vertices and the links between them
graphObjectTopUrl <- induced_subgraph(graphObject, topIds)
# use tkplot for interactive graph
## sizes use the same 1..20 scale as the full plot and are looked up by
## vertex name, so each size belongs to the vertex it is drawn on
prLimit <- as.numeric(
  map(pr$vector, c(1, 20))[V(graphObjectTopUrl)$name]
)
tkplot(
  graphObjectTopUrl,
  layout = layout.fruchterman.reingold,
  vertex.size = prLimit,
  vertex.label = NA
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment