-
-
Save e-orlov/eb144455552452ae3afb9475e72fb5b2 to your computer and use it in GitHub Desktop.
Modification that rescales each page's PageRank (PR) to a value between 1 and 10
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(igraph) | |
# Linearly rescale the values of `x` into the interval given by `range`.
#
# Args:
#   x:          numeric vector or matrix to rescale.
#   range:      length-2 numeric target interval. If range[1] > range[2]
#               the mapping is reversed (largest input -> smallest output).
#   from.range: length-2 numeric source interval. When it contains NA
#               (the default) it is computed as range(x, na.rm = TRUE).
#
# Returns:
#   The rescaled values, shaped like `x`. If the source interval has zero
#   width (all values identical), every value is set to mean(range).
#   Values that land outside `range` after mapping (possible when a
#   narrower `from.range` is supplied) are replaced by NA.
map <- function(x, range = c(0, 1), from.range = NA) {
  if (any(is.na(from.range))) {
    from.range <- range(x, na.rm = TRUE)
  }
  span <- diff(from.range)

  ## all values are the same: there is no spread to scale, so place
  ## everything at the midpoint of the target range
  if (span == 0) {
    midpoint <- mean(range)
    if (is.null(dim(x))) {
      ## plain vectors have no dim(); matrix() would fail on NULL extents
      out <- rep(midpoint, length(x))
      out[is.na(x)] <- NA
      names(out) <- names(x)
      return(out)
    }
    return(matrix(midpoint, ncol = ncol(x), nrow = nrow(x),
                  dimnames = dimnames(x)))
  }

  ## map to [0,1]
  x <- (x - from.range[1]) / span

  ## map from [0,1] to [range], flipping first when range is descending
  if (range[1] > range[2]) {
    x <- 1 - x
  }
  x <- x * abs(diff(range)) + min(range)

  ## anything that fell outside the target interval is marked missing
  x[x < min(range) | x > max(range)] <- NA
  x
}
# Swap out path to your Screaming Frog All Outlink CSV. For Windows, remember to change backslashes to forward slashes.
# skip = 1 discards the first line of the file so the following line is read as the header.
links <- read.csv("C:/Documents/screaming-frog-all-outlinks.csv", skip = 1) # CSV Path

# This line of code is optional. It filters out JavaScript, CSS, and Images. Technically you should keep them in there.
# NOTE(review): the Type labels depend on the Screaming Frog version that produced the export - confirm "HREF" matches your CSV.
links <- subset(links, Type == "HREF") # Optional line. Filter.

# Keep only followed links. read.csv() converts a column made up solely of
# "true"/"false" into logical values, and a logical compared with "true"
# never matches ("TRUE" != "true"), so compare case-insensitively on the
# character form; this works for logical, character and factor columns.
links <- subset(links, tolower(as.character(Follow)) == "true")
links <- subset(links, select = c(Source, Destination))

# Fail loudly rather than writing an empty result file.
if (nrow(links) == 0) {
  stop("No followed links left after filtering; check the Type and Follow columns of the CSV.",
       call. = FALSE)
}

# Build a directed graph (Source -> Destination) and compute PageRank.
g <- graph_from_data_frame(links)
pr <- page_rank(g, algo = "prpack", vids = V(g), directed = TRUE, damping = 0.85)

# One row per URL: vertex name and its raw PageRank score.
values <- data.frame(
  url = names(pr$vector),
  pr = unname(pr$vector),
  stringsAsFactors = FALSE
)

# Use map function to map pagerank values from 1 to 10
# (done before the domain filter, so the scale covers every URL in the graph).
values$pr <- map(values$pr, c(1, 10))

# Swap out 'domain' and 'com' to represent your website address.
values <- values[grepl("https?:\\/\\/(.*\\.)?domain\\.com.*", values$url), ] # Domain filter.

# Replace with your desired filename for the output file.
# write.csv() also writes the row names (the pre-filter row indices) as the first column.
write.csv(values, file = "output-pagerank.csv") # Output file.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment