Last active
August 3, 2022 08:53
-
-
Save pshapiro/d29918321b0c763fe1d7997a86e77e4f to your computer and use it in GitHub Desktop.
Modification to assign PR a value between 1 and 10
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(igraph) | |
# Linearly rescale the values of `x` onto the interval given by `range`.
#
# Arguments:
#   x          Numeric vector or matrix to rescale.
#   range      Length-2 numeric target interval. If range[1] > range[2] the
#              mapping is reversed (largest input maps to the smallest output).
#   from.range Length-2 numeric source interval. When it contains NA (the
#              default) the observed range of `x`, ignoring NAs, is used.
#
# Returns:
#   An object shaped like `x` holding the rescaled values. Inputs that fall
#   outside `from.range` end up outside `range` and are returned as NA.
#   If the source interval has zero width (all values identical), every
#   element is set to the midpoint of `range`.
map <- function(x, range = c(0, 1), from.range = NA) {
  if (any(is.na(from.range))) from.range <- range(x, na.rm = TRUE)

  span <- diff(from.range)

  ## all values are the same: there is no spread to scale, use the midpoint
  if (span == 0) {
    midpoint <- mean(range)
    if (is.null(dim(x))) {
      ## plain vectors have no nrow()/ncol(), so matrix() would fail here
      filled <- rep(midpoint, length(x))
      names(filled) <- names(x)
      return(filled)
    }
    return(matrix(midpoint, ncol = ncol(x), nrow = nrow(x),
                  dimnames = dimnames(x)))
  }

  ## map to [0,1]
  x <- (x - from.range[1]) / span

  ## a descending target range means the scale is flipped
  if (range[1] > range[2]) x <- 1 - x

  ## map from [0,1] to [range]
  lower <- min(range)
  upper <- max(range)
  x <- x * abs(diff(range)) + lower

  ## anything outside the target interval came from outside from.range
  x[x < lower | x > upper] <- NA
  x
}
# Swap out the path to your Screaming Frog "All Outlinks" CSV export.
# On Windows, remember to change backslashes to forward slashes.
# skip = 1 skips the first line of the file before the header is read.
links <- read.csv("C:/Documents/screaming-frog-all-outlinks.csv", skip = 1) # CSV Path

# This line of code is optional. It filters out JavaScript, CSS, and images.
# Technically you should keep them in there.
links <- subset(links, Type == "HREF") # Optional line. Filter.

# Keep only followed links. read.csv() may parse a true/false column as
# logical, in which case comparing it to "true" directly matches nothing,
# so normalise to lower-case text before comparing.
links <- subset(links, tolower(as.character(Follow)) == "true")

# Build a directed graph from the Source -> Destination edge list.
links <- subset(links, select = c(Source, Destination))
g <- graph_from_data_frame(links)

# Compute PageRank for every vertex of the graph.
pr <- page_rank(g, algo = "prpack", vids = V(g), directed = TRUE, damping = 0.85)

# One row per URL: the vertex name and its PageRank score.
values <- data.frame(
  url = names(pr$vector),
  pr = unname(pr$vector),
  stringsAsFactors = FALSE
)

# Use map function to map pagerank values from 1 to 10
values$pr <- map(values$pr, c(1, 10))

# Swap out 'domain' and 'com' to represent your website address.
values <- values[grepl("https?:\\/\\/(.*\\.)?domain\\.com.*", values$url), ] # Domain filter.

# Replace with your desired filename for the output file.
write.csv(values, file = "output-pagerank.csv") # Output file.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello,
Can you please guide me on how to use it?
Thanks for this great tool