Last active
December 28, 2015 18:38
-
-
Save ringtaro/7544101 to your computer and use it in GitHub Desktop.
入門機会学習勉強会第11章ソーシャルグラフの分析
どうやってソーシャルネットワークのグラフを取得するか
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Graph programming | |
# | |
library(igraph) | |
(a.el <- rbind(c("a", "b"), c("b","c"))) | |
a.g <- graph.edgelist(a.el) | |
plot(a.g) | |
(b.el <- cbind(c("d"), c("a","b","c"))) | |
(c.el <- rbind(a.el, b.el)) | |
c.g <- graph.edgelist(c.el) | |
plot(c.g) | |
(d.el <- cbind(c("d"), c("a","b","c"))) | |
(d.el <- rbind(d.el, c("b", "c"))) | |
(d.el <- rbind(d.el, c(integer(0), integer(0)))) | |
# Omake | |
integer(2) | |
integer(0) | |
c("a", integer(0)) | |
(d.el <- rbind(d.el, c("a", integer(0)))) | |
(d.el <- rbind(d.el, "a")) | |
c(integer(0), "a") | |
(d.el <- rbind(d.el, c(integer(0), "a"))) | |
(d.el <- rbind(d.el, "a")) | |
(d.el <- cbind(c("d"), c("a","b","c"))) | |
(d.el <- cbind(c("d", "e"), c("a","b","c"))) #NG | |
(d.el <- cbind(c("d", "e"), c("a","b","c", "z"))) #OK | |
# | |
# build ego-network | |
# | |
library(RJSONIO) | |
find.twitter <- function(node.vector) { | |
twitter.nodes <- node.vector[grepl("http://twitter.com/", node.vector, fixed = TRUE)] | |
if(length(twitter.nodes) > 0) { | |
twitter.users <- strsplit(twitter.nodes, "/") | |
user.vec <- sapply(1:length(twitter.users), | |
function(i) (ifelse(twitter.users[[i]][4] == "account", NA, twitter.users[[i]][4]))) | |
return(user.vec[which(!is.na(user.vec))]) | |
} | |
else { | |
return(character(0)) | |
} | |
} | |
# https://socialgraph.googleapis.com/lookup?q=http://twitter.com/ringtaro&edo=1&edi=1 | |
(api.json <- fromJSON(paste(readLines("ringtaro.json"), collapse=""))) | |
(ego <- find.twitter(names(api.json$nodes))) | |
(nodes.out <- names(api.json$nodes[[1]]$nodes_referenced)) | |
(twitter.friends <- find.twitter(nodes.out)) | |
(friends <- cbind(ego, twitter.friends)) | |
(nodes.in <- names(api.json$nodes[[1]]$nodes_referenced_by)) | |
(twitter.followers <- find.twitter(nodes.in)) | |
(followers <- cbind(twitter.followers, ego)) | |
(ego.el <- rbind(friends, followers)) | |
# | |
# Snowball | |
# | |
get.seeds <- function(snowball.el, seed) { | |
new.seeds <- unique(c(snowball.el[,1], snowball.el[,2])) | |
return(new.seeds[which(new.seeds != seed)]) | |
} | |
user <- "olduser" | |
(user.el <- rbind( | |
cbind(c(user), c("newuser", "a", "c", "d")), | |
cbind(c("newuser", "e", "b", "d"), c(user)))) | |
(new.seeds <- get.seeds(user.el, user)) | |
snowball.el <- user.el | |
all.nodes <- user | |
next.seeds <- c() | |
user <- "newuser" | |
(user.el <- rbind( | |
cbind(c(user), c("a", "b", "c", "olduser")), | |
cbind(c("e", "b", "c", "olduser"), c(user)))) | |
(snowball.el <- rbind(snowball.el, user.el)) | |
(next.seeds <- c(next.seeds, get.seeds(user.el, user))) | |
(all.nodes <- c(all.nodes, user)) | |
user <- "newuser" | |
(user.el <- rbind( | |
cbind(c(user), c("a", "b", "c", "olduser")), | |
cbind(c("e", "b", "c", "olduser"), c(user)))) | |
(snowball.el <- rbind(snowball.el, user.el)) | |
(next.seeds <- c(next.seeds, get.seeds(user.el, user))) | |
(all.nodes <- c(all.nodes, user)) | |
(new.seeds <- unique(next.seeds)) | |
(new.seeds <- new.seeds[!(new.seeds %in% all.nodes)]) | |
(snowball.el <- snowball.el[!duplicated(snowball.el),]) | |
# | |
# Add Label | |
# | |
library(igraph) | |
ego0.net <- graph.edgelist(ego.el) | |
plot(ego0.net) | |
write.graph(ego0.net, "ego0.graphml", format = "graphml") | |
ego1.net <- set.vertex.attribute(ego0.net, "Label", value = get.vertex.attribute(ego0.net, "name")) | |
write.graph(ego1.net, "ego1.graphml", format = "graphml") | |
# | |
# Screening | |
# | |
# coreness | |
library(igraph) | |
a.el <- cbind(c("z"), c("a","b","c")) | |
a.g <- graph.edgelist(a.el,directed=FALSE) | |
plot(a.g) | |
graph.coreness(a.g) | |
a.el <- rbind(a.el, c("a","d")) | |
a.g <- graph.edgelist(a.el,directed=FALSE) | |
plot(a.g) | |
graph.coreness(a.g) | |
a.el <- rbind(a.el, c("b","d")) | |
a.g <- graph.edgelist(a.el,directed=FALSE) | |
plot(a.g) | |
graph.coreness(a.g) | |
a.el <- rbind(a.el, c("a","e")) | |
a.el <- rbind(a.el, c("d","e")) | |
a.g <- graph.edgelist(a.el,directed=FALSE) | |
plot(a.g) | |
graph.coreness(a.g) | |
a.el <- rbind(a.el, c("a","b")) | |
a.g <- graph.edgelist(a.el,directed=FALSE) | |
plot(a.g) | |
graph.coreness(a.g) | |
a.el <- rbind(a.el, c("e","b")) | |
a.g <- graph.edgelist(a.el,directed=FALSE) | |
plot(a.g) | |
graph.coreness(a.g) | |
a.cores <- graph.coreness(a.g) | |
a.cores > 1 | |
which(a.cores > 1) | |
a.clean <- induced.subgraph(a.g, which(a.cores > 1)) | |
plot(a.clean) | |
# ego-network | |
user = "a" | |
neighbors(a.g, user) | |
which(V(a.g)$name==user)[1] | |
c(which(V(a.g)$name==user)[1], neighbors(a.g, user)) | |
ego <- induced.subgraph(a.g, c(which(V(a.g)$name==user)[1], neighbors(a.g, user))) | |
plot(ego) | |
# | |
# Hierarchical Clustering | |
# | |
library(igraph) | |
user.el <- rbind( | |
cbind(c("seed"), c("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z")), | |
cbind(c("a"), c("b", "c", "e", "h", "i", "l", "m", "n", "o", "v", "w", "y", "z")), | |
cbind(c("b"), c("h", "i", "j", "o", "q", "r", "s", "x", "y")), | |
cbind(c("c"), c("d", "f", "g", "h", "p", "r", "s", "w", "y")), | |
cbind(c("d"), c("e", "i", "j", "k", "p", "q", "u", "v", "z")), | |
cbind(c("e"), c("j", "k", "l", "x", "y", "z")), | |
cbind(c("f"), c("g", "n", "o", "p", "q", "r", "s", "w")), | |
cbind(c("g"), c("h", "i", "j", "u", "v", "w", "x", "y", "z")), | |
cbind(c("h"), c("i", "j", "k", "l", "n", "o", "q", "s")), | |
cbind(c("i"), c("j", "k", "u", "v", "w", "y")), | |
cbind(c("j"), c("v", "w", "z")), | |
cbind(c("k"), c("l", "m", "w", "x", "y", "z")), | |
cbind(c("l"), c("m", "n", "r", "s", "t", "u", "v", "w", "x", "y", "z")), | |
cbind(c("m"), c("n", "o", "v", "w", "x", "y", "z")), | |
cbind(c("n"), c("s", "t", "u", "v", "w", "x", "y", "z")), | |
cbind(c("o"), c("t", "u", "v", "y", "z")), | |
cbind(c("p"), c("q", "r", "x", "z")), | |
cbind(c("q"), c("r", "s", "t")), | |
cbind(c("r"), c("s", "v", "w", "y")), | |
cbind(c("s"), c("t", "u", "w")), | |
cbind(c("t"), c("u", "v", "z")), | |
cbind(c("u"), c("v", "w")), | |
cbind(c("v"), c("w", "y", "z")), | |
cbind(c("w"), c("x")), | |
cbind(c("x"), c("y")), | |
cbind(c("y"), c("z")), | |
cbind(c("a", "b", "c", "i", "j", "k", "l", "s", "t", "u", "v", "w", "x", "y", "z"), c("1")), | |
cbind(c("a", "b", "c", "d", "e", "f", "g", "p", "q", "v", "x", "y", "z"), c("2")), | |
cbind(c("h", "i", "j", "l", "m", "n", "o", "u", "v"), c("3")), | |
cbind(c("a", "b", "c", "d", "f", "g", "h", "z"), c("4")), | |
cbind(c("m", "n", "o", "p", "q", "v"), c("5")) | |
) | |
user.graph <- graph.edgelist(user.el, directed=FALSE) | |
plot(user.graph) | |
user = "seed" | |
user.ego <- induced.subgraph(user.graph, c(which(V(user.graph)$name==user)[1], neighbors(user.graph, user))) | |
plot(user.ego) | |
(sp <- shortest.paths(user.ego)) | |
(hc <- hclust(dist(sp))) | |
plot(hc) | |
(ct <- cutree(hc, 4)) | |
ct[order(ct)] | |
user.ego <- set.vertex.attribute(user.ego, "Label", value = get.vertex.attribute(user.ego, "name")) | |
for(i in 2:6) { | |
ct <- cutree(hc, k=i) | |
user.cluster <- as.character(ct) | |
user.cluster[which(names(ct)==user)[1]] <- "0" | |
user.ego <- set.vertex.attribute(user.ego, | |
name=paste("HC",i,sep=""), | |
value=user.cluster) | |
} | |
write.graph(user.ego, paste(user, "_ego.graphml", sep=""), format = "graphml") | |
# | |
# Suggest users | |
# | |
user <- "seed" | |
(friends <- V(user.graph)$name[neighbors(user.graph, user, mode = "out")]) | |
1:nrow(user.el) | |
user.el[1,] | |
(non.friends <- sapply( | |
1:nrow(user.el), | |
function(i) ifelse( | |
any(user.el[i,] == user | !user.el[i,1] %in% friends) | user.el[i,2] %in% friends, | |
FALSE, TRUE))) | |
(non.friends.el <- user.el[which(non.friends == TRUE),]) | |
(friends.count <- table(non.friends.el[,2])) | |
friends.followers <- data.frame(list(Twitter.Users = names(friends.count), | |
Friends.Following=as.numeric(friends.count)), stringsAsFactors = FALSE) | |
friends.followers$Friends.Norm <- friends.followers$Friends.Following / length(friends) | |
friends.followers <- friends.followers[with(friends.followers, order(Twitter.Users)),] | |
head(friends.followers) | |
write.csv(friends.followers, paste(user, "_friends_rec.csv", sep=""), row.names=FALSE) | |
# | |
# Suggest users 2 | |
# | |
#user.ego <- suppressWarnings(read.graph(paste(user, "_ego.graphml", sep = ""), format = "graphml")) | |
(friends.partitions <- cbind(V(user.ego)$HC6, V(user.ego)$name)) | |
partition.follows <- function(i) { | |
friends.in <- friends.partitions[which(friends.partitions[,1] == i),2] | |
partition.non.follow <- non.friends.el[which(!is.na(match(non.friends.el[,1], friends.in))),] | |
# If there are no matches for non-followers, return NA | |
if(nrow(partition.non.follow) < 2) { | |
return(c(i, NA)) | |
} | |
# If there are, return the most popualr user followed by members of this partition | |
else { | |
partition.favorite <- table(partition.non.follow[,2]) | |
partition.favorite <- partition.favorite[order(-partition.favorite)] | |
return(c(i, names(partition.favorite)[1])) | |
} | |
} | |
# Run the partition.follow function over all parition, and remove the NAs and duplicate reccomendations | |
(partition.recs <- t(sapply(unique(friends.partitions[,1]), partition.follows))) | |
(partition.recs <- partition.recs[!is.na(partition.recs[,2]) & !duplicated(partition.recs[,2]),]) | |
# Get the node index for the entire graph, plus reccommended users | |
(new.friends <- as.character(c(V(user.ego)$name, partition.recs[,2]))) | |
(new.index <- match(new.friends, V(user.graph)$name)) | |
# Take a new subgraph, which includes the new reccommendations | |
partition.graph <- subgraph(user.graph, new.index) | |
# Add some vertex attribute data for the visualization | |
(all.partition <- rbind(cbind(get.vertex.attribute(user.ego, "HC6"), V(user.ego)$name), partition.recs)) | |
(all.index <- match(as.character(all.partition[,2]), V(partition.graph)$name)) | |
partition.graph <- set.vertex.attribute(partition.graph, "REC", index = all.index, value = all.partition[,1]) | |
(vertex.sizes <- c("3", rep("1", vcount(user.ego)-1), rep("2", nrow(partition.recs)))) | |
partition.graph <- set.vertex.attribute(partition.graph, "SIZE", index = all.index, value = vertex.sizes) | |
partition.graph <- set.vertex.attribute(partition.graph, "Label", value = get.vertex.attribute(partition.graph, "name")) | |
write.graph(partition.graph, paste(user, "_rec.graphml",sep = ""), format = "graphml") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment