Skip to content

Instantly share code, notes, and snippets.

@ringtaro
Last active December 28, 2015 18:38
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save ringtaro/7544101 to your computer and use it in GitHub Desktop.
Save ringtaro/7544101 to your computer and use it in GitHub Desktop.
入門機会学習勉強会第11章ソーシャルグラフの分析 どうやってソーシャルネットワークのグラフを取得するか
#
# Graph programming
#
library(igraph)
(a.el <- rbind(c("a", "b"), c("b","c")))
a.g <- graph.edgelist(a.el)
plot(a.g)
(b.el <- cbind(c("d"), c("a","b","c")))
(c.el <- rbind(a.el, b.el))
c.g <- graph.edgelist(c.el)
plot(c.g)
(d.el <- cbind(c("d"), c("a","b","c")))
(d.el <- rbind(d.el, c("b", "c")))
(d.el <- rbind(d.el, c(integer(0), integer(0))))
# Omake
integer(2)
integer(0)
c("a", integer(0))
(d.el <- rbind(d.el, c("a", integer(0))))
(d.el <- rbind(d.el, "a"))
c(integer(0), "a")
(d.el <- rbind(d.el, c(integer(0), "a")))
(d.el <- rbind(d.el, "a"))
(d.el <- cbind(c("d"), c("a","b","c")))
(d.el <- cbind(c("d", "e"), c("a","b","c"))) #NG
(d.el <- cbind(c("d", "e"), c("a","b","c", "z"))) #OK
#
# build ego-network
#
library(RJSONIO)
find.twitter <- function(node.vector) {
twitter.nodes <- node.vector[grepl("http://twitter.com/", node.vector, fixed = TRUE)]
if(length(twitter.nodes) > 0) {
twitter.users <- strsplit(twitter.nodes, "/")
user.vec <- sapply(1:length(twitter.users),
function(i) (ifelse(twitter.users[[i]][4] == "account", NA, twitter.users[[i]][4])))
return(user.vec[which(!is.na(user.vec))])
}
else {
return(character(0))
}
}
# https://socialgraph.googleapis.com/lookup?q=http://twitter.com/ringtaro&edo=1&edi=1
(api.json <- fromJSON(paste(readLines("ringtaro.json"), collapse="")))
(ego <- find.twitter(names(api.json$nodes)))
(nodes.out <- names(api.json$nodes[[1]]$nodes_referenced))
(twitter.friends <- find.twitter(nodes.out))
(friends <- cbind(ego, twitter.friends))
(nodes.in <- names(api.json$nodes[[1]]$nodes_referenced_by))
(twitter.followers <- find.twitter(nodes.in))
(followers <- cbind(twitter.followers, ego))
(ego.el <- rbind(friends, followers))
#
# Snowball
#
get.seeds <- function(snowball.el, seed) {
new.seeds <- unique(c(snowball.el[,1], snowball.el[,2]))
return(new.seeds[which(new.seeds != seed)])
}
user <- "olduser"
(user.el <- rbind(
cbind(c(user), c("newuser", "a", "c", "d")),
cbind(c("newuser", "e", "b", "d"), c(user))))
(new.seeds <- get.seeds(user.el, user))
snowball.el <- user.el
all.nodes <- user
next.seeds <- c()
user <- "newuser"
(user.el <- rbind(
cbind(c(user), c("a", "b", "c", "olduser")),
cbind(c("e", "b", "c", "olduser"), c(user))))
(snowball.el <- rbind(snowball.el, user.el))
(next.seeds <- c(next.seeds, get.seeds(user.el, user)))
(all.nodes <- c(all.nodes, user))
user <- "newuser"
(user.el <- rbind(
cbind(c(user), c("a", "b", "c", "olduser")),
cbind(c("e", "b", "c", "olduser"), c(user))))
(snowball.el <- rbind(snowball.el, user.el))
(next.seeds <- c(next.seeds, get.seeds(user.el, user)))
(all.nodes <- c(all.nodes, user))
(new.seeds <- unique(next.seeds))
(new.seeds <- new.seeds[!(new.seeds %in% all.nodes)])
(snowball.el <- snowball.el[!duplicated(snowball.el),])
#
# Add Label
#
library(igraph)
ego0.net <- graph.edgelist(ego.el)
plot(ego0.net)
write.graph(ego0.net, "ego0.graphml", format = "graphml")
ego1.net <- set.vertex.attribute(ego0.net, "Label", value = get.vertex.attribute(ego0.net, "name"))
write.graph(ego1.net, "ego1.graphml", format = "graphml")
#
# Screening
#
# coreness
library(igraph)
a.el <- cbind(c("z"), c("a","b","c"))
a.g <- graph.edgelist(a.el,directed=FALSE)
plot(a.g)
graph.coreness(a.g)
a.el <- rbind(a.el, c("a","d"))
a.g <- graph.edgelist(a.el,directed=FALSE)
plot(a.g)
graph.coreness(a.g)
a.el <- rbind(a.el, c("b","d"))
a.g <- graph.edgelist(a.el,directed=FALSE)
plot(a.g)
graph.coreness(a.g)
a.el <- rbind(a.el, c("a","e"))
a.el <- rbind(a.el, c("d","e"))
a.g <- graph.edgelist(a.el,directed=FALSE)
plot(a.g)
graph.coreness(a.g)
a.el <- rbind(a.el, c("a","b"))
a.g <- graph.edgelist(a.el,directed=FALSE)
plot(a.g)
graph.coreness(a.g)
a.el <- rbind(a.el, c("e","b"))
a.g <- graph.edgelist(a.el,directed=FALSE)
plot(a.g)
graph.coreness(a.g)
a.cores <- graph.coreness(a.g)
a.cores > 1
which(a.cores > 1)
a.clean <- induced.subgraph(a.g, which(a.cores > 1))
plot(a.clean)
# ego-network
user = "a"
neighbors(a.g, user)
which(V(a.g)$name==user)[1]
c(which(V(a.g)$name==user)[1], neighbors(a.g, user))
ego <- induced.subgraph(a.g, c(which(V(a.g)$name==user)[1], neighbors(a.g, user)))
plot(ego)
#
# Hierarchical Clustering
#
library(igraph)
user.el <- rbind(
cbind(c("seed"), c("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z")),
cbind(c("a"), c("b", "c", "e", "h", "i", "l", "m", "n", "o", "v", "w", "y", "z")),
cbind(c("b"), c("h", "i", "j", "o", "q", "r", "s", "x", "y")),
cbind(c("c"), c("d", "f", "g", "h", "p", "r", "s", "w", "y")),
cbind(c("d"), c("e", "i", "j", "k", "p", "q", "u", "v", "z")),
cbind(c("e"), c("j", "k", "l", "x", "y", "z")),
cbind(c("f"), c("g", "n", "o", "p", "q", "r", "s", "w")),
cbind(c("g"), c("h", "i", "j", "u", "v", "w", "x", "y", "z")),
cbind(c("h"), c("i", "j", "k", "l", "n", "o", "q", "s")),
cbind(c("i"), c("j", "k", "u", "v", "w", "y")),
cbind(c("j"), c("v", "w", "z")),
cbind(c("k"), c("l", "m", "w", "x", "y", "z")),
cbind(c("l"), c("m", "n", "r", "s", "t", "u", "v", "w", "x", "y", "z")),
cbind(c("m"), c("n", "o", "v", "w", "x", "y", "z")),
cbind(c("n"), c("s", "t", "u", "v", "w", "x", "y", "z")),
cbind(c("o"), c("t", "u", "v", "y", "z")),
cbind(c("p"), c("q", "r", "x", "z")),
cbind(c("q"), c("r", "s", "t")),
cbind(c("r"), c("s", "v", "w", "y")),
cbind(c("s"), c("t", "u", "w")),
cbind(c("t"), c("u", "v", "z")),
cbind(c("u"), c("v", "w")),
cbind(c("v"), c("w", "y", "z")),
cbind(c("w"), c("x")),
cbind(c("x"), c("y")),
cbind(c("y"), c("z")),
cbind(c("a", "b", "c", "i", "j", "k", "l", "s", "t", "u", "v", "w", "x", "y", "z"), c("1")),
cbind(c("a", "b", "c", "d", "e", "f", "g", "p", "q", "v", "x", "y", "z"), c("2")),
cbind(c("h", "i", "j", "l", "m", "n", "o", "u", "v"), c("3")),
cbind(c("a", "b", "c", "d", "f", "g", "h", "z"), c("4")),
cbind(c("m", "n", "o", "p", "q", "v"), c("5"))
)
user.graph <- graph.edgelist(user.el, directed=FALSE)
plot(user.graph)
user = "seed"
user.ego <- induced.subgraph(user.graph, c(which(V(user.graph)$name==user)[1], neighbors(user.graph, user)))
plot(user.ego)
(sp <- shortest.paths(user.ego))
(hc <- hclust(dist(sp)))
plot(hc)
(ct <- cutree(hc, 4))
ct[order(ct)]
user.ego <- set.vertex.attribute(user.ego, "Label", value = get.vertex.attribute(user.ego, "name"))
for(i in 2:6) {
ct <- cutree(hc, k=i)
user.cluster <- as.character(ct)
user.cluster[which(names(ct)==user)[1]] <- "0"
user.ego <- set.vertex.attribute(user.ego,
name=paste("HC",i,sep=""),
value=user.cluster)
}
write.graph(user.ego, paste(user, "_ego.graphml", sep=""), format = "graphml")
#
# Suggest users
#
user <- "seed"
(friends <- V(user.graph)$name[neighbors(user.graph, user, mode = "out")])
1:nrow(user.el)
user.el[1,]
(non.friends <- sapply(
1:nrow(user.el),
function(i) ifelse(
any(user.el[i,] == user | !user.el[i,1] %in% friends) | user.el[i,2] %in% friends,
FALSE, TRUE)))
(non.friends.el <- user.el[which(non.friends == TRUE),])
(friends.count <- table(non.friends.el[,2]))
friends.followers <- data.frame(list(Twitter.Users = names(friends.count),
Friends.Following=as.numeric(friends.count)), stringsAsFactors = FALSE)
friends.followers$Friends.Norm <- friends.followers$Friends.Following / length(friends)
friends.followers <- friends.followers[with(friends.followers, order(Twitter.Users)),]
head(friends.followers)
write.csv(friends.followers, paste(user, "_friends_rec.csv", sep=""), row.names=FALSE)
#
# Suggest users 2
#
#user.ego <- suppressWarnings(read.graph(paste(user, "_ego.graphml", sep = ""), format = "graphml"))
(friends.partitions <- cbind(V(user.ego)$HC6, V(user.ego)$name))
partition.follows <- function(i) {
friends.in <- friends.partitions[which(friends.partitions[,1] == i),2]
partition.non.follow <- non.friends.el[which(!is.na(match(non.friends.el[,1], friends.in))),]
# If there are no matches for non-followers, return NA
if(nrow(partition.non.follow) < 2) {
return(c(i, NA))
}
# If there are, return the most popualr user followed by members of this partition
else {
partition.favorite <- table(partition.non.follow[,2])
partition.favorite <- partition.favorite[order(-partition.favorite)]
return(c(i, names(partition.favorite)[1]))
}
}
# Run the partition.follow function over all parition, and remove the NAs and duplicate reccomendations
(partition.recs <- t(sapply(unique(friends.partitions[,1]), partition.follows)))
(partition.recs <- partition.recs[!is.na(partition.recs[,2]) & !duplicated(partition.recs[,2]),])
# Get the node index for the entire graph, plus reccommended users
(new.friends <- as.character(c(V(user.ego)$name, partition.recs[,2])))
(new.index <- match(new.friends, V(user.graph)$name))
# Take a new subgraph, which includes the new reccommendations
partition.graph <- subgraph(user.graph, new.index)
# Add some vertex attribute data for the visualization
(all.partition <- rbind(cbind(get.vertex.attribute(user.ego, "HC6"), V(user.ego)$name), partition.recs))
(all.index <- match(as.character(all.partition[,2]), V(partition.graph)$name))
partition.graph <- set.vertex.attribute(partition.graph, "REC", index = all.index, value = all.partition[,1])
(vertex.sizes <- c("3", rep("1", vcount(user.ego)-1), rep("2", nrow(partition.recs))))
partition.graph <- set.vertex.attribute(partition.graph, "SIZE", index = all.index, value = vertex.sizes)
partition.graph <- set.vertex.attribute(partition.graph, "Label", value = get.vertex.attribute(partition.graph, "name"))
write.graph(partition.graph, paste(user, "_rec.graphml",sep = ""), format = "graphml")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment