Skip to content

Instantly share code, notes, and snippets.

@masaha03
Created May 15, 2011 11:59
Show Gist options
  • Save masaha03/973085 to your computer and use it in GitHub Desktop.
Save masaha03/973085 to your computer and use it in GitHub Desktop.
KAKENから研究者ネットワーク作成
library(XML)
# 社会学者一覧
# 研究種目が社会学の研究者を検索→研究者番号を取得→検索2ページ目以降の研究者の研究者番号をループで所得
# 取得の所要時間は5分程度
search.result <- xmlParse(enc2utf8('http://kaken.nii.ac.jp/opensearchr.cgi?count=200&q1="社会学"')) # 研究者検索
search.result <- xmlToList(search.result)
search.result <- rapply(search.result, function(x){iconv(x,"UTF-8","CP932")}, how="replace")
entries <- search.result[which(names(search.result)=="entry")]
r.names <- sapply(entries, function(x){x$title$text}) # 研究者名
links <- sapply(entries, function(x){x$link}) # 研究者ページへのリンク
numbers <- sub("http://kaken.nii.ac.jp/ja/r/", "", links) # 研究者番号
names(numbers) <- r.names
selfURL <- search.result[[11]][[2]] # 検索の現在のページ
nextURL <- search.result[[13]][[2]] # 検索の次のページ
lastURL <- search.result[[14]][[2]] # 検索の最後のページ
continue <- TRUE
while(continue) { # 検索の2ページ目以降の研究者をループで取得
search.result <- xmlParse(enc2utf8(nextURL))
search.result <- xmlToList(search.result)
search.result <- rapply(search.result, function(x){iconv(x,"UTF-8","CP932")}, how="replace")
entries <- search.result[which(names(search.result)=="entry")]
r.names <- sapply(entries, function(x){x$title$text})
links <- sapply(entries, function(x){x$link})
numbers.tmp <- sub("http://kaken.nii.ac.jp/ja/r/", "", links)
names(numbers.tmp) <- r.names
numbers <- c(numbers, numbers.tmp)
selfURL <- search.result[[11]][[2]]
nextURL <- search.result[[14]][[2]]
continue <- selfURL != lastURL # 現在のページのURLと最後のページのURLが同じならば終了
}
length(numbers)
# 研究者ネットワーク
# 研究者番号から研究者ネットワークを作成(https://gist.github.com/973078)
# 100人分で8分程度
numbers.sample <- sample(numbers, 100) # 研究者をサンプリング
researcher.network <- list()
for (number in numbers.sample){
url <- sprintf("http://kaken.nii.ac.jp/opensearchk.cgi?count=200&q15=%s", number)
Sys.sleep(0.5) # サーバー負荷対策
search.result <- xmlParse(enc2utf8(url))
search.result <- xmlToList(search.result)
search.result <- rapply(search.result, function(x){iconv(x,"UTF-8","CP932")}, how="replace")
entries <- search.result[which(names(search.result)=="entry")]
links <- sapply(entries, function(x){x$link})
links <- sub("ja","rdf", links)
membersNum <- list()
for (i in seq_along(links)){
Sys.sleep(0.5) # サーバー負荷対策
kaken.page <- xmlParse(links[i])
kaken.page <- xmlToList(kaken.page)
kaken.page <- rapply(kaken.page, function(x){iconv(x,"UTF-8","CP932")}, how="replace")
project <- kaken.page$Project
members <- project[which(names(project)=="member")]
membersNum[[i]] <- sapply(members, function(x){x$Researcher$researcherNumber})
}
membersNum <- list(unique(unlist(membersNum)))
names(membersNum) <- number
researcher.network <- c(researcher.network, membersNum)
}
# 隣接行列作成
row <- unique(unlist(researcher.network))
r.names <- names(researcher.network)
mat <- matrix(0, nrow=length(numbers), ncol=length(numbers), dimnames=list(numbers, numbers))
for (i in seq_along(researcher.network)){
col <- names(researcher.network)[i]
row <- researcher.network[[i]]
row <- row[row %in% numbers] # 研究種目が社会学でない研究者を除去
mat[row, col] <- 1
}
mat <- mat[r.names, r.names]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment