Created
June 29, 2014 03:19
-
-
Save yubessy/ac8bb81b3dd6034b5189 to your computer and use it in GitHub Desktop.
MediaWiki APIを使って項目の別名を取得したりなど
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#-*- encoding: utf-8 -*- | |
# thirdlib | |
import requests | |
# MediaWiki API endpoint of the English Wikipedia. HTTPS is used because
# Wikimedia redirects plain-HTTP requests to HTTPS, which would cost an extra
# round trip on every call.
BASE_URL = "https://en.wikipedia.org/w/api.php"

# Number of backlinks requested per API call (sent as the `bllimit` parameter).
BLLIMIT = 250
def get_regular_name(q):
    """Resolve a title to its canonical article title via redirects.

    Queries the MediaWiki API for page info about ``q`` with redirect
    resolution enabled, so a redirect title is reported under the title of
    the page it points to.

    Args:
        q: Title to look up.

    Returns:
        The canonical title as a native ``str`` (UTF-8 encoded on Python 2,
        text on Python 3), or ``None`` when the API reports the page as
        missing or returns no page data at all.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    # Example query string:
    #   action=query&format=json&prop=info&titles=3.11&redirects
    # The title is passed through unchanged so that requests performs the
    # URL encoding; formatting it into a byte string first fails on
    # Python 2 for non-ASCII unicode titles.
    params = {
        "action": "query",
        "format": "json",
        "prop": "info",
        "titles": q,
        "redirects": "",
    }

    res = requests.get(BASE_URL, params=params, timeout=30)
    res.raise_for_status()

    # A response may lack "query"/"pages" entirely (e.g. for an empty
    # title); treat that the same as "no such page".
    pages = res.json().get("query", {}).get("pages", {})
    if not pages or "-1" in pages:
        return None

    # Dict views are not indexable on Python 3, so take the first page
    # through an iterator instead of items()[0].
    title = next(iter(pages.values()))["title"]

    # Return a native str on both major versions: Python 2 JSON text is
    # `unicode` and gets encoded; Python 3 text is already `str`.
    if not isinstance(title, str):
        title = title.encode("utf-8")
    return title
def get_variant_names(q, regularize=True):
    """Collect the alternative names of an article from its redirects.

    Lists every main-namespace page that redirects to ``q`` using the
    MediaWiki ``backlinks`` list, following API continuation until all
    batches have been fetched.

    Args:
        q: Title of the article whose redirect titles are wanted.
        regularize: When true, first resolve ``q`` to its canonical title
            with ``get_regular_name``.

    Returns:
        A sorted list of unique redirect titles as native ``str`` values.
        The list is empty when ``regularize`` is true and the title cannot
        be resolved.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    if regularize:
        q = get_regular_name(q)
        if q is None:
            # Without this guard the literal title "None" would be queried.
            return []

    # Example query string:
    #   action=query&format=json&list=backlinks&bltitle=OCaml
    #   &blnamespace=0&blfilterredir=redirects&bllimit=250
    params = {
        "action": "query",
        "format": "json",
        "list": "backlinks",
        "bltitle": q,
        "blnamespace": "0",
        "blfilterredir": "redirects",
        "bllimit": "{0}".format(BLLIMIT),
    }

    names = set()
    while True:
        res = requests.get(BASE_URL, params=params, timeout=30)
        res.raise_for_status()
        jsondata = res.json()

        for link in jsondata.get("query", {}).get("backlinks", []):
            title = link["title"]
            # Native str on both Python 2 (encode unicode) and Python 3.
            if not isinstance(title, str):
                title = title.encode("utf-8")
            names.add(title)

        # Feed the continuation values back into the next request. The
        # current API reports them under "continue"; the legacy format
        # nests them under "query-continue" per list module.
        if "continue" in jsondata:
            params.update(jsondata["continue"])
        elif "query-continue" in jsondata:
            params.update(jsondata["query-continue"]["backlinks"])
        else:
            break

    return sorted(names)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment