Skip to content

Instantly share code, notes, and snippets.

@yubessy
Created June 29, 2014 03:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yubessy/ac8bb81b3dd6034b5189 to your computer and use it in GitHub Desktop.
Save yubessy/ac8bb81b3dd6034b5189 to your computer and use it in GitHub Desktop.
MediaWiki APIを使って項目の別名を取得したりなど
#-*- encoding: utf-8 -*-
# third-party libraries
import requests
# MediaWiki API endpoint (English Wikipedia) that every request in this module is sent to.
BASE_URL = "http://en.wikipedia.org/w/api.php"
# Value sent as the `bllimit` parameter of backlinks queries, i.e. the number
# of results requested per API call.
BLLIMIT = 250
def get_regular_name(q):
    """Resolve *q* to its canonical page title by following redirects.

    Sends an ``action=query&prop=info&redirects`` request to ``BASE_URL``,
    e.g. http://en.wikipedia.org/w/api.php?action=query&format=json&prop=info&titles=3.11&redirects

    Args:
        q: Page title (or redirect title) to look up.

    Returns:
        The resolved title as the interpreter's native ``str`` type
        (UTF-8 encoded bytes on Python 2, text on Python 3), or ``None``
        when the response reports no matching page.
    """
    # Parameters handed to the MediaWiki API.  The title is passed through
    # unchanged so that requests performs the encoding; formatting it into
    # a byte string first breaks on non-ASCII unicode input under Python 2
    # and turns bytes into "b'...'" under Python 3.
    params = {
        "action": "query",
        "format": "json",
        "prop": "info",
        "titles": q,
        "redirects": "",
    }

    # Fetch the result from the API as JSON.
    jsondata = requests.get(BASE_URL, params=params).json()

    # A response without a page list is treated the same as "not found"
    # instead of raising KeyError.
    pages = jsondata.get("query", {}).get("pages", {})
    # The API files a page it could not find under the key "-1".
    if not pages or "-1" in pages:
        return None

    # dict views cannot be indexed on Python 3, so take the first value
    # through an iterator; this works on Python 2 as well.
    title = next(iter(pages.values()))["title"]

    # Keep the historical Python 2 contract (a UTF-8 byte ``str``) while
    # leaving the title untouched on Python 3, where it already is ``str``.
    if not isinstance(title, str):
        title = title.encode("utf-8")
    return title
def get_variant_names(q, regularize=True):
    """Collect the alternative names of a page, i.e. the titles redirecting to it.

    Sends ``list=backlinks`` queries restricted to redirects in namespace 0,
    e.g. http://en.wikipedia.org/w/api.php?action=query&format=json&list=backlinks&bltitle=OCaml&blnamespace=0&blfilterredir=redirects&bllimit=250
    and keeps requesting further batches until the API stops returning
    continuation data.

    Args:
        q: Page title whose redirects are wanted.
        regularize: When true, *q* is first resolved to its canonical title
            with ``get_regular_name``.

    Returns:
        A sorted list of unique redirect titles as native ``str`` values
        (UTF-8 encoded bytes on Python 2, text on Python 3).  The list is
        empty when *regularize* is true and no canonical title was found.
    """
    # Resolve the canonical title first.
    if regularize:
        q = get_regular_name(q)
        if q is None:
            # Without this guard the literal title "None" would be queried.
            return []

    # Parameters handed to the MediaWiki API; the title is passed through
    # unchanged so that requests takes care of encoding it.
    params = {
        "action": "query",
        "format": "json",
        "list": "backlinks",
        "bltitle": q,
        "blnamespace": "0",
        "blfilterredir": "redirects",
        "bllimit": str(BLLIMIT),
    }

    names = set()
    while True:
        # Fetch one batch of results.
        jsondata = requests.get(BASE_URL, params=params).json()

        # Record the title of every redirecting page in this batch.
        for entry in jsondata["query"]["backlinks"]:
            title = entry["title"]
            # Python 2 gets a UTF-8 byte str as before; on Python 3 the
            # title already is ``str`` and is kept as text.
            if not isinstance(title, str):
                title = title.encode("utf-8")
            names.add(title)

        # Keep going while the API reports that more results remain.
        if "continue" in jsondata:
            # Current continuation format: every key of the "continue"
            # object has to be sent back with the next request.
            params.update(jsondata["continue"])
        elif "query-continue" in jsondata:
            # Legacy continuation format.
            params["blcontinue"] = (
                jsondata["query-continue"]["backlinks"]["blcontinue"])
        else:
            break

    return sorted(names)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment