Skip to content

Instantly share code, notes, and snippets.

@danmichaelo
Last active July 14, 2017 15:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danmichaelo/266a72a1f959fd9127383e2d4acd1453 to your computer and use it in GitHub Desktop.
Save danmichaelo/266a72a1f959fd9127383e2d4acd1453 to your computer and use it in GitHub Desktop.
DanmicholoBot 8: import establishment dates
import pwb
import pywikibot
from pywikibot.page import Claim
from pywikibot.data import sparql
import re
import logging
import time
# Log INFO and above, each line prefixed with a timestamp and the level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# Root logger; every function in this script logs through it.
logger = logging.getLogger()
# Source wiki whose categories are traversed (no.wikipedia).
site = pywikibot.Site('no', 'wikipedia')
# Data repository linked to the source wiki; items and claims are created against it.
repo = site.data_repository()
# SPARQL client bound to the repository, used by get_item() to inspect items.
endpoint = sparql.SparqlQuery(repo=repo)
# Titles of pages already seen by process_page(), so each page is handled once.
title_cache = set()
# Number of pages for which process_page() found an item lacking a date.
edit_counter = 0
def get_item(page, year):
    """
    Return the Wikidata item linked to <page> if it is eligible for a new
    establishment date, otherwise None.

    The item is eligible when it
    (1) is an instance of (a subclass of) one of the classes listed in the
        query's VALUES clause (organization, construction, kulturminne,
        trykksak, publikasjon) AND
    (2) has no direct claim for P580 or any of its sub-properties.

    <year> is only used for logging, so that an existing date can be
    compared with the year derived from the category name.

    None is also returned when the page has no item or when the SPARQL
    query yields no usable result.
    """
    try:
        item = page.data_item()
    except pywikibot.exceptions.NoPage:
        logger.warning('No wikidata item for page: %s', page.title())
        return None

    item_id = item.getID()
    query = """
SELECT ?date ?cls
WHERE
{
wd:%(item_id)s wdt:P31/wdt:P279* ?cls . # organization
OPTIONAL {
wd:%(item_id)s ?prop ?date .
?prop ^wikibase:directClaim/wdt:P1647* wd:P580 .
}
VALUES ?cls {
wd:Q43229 # organization
wd:Q811430 # construction
wd:Q2065736 # kulturminne
wd:Q1261026 # trykksak
wd:Q732577 # publikasjon
}
}
""" % {'item_id': item_id}

    rows = endpoint.select(query)
    if rows is None:
        # select() hands back None instead of a list when the query itself
        # fails; skip the page rather than crash on len(None).
        logger.warning('SPARQL query failed for page: %s', page.title())
        return None
    if not rows:
        # No row at all means the item matched none of the accepted classes.
        logger.info('Ignoring page: %s', page.title())
        return None

    # Look at every row, not just the first one, for an already present date.
    existing_date = next(
        (row['date'] for row in rows if row['date'] is not None), None)
    if existing_date is not None:
        logger.info('Already a statement at %s: %s. Our year: %d',
                    page.title(), existing_date, year)
        return None
    return item
def process_page(page, year):
    """
    Check if we should add <year> as establishment date for the item
    linked to <page>.

    Each page title is handled at most once per run (tracked in the
    module-level title_cache). When get_item() returns an item, the
    candidate edit is logged and the module-level edit_counter is
    incremented. The actual write is currently disabled (see the
    commented-out call at the end), so this function is a dry run.
    """
    # The docstring has to be the first statement of the body; with the
    # `global` declaration in front of it the string was silently discarded.
    global edit_counter

    if page.title() in title_cache:
        return
    title_cache.add(page.title())

    item = get_item(page, year)
    if item is not None:
        logger.info(' -> Add %d to %s (%s)?', year, page.title(), item.getID())
        edit_counter += 1
        logger.info('Current: %d', edit_counter)
        # Dry run: re-enable the two lines below to actually write the claim.
        # time.sleep(1)
        # process_item(item, year)
def process_item(item, year):
    """
    Write <year> to <item> as a P571 statement and attach a P143
    ("imported from") reference pointing at Q191769 (nowiki).
    """
    # Main statement: P571 with the year as its time value.
    date_claim = Claim(repo, 'P571')
    established = pywikibot.WbTime(year=year)
    date_claim.setTarget(established)
    item.addClaim(date_claim)

    # Reference; added after the statement has been attached to the item.
    reference = Claim(repo, 'P143')  # imported from
    nowiki = pywikibot.ItemPage(repo, 'Q191769')  # nowiki
    reference.setTarget(nowiki)
    date_claim.addSource(reference)
def process_category(cat, visited=None):
    """
    Traverse category tree starting from <cat>

    The establishment year is parsed from the end of the category title
    ("... etableringer|etablert|grunnlagt i <digits>"). Member pages that
    are not redirects are passed to process_page() together with that year;
    if the title carries no year, a warning is logged for each such page
    instead. Subcategories are followed only when their title contains one
    of the establishment keywords.

    <visited> is the set of category titles already traversed in this walk.
    Callers normally omit it; it is threaded through the recursion so that
    a category reachable by several paths, or a cycle in the category
    graph, is traversed only once.
    """
    if visited is None:
        visited = set()
    cat_title = cat.title()
    if cat_title in visited:
        return
    visited.add(cat_title)

    # logger.info(cat.title())
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    year_match = re.search(
        r'(?:etableringer|etablert|grunnlagt) i (\d+)$', cat_title, re.I)
    year = int(year_match.group(1)) if year_match else None

    for member in cat.members():
        if isinstance(member, pywikibot.page.Category):
            keyword = re.search(
                '(etableringer|etablert|grunnlagt)', member.title(), re.I)
            if keyword is None:
                logger.warning('Will not follow subcategory: %s', member.title())
            else:
                process_category(member, visited)
        elif isinstance(member, pywikibot.page.Page) and not member.isRedirectPage():
            if year is None:
                logger.warning('Did not match: %s', cat_title)
            else:
                process_page(member, year)
# Script entry: start the traversal at the root category
# "Selskaper etter etableringsår" (companies by year of establishment).
cat = pywikibot.Category(site, 'Kategori:Selskaper etter etableringsår')
process_category(cat)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment