Skip to content

Instantly share code, notes, and snippets.

@Abbe98
Created March 9, 2022 21:47
Show Gist options
  • Save Abbe98/499303219b8329557a5aeca70335413f to your computer and use it in GitHub Desktop.
Save Abbe98/499303219b8329557a5aeca70335413f to your computer and use it in GitHub Desktop.
import difflib
import sys
import mwparserfromhell
import pywikibot
import requests
from pywikibot import pagegenerators
# ANSI 24-bit foreground colour helpers. Each one wraps ``text`` in the escape
# sequence for its colour and then switches the foreground back to white
# (not a terminal reset), so following output is printed in white.
_WHITE_FOREGROUND = '\033[38;2;255;255;255m'


def _colorize(text, r, g, b):
    """Return ``text`` prefixed with the RGB foreground escape and followed by white."""
    return f'\033[38;2;{r};{g};{b}m{text}{_WHITE_FOREGROUND}'


def red(text):
    """Return ``text`` coloured red."""
    return _colorize(text, 255, 0, 0)


def green(text):
    """Return ``text`` coloured green."""
    return _colorize(text, 0, 255, 0)


def blue(text):
    """Return ``text`` coloured blue."""
    return _colorize(text, 0, 0, 255)


def white(text):
    """Return ``text`` coloured white."""
    return _colorize(text, 255, 255, 255)
def get_edits_string(old, new):
    """
    Build a coloured, character-level diff between ``old`` and ``new``.

    Unchanged text is wrapped with ``white``, text only present in ``old``
    with ``red`` and text only present in ``new`` with ``green``. A replaced
    span is emitted as the red old text directly followed by the green new
    text.

    Returns the concatenation of all coloured spans as a single string
    (an empty string when both inputs are empty).
    """
    pieces = []
    matcher = difflib.SequenceMatcher(a=old, b=new)
    # Each opcode is (tag, old_start, old_end, new_start, new_end).
    for tag, old_start, old_end, new_start, new_end in matcher.get_opcodes():
        if tag == "equal":
            pieces.append(white(old[old_start:old_end]))
        elif tag == "delete":
            pieces.append(red(old[old_start:old_end]))
        elif tag == "insert":
            pieces.append(green(new[new_start:new_end]))
        elif tag == "replace":
            pieces.append(red(old[old_start:old_end]))
            pieces.append(green(new[new_start:new_end]))
    # Join once instead of growing a string with += inside the loop.
    return "".join(pieces)
def is_fmis_identifier(potential_identifier: str) -> bool:
    """
    Check if the potential_identifier is a valid FMIS identifier.

    A value is accepted when its text form is exactly 14 characters long and
    every character is an ASCII digit (0-9). The argument is converted with
    ``str()`` first, so string-like objects (such as a template parameter)
    are checked by their textual representation.

    Returns True for a match, False otherwise.
    """
    candidate = str(potential_identifier)
    # str.isdigit() also accepts characters such as superscripts and
    # non-Latin digits, so compare against the ASCII range explicitly.
    return len(candidate) == 14 and all('0' <= char <= '9' for char in candidate)
def get_kmr_identifier(fmis_identifier):
    """
    Get the corresponding KMR identifier from the FMIS identifier.

    Sends a HEAD request to kulturarvsdata.se for the FMIS identifier and
    reads the last path segment of the ``Location`` header of the 302
    response.

    Returns the 36-character identifier as a string, or False when the
    request fails, the response is not a 302 redirect, the ``Location``
    header is missing, or its last segment is not 36 characters long.
    """
    # HTTPS is needed here, because F5 will redirect before the request hits SOCH
    url = f'https://kulturarvsdata.se/raa/fmi/{fmis_identifier}'
    try:
        # The redirect itself is the answer, so it must not be followed; the
        # timeout keeps an unresponsive server from blocking the run forever.
        response = requests.head(url, allow_redirects=False, timeout=30)
    except requests.RequestException as error:
        print(f'Request to {url} failed: {error}')
        return False

    if response.status_code == 302:
        uuid = response.headers.get('Location', '').split('/')[-1]
        if len(uuid) == 36:
            return uuid
    return False
# Function copied from pywikibot's pagegenerators.py (it could not be executed by scripts; known issue).
def ReferringPageGenerator(referredPage, followRedirects=False,
                           with_template_inclusion=False,
                           only_template_inclusion=False,
                           total=None, content=False):
    """
    Return the pages referring to ``referredPage``.

    Thin wrapper that forwards its options as keyword arguments to
    ``referredPage.getReferences`` and returns whatever that call returns.
    ``followRedirects`` is passed on under the name ``follow_redirects``;
    the remaining options keep their names.
    """
    reference_options = {
        'follow_redirects': followRedirects,
        'with_template_inclusion': with_template_inclusion,
        'only_template_inclusion': only_template_inclusion,
        'total': total,
        'content': content,
    }
    return referredPage.getReferences(**reference_options)
# Script body: walk every Commons page that transcludes Template:Fornminne and
# replace FMIS identifiers given as template parameters with the matching
# KMR identifier.
pywikibot.handle_args(sys.argv[1:])
site = pywikibot.Site('commons', 'commons')

# generator
transclusionPage = pywikibot.Page(pywikibot.Link('Template:Fornminne', default_namespace=10, source=site))
generator = ReferringPageGenerator(transclusionPage, only_template_inclusion=True)

i = 0
for page in generator:
    i += 1
    title = page.title()
    print(f'{i}. {title}')

    text = page.text
    wikicode = mwparserfromhell.parse(text)

    skip_page = False
    for template in wikicode.filter_templates():
        if not template.name.matches('Fornminne'):
            continue

        if template.has('objektid'):
            # Actually skip the page, as the message announces, instead of
            # falling through and editing it anyway.
            print(f'Skipping {title}, it might be a special case')
            skip_page = True
            break

        for param in template.params:
            if is_fmis_identifier(param):
                kmr_identifier = get_kmr_identifier(param)
                if not kmr_identifier:
                    print(f'Skipping template in {title}, failed to get KMR identifier')
                    continue
                param.value.replace(param.value, kmr_identifier)

    new_text = str(wikicode)
    # Only save (and show the diff) when an identifier was really replaced;
    # saving unchanged text costs one pointless request per page.
    if not skip_page and new_text != text:
        diff = get_edits_string(text, new_text)
        page.text = new_text
        page.save(summary='Replacing deprecated FMIS identifiers with KMR identifiers', minor=False)
        print(diff)

    # Safety limit for a single run: stop once more than 1000 pages were visited.
    if i > 1000:
        break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment