-
-
Save Abbe98/499303219b8329557a5aeca70335413f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import difflib | |
import sys | |
import mwparserfromhell | |
import pywikibot | |
import requests | |
from pywikibot import pagegenerators | |
# 24-bit ANSI colour helpers. Each one switches the foreground colour, emits
# the text, and then switches the foreground back to white (it does not issue
# a full terminal reset).
def red(text):
    """Return ``text`` wrapped in escape codes for a pure red foreground."""
    return f'\033[38;2;255;0;0m{text}\033[38;2;255;255;255m'


def green(text):
    """Return ``text`` wrapped in escape codes for a pure green foreground."""
    return f'\033[38;2;0;255;0m{text}\033[38;2;255;255;255m'


def blue(text):
    """Return ``text`` wrapped in escape codes for a pure blue foreground."""
    return f'\033[38;2;0;0;255m{text}\033[38;2;255;255;255m'


def white(text):
    """Return ``text`` wrapped in escape codes for a white foreground."""
    return f'\033[38;2;255;255;255m{text}\033[38;2;255;255;255m'
def get_edits_string(old, new):
    """
    Build a colourised, inline diff between two strings.

    The sequences are compared with difflib.SequenceMatcher. Unchanged
    segments are rendered in white, deleted segments in red and inserted
    segments in green; a replaced segment is shown as the old text in red
    immediately followed by the new text in green.
    """
    pieces = []
    matcher = difflib.SequenceMatcher(a=old, b=new)
    for tag, old_start, old_end, new_start, new_end in matcher.get_opcodes():
        if tag == "equal":
            pieces.append(white(old[old_start:old_end]))
        elif tag == "delete":
            pieces.append(red(old[old_start:old_end]))
        elif tag == "insert":
            pieces.append(green(new[new_start:new_end]))
        elif tag == "replace":
            pieces.append(red(old[old_start:old_end]) + green(new[new_start:new_end]))
    # Join once at the end instead of growing a string inside the loop.
    return "".join(pieces)
def is_fmis_identifier(potential_identifier: str) -> bool:
    """
    Check if the potential_identifier is a valid FMIS identifier.

    An identifier is accepted when it consists of exactly 14 ASCII digits.
    The argument is converted with str() first, so string-like objects are
    judged by their text form.
    """
    candidate = str(potential_identifier)
    # str.isdigit() also accepts non-ASCII digits (superscripts, Arabic-Indic
    # numerals, ...), so compare against the ASCII range explicitly.
    return len(candidate) == 14 and all('0' <= ch <= '9' for ch in candidate)
def get_kmr_identifier(fmis_identifier):
    """
    Get the corresponding KMR identifier from the FMIS identifier.

    Sends a HEAD request for the FMIS record and reads the target of the
    302 redirect. Returns the last path segment of the Location header when
    it is 36 characters long; returns False when the request fails, the
    response is not a 302, or the redirect target does not look like that.
    """
    # HTTPS is needed here, because F5 will redirect before the request hits SOCH
    url = f'https://kulturarvsdata.se/raa/fmi/{fmis_identifier}'
    try:
        # The redirect itself carries the answer, so it must not be followed.
        # The timeout keeps one stalled connection from hanging the whole run.
        response = requests.head(url, allow_redirects=False, timeout=30)
    except requests.RequestException:
        # Callers already treat False as "lookup failed, skip this one".
        return False

    if response.status_code != 302:
        return False

    kmr_uuid = response.headers.get('Location', '').split('/')[-1]
    if len(kmr_uuid) == 36:
        return kmr_uuid
    return False
# Local copy of a function from pagegenerators.py (it could not be executed
# by scripts, which is a known issue).
def ReferringPageGenerator(referredPage, followRedirects=False,
                           with_template_inclusion=False,
                           only_template_inclusion=False,
                           total=None, content=False):
    """Return the pages that refer to ``referredPage``.

    All options are forwarded unchanged to ``referredPage.getReferences``.
    """
    options = {
        'follow_redirects': followRedirects,
        'with_template_inclusion': with_template_inclusion,
        'only_template_inclusion': only_template_inclusion,
        'total': total,
        'content': content,
    }
    return referredPage.getReferences(**options)
def _replace_fmis_identifiers(wikicode, title):
    """
    Swap FMIS identifiers for KMR identifiers in the Fornminne templates.

    ``wikicode`` is modified in place. Returns False when the page should be
    left alone because one of its Fornminne templates has an ``objektid``
    parameter (the caller must then discard ``wikicode``), True otherwise.
    ``title`` is only used in the progress messages.
    """
    for template in wikicode.filter_templates():
        if not template.name.matches('Fornminne'):
            continue
        if template.has('objektid'):
            print(f'Skipping {title}, it might be a special case')
            return False
        for param in template.params:
            # Judge the parameter by its full text form, so only parameters
            # whose whole rendering is a bare identifier are touched.
            candidate = str(param)
            if not is_fmis_identifier(candidate):
                continue
            kmr_identifier = get_kmr_identifier(candidate)
            if not kmr_identifier:
                print(f'Skipping template in {title}, failed to get KMR identifier')
                continue
            param.value = kmr_identifier
    return True


pywikibot.handle_args(sys.argv[1:])
site = pywikibot.Site('commons', 'commons')

# generator
transclusionPage = pywikibot.Page(pywikibot.Link('Template:Fornminne', default_namespace=10, source=site))
generator = ReferringPageGenerator(transclusionPage, only_template_inclusion=True)

for i, page in enumerate(generator, start=1):
    title = page.title()
    print(f'{i}. {title}')

    text = page.text
    wikicode = mwparserfromhell.parse(text)

    if _replace_fmis_identifiers(wikicode, title):
        new_text = str(wikicode)
        # Only touch the wiki when an identifier was actually replaced.
        if new_text != text:
            diff = get_edits_string(text, new_text)
            page.text = new_text
            page.save(summary='Replacing deprecated FMIS identifiers with KMR identifiers', minor=False)
            print(diff)

    # Safety cap on the number of pages handled in one run.
    # NOTE(review): the check runs after processing, so page 1001 is still
    # handled before the loop stops - confirm whether 1000 was intended.
    if i > 1000:
        break
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment