Skip to content

Instantly share code, notes, and snippets.

@Ladsgroup
Created June 1, 2015 13:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Ladsgroup/dccec148e8ecc1fab476 to your computer and use it in GitHub Desktop.
Save Ladsgroup/dccec148e8ecc1fab476 to your computer and use it in GitHub Desktop.
Script to fix interwiki of translations
# License: MIT
import pywikibot, codecs, json
# Gather every published Content Translation record reported by the 'ca'
# site, 500 entries per API request, keeping only the fields needed later.
site = pywikibot.Site('ca')
offset = 0
cases = []
kept_keys = ('sourceTitle', 'targetTitle', 'sourceLanguage', 'targetLanguage')
while True:
    req = pywikibot.data.api.Request(
        site=site,
        action='query',
        list='cxpublishedtranslations',
        limit=500,
        offset=offset,
    )
    # Advance the window before inspecting the reply, as each request
    # covers exactly one page of 500 results.
    offset += 500
    translations = req.submit()['result']['translations']
    if not translations:
        # An empty page means there is nothing left to fetch.
        break
    # Entries whose target URL points into the User: namespace are skipped.
    cases.extend(
        {key: entry[key] for key in kept_keys}
        for entry in translations
        if '/wiki/User:' not in entry['targetURL']
    )

# Persist the collected cases as a single JSON array.
serialized_cases = json.dumps(cases)
with codecs.open('res.txt', 'w', 'utf-8') as out:
    out.write(serialized_cases)
issues = []


def _record_issue(case, note):
    """Tag *case* with *note*, add it to ``issues`` and rewrite errors.txt.

    errors.txt is rewritten in full on every call, so it always holds the
    JSON list of all issues collected so far.
    """
    case['notes'] = note
    issues.append(case)
    with codecs.open('errors.txt', 'w', 'utf-8') as f:
        f.write(json.dumps(issues))


def _find_item(page):
    """Return the Wikidata item connected to *page*, or None if lookup fails."""
    try:
        return pywikibot.ItemPage.fromPage(page)
    except Exception:
        # Best effort, as in the original script: any failure to obtain an
        # item is treated as "page has no item". Exception (rather than a
        # bare except) keeps Ctrl-C / SystemExit working.
        return None


# For every translation, make sure source and target pages share one
# Wikidata item: add the missing sitelink when only one side has an item,
# and report the cases that cannot be fixed automatically.
for case in cases:
    try:
        source_site = pywikibot.Site(case['sourceLanguage'])
        target_site = pywikibot.Site(case['targetLanguage'])
        source_page = pywikibot.Page(source_site, case['sourceTitle'])
        target_page = pywikibot.Page(target_site, case['targetTitle'])
        # Calling exists() inside the try lets any failure while looking
        # the pages up be reported as an issue instead of aborting the run.
        target_exists = target_page.exists()
        source_exists = source_page.exists()
    except Exception:
        # The original opened errors.txt for writing here but never wrote
        # to it; the helper records the issue like every other branch.
        _record_issue(case, 'Unknown Error')
        continue

    if not target_exists or not source_exists:
        _record_issue(case, 'Source or target does not exist')
        continue

    # Work on the redirect targets so the sitelink goes to the real page.
    if source_page.isRedirectPage():
        source_page = source_page.getRedirectTarget()
    if target_page.isRedirectPage():
        target_page = target_page.getRedirectTarget()

    source_item = _find_item(source_page)
    target_item = _find_item(target_page)

    if not source_item and not target_item:
        _record_issue(case, 'None of them are in Wikidata')
    elif not target_item:
        # Only the source has an item: attach the translation to it.
        source_item.setSitelink(target_page)
    elif not source_item:
        # Only the target has an item: attach the source page to it.
        target_item.setSitelink(source_page)
    elif target_item.getID() != source_item.getID():
        # Both pages have items, but different ones; needs a human merge.
        _record_issue(case, 'Items need merge')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment