Skip to content

Instantly share code, notes, and snippets.

@Ladsgroup
Created August 5, 2015 13:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Ladsgroup/602ae2ba5db7a3031901 to your computer and use it in GitHub Desktop.
Save Ladsgroup/602ae2ba5db7a3031901 to your computer and use it in GitHub Desktop.
Interwiki for CX
GNU nano 2.2.6 File: scripts/amire80.py
# License: MIT
import pywikibot, codecs, json
# Wiki whose published Content Translation (CX) entries are harvested.
site = pywikibot.Site('ca')
offset = 0
_base_dir = '/data/project/dexbot/pywikibot-core/'
cases = []

# Page through list=cxpublishedtranslations, 500 entries per request, until
# the API returns an empty batch.
while True:
    req = pywikibot.data.api.Request(
        site=site, action='query', list='cxpublishedtranslations',
        limit=500, offset=offset)
    offset += 500
    res = req.submit()
    translations = res['result']['translations']
    if not translations:
        break
    for case in translations:
        # Skip translations whose target URL points into the User: namespace.
        if '/wiki/User:' in case['targetURL']:
            continue
        # Keep only the four fields the later reconciliation step reads.
        new_case = {
            'sourceTitle': case['sourceTitle'],
            'targetTitle': case['targetTitle'],
            'sourceLanguage': case['sourceLanguage'],
            'targetLanguage': case['targetLanguage'],
        }
        cases.append(new_case)

# Persist the harvested cases under _base_dir: that is the location the
# reconciliation step reads res.txt from, so writing relative to the current
# working directory would only work when the script is started from there.
with codecs.open('%sres.txt' % _base_dir, 'w', 'utf-8') as f:
    f.write(json.dumps(cases))
def _save_issues(issues):
    """Rewrite errors.txt under _base_dir with the issues collected so far.

    Called after every new issue so the report on disk stays current even if
    the run is interrupted.
    """
    with codecs.open('%serrors.txt' % _base_dir, 'w', 'utf-8') as f:
        f.write(json.dumps(issues))


def _ensure_sitelink(item, page, site):
    """Add *page* to *item* as a sitelink unless *item* already links *site*.

    If a sitelink for *site* is already present, nothing is edited and the
    item ID is printed instead. Failures are reported on stdout and never
    raised, so one bad item cannot abort the whole run.
    """
    try:
        linked = item.getSitelink(site)
    except pywikibot.exceptions.NoPage:
        # NoPage from getSitelink is treated as "no sitelink for this site
        # yet", which is exactly the case that needs the link to be added.
        linked = None
    except Exception:
        print('getSitelink failed for %s' % item.getID())
        return
    if linked:
        print(item.getID())
        return
    try:
        item.setSitelink(page)
    except Exception:
        # Best effort: a rejected edit is reported and the run continues.
        print('setSitelink failed for %s' % item.getID())


# Load the harvested translation cases written by the harvesting step.
with codecs.open('%sres.txt' % _base_dir, 'r', 'utf-8') as f:
    cases = json.loads(f.read())

issues = []
for case in cases:
    try:
        source_site = pywikibot.Site(case['sourceLanguage'])
        target_site = pywikibot.Site(case['targetLanguage'])
        source_page = pywikibot.Page(source_site, case['sourceTitle'])
        target_page = pywikibot.Page(target_site, case['targetTitle'])
        pages_exist = target_page.exists() and source_page.exists()
    except Exception:
        # Cases whose site/page lookup fails are skipped without being
        # reported. Exception (not a bare except) keeps Ctrl-C working.
        continue
    if not pages_exist:
        # Cases with a missing source or target page are skipped without
        # being reported.
        continue
    if target_page.namespace() != source_page.namespace():
        print("Namespaces doesn't match")
        continue

    # Wikidata sitelinks must point at the real page, not at a redirect.
    if source_page.isRedirectPage():
        source_page = source_page.getRedirectTarget()
    if target_page.isRedirectPage():
        target_page = target_page.getRedirectTarget()

    # An item stays None when the lookup for its page fails.
    source_item = None
    target_item = None
    try:
        source_item = pywikibot.ItemPage.fromPage(source_page)
    except Exception:
        pass
    try:
        target_item = pywikibot.ItemPage.fromPage(target_page)
    except Exception:
        pass

    if not source_item and not target_item:
        case['notes'] = 'None of them are in Wikidata'
        issues.append(case)
        _save_issues(issues)
    elif not target_item:
        # Only the source page has an item: attach the translation to it.
        if source_item.exists():
            _ensure_sitelink(source_item, target_page, target_site)
    elif not source_item:
        # Only the target page has an item: attach the source page to it.
        if target_item.exists():
            _ensure_sitelink(target_item, source_page, source_site)
    elif target_item.getID() != source_item.getID():
        # Both pages already have items, but not the same one.
        case['notes'] = 'Items need merge'
        issues.append(case)
        _save_issues(issues)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment