Instantly share code, notes, and snippets.

@Ladsgroup /coc2.py
Last active Nov 26, 2017

Embed
What would you like to do?
Cochrane bot
# License: MIT
import pywikibot
import re
import urllib2
from pywikibot import pagegenerators
# Wiki the bot operates on, selected by the 'en' site code.
# NOTE(review): the family presumably comes from the user's pywikibot
# configuration -- confirm.
site = pywikibot.Site('en')
# Search namespace 0 with an ``insource:`` regex query for pages whose wikitext
# contains a "| journal = ...Cochrane" template parameter.
generator = pagegenerators.SearchPageGenerator('insource:/\| *journal *= *.+Cochrane/', site=site, namespaces=[0])
# Wrap the search results in a PreloadingGenerator; ``gen`` is what the main
# loop iterates over.
gen = pagegenerators.PreloadingGenerator(generator)
def update_report(page, old_pmid, new_pmid,
                  report_title='Wikipedia:WikiProject Medicine/Cochrane update/August 2017'):
    """Append an "outdated review" entry for *page* to the on-wiki report.

    The entry links the article, an edit link for it, and the PubMed pages of
    the old and the new review. If an identical entry is already present in
    the report page, nothing is saved.

    Args:
        page: Article page object; only ``page.title()`` is used.
        old_pmid: PMID of the review currently cited in the article.
        new_pmid: PMID of the review that PubMed lists as its update.
        report_title: Title of the report page to append to. Defaults to the
            page the script originally hard-coded, so existing callers are
            unaffected; pass another title to log to a different report.

    Returns:
        None.

    Errors raised by ``report.get()`` or ``report.save()`` are not caught
    here and propagate to the caller.
    """
    report = pywikibot.Page(site, report_title)
    report_text = report.get()
    title = page.title()
    entry = u'\n*Article [[%s]] ([{{fullurl:%s|action=edit}} edit]) old review [https://www.ncbi.nlm.nih.gov/pubmed/%s PMID:%s] new review [https://www.ncbi.nlm.nih.gov/pubmed/%s PMID:%s]' % (title, title, old_pmid, old_pmid, new_pmid, new_pmid)
    # The duplicate check deliberately uses the entry *without* the trailing
    # five-tilde marker appended below, so an existing entry is recognised
    # regardless of what that marker was expanded to when it was saved.
    if entry in report_text:
        return
    report.text = report_text + entry + u' - ~~~~~'
    report.save('Bot: Update report')
# Main bot loop. For every article yielded by ``gen``:
#   1. collect the PMIDs cited via ``|pmid=NNN|`` template parameters,
#   2. fetch each PMID's PubMed page and look for an "Update in" link,
#   3. if a newer review exists, append an {{Update inline}} tag after the
#      citation's closing </ref> and log the pair through update_report(),
#   4. save the article once if any tag was added.

# Articles containing this HTML comment are skipped entirely.
NO_UPDATE_MARKER = '<!-- No update needed -->'
PUBMED_URL = 'https://www.ncbi.nlm.nih.gov/pubmed/%s'
# Captures the digits of a ``|pmid=NNN|`` parameter in the article wikitext.
PMID_PARAM_RE = re.compile(r'\|\s*?pmid\s*?\=\s*?(\d+?)\s*?\|')
# Captures the PMID linked from the "Update in" section of the PubMed HTML.
UPDATE_IN_RE = re.compile(
    r'<h3>Update in</h3><ul><li class="comments"><a href="/pubmed/(\d+?)"')
# Template for the per-PMID pattern: matches from the ``|pmid=NNN`` parameter
# (followed by ``|`` or ``}}``) up to and including the next closing </ref>.
CITATION_TO_REF_END = r'(\|\s*?pmid\s*?\=\s*?%s\s*?(?:\||\}\}).*?\< *?\/ *?ref *?\>)'

for page in gen:
    try:
        text = page.get()
    except Exception:
        # Best effort: a page that cannot be loaded is skipped. Unlike the
        # former bare ``except:``, this lets KeyboardInterrupt/SystemExit
        # through so the bot can still be stopped.
        continue
    if NO_UPDATE_MARKER in text:
        continue
    pmids = PMID_PARAM_RE.findall(text)
    print(len(pmids))
    for pmid in pmids:
        try:
            response = urllib2.urlopen(PUBMED_URL % pmid)
            try:
                pubmed_html = response.read().decode('utf-8')
            finally:
                # Always release the HTTP connection, even if read/decode fails.
                response.close()
        except Exception:
            # Best effort: network or decoding problems skip this PMID only.
            continue
        if 'WITHDRAWN' in pubmed_html:
            continue
        update_match = UPDATE_IN_RE.search(pubmed_html)
        if update_match is None:
            continue
        new_pmid = update_match.group(1)
        tag = u'{{Update inline|reason=Updated version https://www.ncbi.nlm.nih.gov/pubmed/' + new_pmid
        if tag in text:
            # Already tagged (possibly by an earlier run); nothing to do.
            continue
        # ``flags`` must be passed by keyword: the fourth positional argument
        # of re.sub() is ``count``, so the previous positional ``re.DOTALL``
        # was interpreted as count=16 and the DOTALL flag was never applied,
        # meaning citations whose <ref> spans several lines never matched.
        # ``\g<1>`` is used instead of ``\1`` so the group reference cannot
        # merge with whatever text follows it.
        text = re.sub(CITATION_TO_REF_END % pmid,
                      r'\g<1>%s}}' % tag,
                      text,
                      flags=re.DOTALL)
        update_report(page, pmid, new_pmid)
    if text != page.text:
        page.text = text
        page.save(u'Bot: Adding "update inline" template')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment