Skip to content

Instantly share code, notes, and snippets.

@vssun
Last active Dec 19, 2015
Embed
What would you like to do?
For merging lonely pages for wikidata
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
For wikidata use
To merge lonely ml pages to appropriate item
Written by: Vssun
2013-07-07
"""
import wikipedia
import codecs
#Main program starts here
#Customization constants follow; edit these before running the script
siteFamily = 'wikidata' #Family of the site on which the item pages are edited
siteLangCode = 'wikidata' #Code of that site
languageCode='ml' #Language/site code used for the sitelink, label and description that get set
workingSite="wikipedia:ml" #A source item is processed only if its single sitelink belongs to this site
defaultDescription=u"വിക്കിപീഡിയ വർഗ്ഗം" #Description to add on each target item
inputFileName = "input.csv" #List of pages; must be a csv file with one row per merge in the format "SOURCEITEM,TARGETITEM"
outputFileName = "output.csv" #Rows of processed pages are appended to this file
maxPages=10 #Stop once this many items have been edited
#End of Customization constants
def splitItemPair(myLine):
    """Split one "SOURCEITEM,TARGETITEM" input row into its two item ids.

    The row is split at the first comma.  Surrounding whitespace, including
    the line terminator that file iteration leaves on every row, is removed
    from both ids so that it never ends up in a page title.

    Returns a (source, target) tuple, or None when the row has no comma or
    when either id is empty.
    """
    mySourceItem, mySeparator, myTargetItem = myLine.partition(",")
    mySourceItem = mySourceItem.strip()
    myTargetItem = myTargetItem.strip()
    if not mySeparator or not mySourceItem or not myTargetItem:
        return None
    return mySourceItem, myTargetItem


def mergeLonelyPage(wikiSite, mySourceItem, myTargetItem):
    """Move the only sitelink of mySourceItem over to myTargetItem.

    The source item is touched only when it has exactly one interwiki link
    and that link belongs to workingSite.  In that case the link is blanked
    on the source item and then set on the target item, together with a
    label equal to the linked page title and the default description.

    Returns True when the edits were made, False when the source item did
    not qualify and nothing was changed.
    """
    wikipedia.output("Source item: " + mySourceItem + " Target:" + myTargetItem)
    myDataPage = wikipedia.DataPage(wikiSite, mySourceItem)
    wikipedia.output(myDataPage)
    myLinks = myDataPage.interwiki()
    if len(myLinks) != 1 or myLinks[0].site.sitename() != workingSite:
        return False

    myTitle = myLinks[0].title()
    # Use the framework's output instead of a bare print so that non-ASCII
    # titles are handled the same way as every other message of this script.
    wikipedia.output(myTitle)

    # The link is blanked on the source item before it is set on the target;
    # presumably the wiki refuses a sitelink that is still attached to
    # another item -- TODO confirm.
    myDataPage.setitem(summary=u"Removing interwiki", items={'type': u'sitelink', 'site': languageCode, 'title': ''})

    myDataPage = wikipedia.DataPage(wikiSite, myTargetItem)
    wikipedia.output(myDataPage)
    myDataPage.setitem(summary=u"Adding interwiki", items={'type': u'sitelink', 'site': languageCode, 'title': myTitle})
    # NOTE(review): the next two edits are unconditional, so any label or
    # description the target item already has in this language is probably
    # overwritten -- verify this is intended.
    myDataPage.setitem(summary=u"Adding label", items={'type': u'item', 'label': languageCode, 'value': myTitle})
    myDataPage.setitem(summary=u"Adding description", items={'type': u'description', 'language': languageCode, 'value': defaultDescription})
    return True


def main():
    """Process the rows of the input file until maxPages items were edited.

    Every row that led to edits is appended to the output file right away,
    so the record of what was already done survives an error in a later row.
    Rows that cannot be parsed are reported and skipped.
    """
    wikiSite = wikipedia.Site(code=siteLangCode, fam=siteFamily)
    myLoopCount = 0
    myEditCount = 0
    # Nested "with" blocks close both files even when a wiki call raises.
    with codecs.open(inputFileName, encoding='utf-8', mode='r') as myDataFile:
        with codecs.open(outputFileName, encoding='utf-8', mode='a') as myOutputFile:
            for myLine in myDataFile:
                myItems = splitItemPair(myLine)
                if myItems is None:
                    # Blank rows are ignored silently; anything else is reported.
                    if myLine.strip():
                        wikipedia.output("Skipping malformed line: " + myLine.strip())
                elif mergeLonelyPage(wikiSite, myItems[0], myItems[1]):
                    # Write a normalised row with its own line terminator so
                    # that successive runs never glue two rows together.
                    myOutputFile.write(myItems[0] + "," + myItems[1] + "\n")
                    myEditCount = myEditCount + 1
                myLoopCount = myLoopCount + 1
                wikipedia.output("\n\nLOOPCOUNT: " + str(myLoopCount) + " EDITCOUNT: " + str(myEditCount) + "\n\n")
                if myEditCount >= maxPages:
                    break


if __name__ == "__main__":
    try:
        main()
    finally:
        # Always let the framework shut down, also after an error.
        wikipedia.stopme()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment