Skip to content

Instantly share code, notes, and snippets.

@mattfield11
Last active August 30, 2017 11:23
Show Gist options
  • Save mattfield11/c7ed565b299028986e6d1be4c8c63ca4 to your computer and use it in GitHub Desktop.
Save mattfield11/c7ed565b299028986e6d1be4c8c63ca4 to your computer and use it in GitHub Desktop.
Pywikibot to Elasticsearch
import pywikibot
from pywikibot import pagegenerators
from elasticsearch import Elasticsearch
import json
# Walk every page in 'Category:Living people' on the configured pywikibot
# site and index one JSON document per page into the "wikipeople"
# Elasticsearch index (doc type 'wiki_page').
es = Elasticsearch()  # no arguments: the client's default connection settings
site = pywikibot.Site()
cat = pywikibot.Category(site, 'Category:Living people')
gen = pagegenerators.CategorizedPageGenerator(cat)

for page in gen:
    # Build a fresh dict per page so nothing from a previous page can leak
    # into this document.
    data = {
        'text': page.text,
        'title': page.title(),
        # 'extlinks': page.extlinks(),  # left disabled, as in the original
        'fullurl': page.full_url(),
    }

    # str.replace returns a new string. The original called it and threw the
    # result away, so the ':' -> ';' substitution never reached the document.
    data['categories'] = [
        str(category).replace(":", ";") for category in page.categories()
    ]

    # Suggestion entry built from the page title with a fixed weight of 1.
    data['suggest'] = [{"input": data['title'], "weight": 1}]

    doc = json.dumps(data)
    print(doc)
    res = es.index(index="wikipeople", doc_type='wiki_page', body=doc)

    # Elasticsearch 5.x responses carry a boolean 'created'; 6.0+ removed it
    # in favour of result == 'created'. Print the same boolean either way
    # instead of raising KeyError on newer servers.
    if 'created' in res:
        print(res['created'])
    else:
        print(res.get('result') == 'created')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment