Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Extract famous Vermonters and their QIDs from Wikipedia
const wtf = require('wtf_wikipedia')
const rp = require('request-promise');
const createCsvWriter = require('csv-writer').createObjectCsvWriter;
// CSV sink for the results. Each column's header title is the same string as
// the record key it is filled from, so both are derived from one name list.
const csvWriter = createCsvWriter({
  path: 'famous_vermonters.csv',
  header: ['qid', 'name'].map((column) => ({id: column, title: column}))
});
/**
 * Pull the Wikidata item id (QID) out of a parsed
 * `action=query&prop=pageprops` API response.
 *
 * Only the first entry of `resp.query.pages` is inspected.
 *
 * @param {?Object} resp - Parsed JSON body of the API response.
 * @returns {?string} The first page's `pageprops.wikibase_item`, or `null`
 *   when the response has no `query.pages`, the page list is empty, the page
 *   has no `pageprops`, or `pageprops` carries no `wikibase_item`.
 */
function parseWikiResponse(resp) {
  // A response without `query.pages` (e.g. an API error payload) must yield
  // null rather than throw, so one bad lookup cannot abort the whole export.
  const pages = resp && resp.query && resp.query.pages;
  if (!pages) {
    return null;
  }
  const firstPage = Object.values(pages)[0];
  const pageProps = firstPage && firstPage.pageprops;
  const qid = pageProps ? pageProps.wikibase_item : undefined;
  // `== null` deliberately matches both null and undefined.
  return qid == null ? null : qid;
}
/**
 * Export the people listed on the `List_of_people_from_Vermont` article,
 * together with their Wikidata QIDs, through `csvWriter`.
 *
 * Steps:
 *   1. Fetch and parse the article with `wtf.fetch`.
 *   2. Walk every section whose title is a single capital letter and collect
 *      the entries of its lists.
 *   3. For each entry, query the `pageprops` of the page its first link
 *      points to. Entries without a usable link get a `null` QID instead of
 *      crashing the run.
 *   4. Once all lookups finish, write one `{name, qid}` record per entry.
 *
 * All lookups are started at once; no throttling is applied.
 *
 * @returns {Promise<void>} Settles after the records were handed to
 *   `csvWriter.writeRecords` and written, or after a failure was logged.
 *   Failures at any stage are logged with `console.log` and not re-thrown,
 *   so the returned promise does not reject.
 */
function getNamesAndQids() {
  return wtf.fetch('List_of_people_from_Vermont').then((doc) => {
    console.log("RUNNING");
    if (!doc) {
      throw new Error('List_of_people_from_Vermont could not be loaded');
    }

    // `names[i]` is the list text whose lookup result ends up at index i.
    const names = [];
    const lookups = [];

    doc.json().sections.forEach((section) => {
      if (!/^[A-Z]$/.test(section.title)) {
        return;
      }
      // `section.lists` is an array of lists; flatten it by one level.
      // Sections without lists contribute nothing.
      [].concat(...(section.lists || [])).forEach((item) => {
        const firstLink = item.links && item.links[0];
        const pageTitle = firstLink && firstLink.page;
        names.push(item.text);
        if (!pageTitle) {
          // Nothing to look up; keep the row, leave the QID empty.
          lookups.push(null);
          return;
        }
        // Only the title is escaped, and with encodeURIComponent: encodeURI
        // would leave `&`, `+`, `#` and `=` inside a title untouched and
        // thereby corrupt the query string.
        const wikiURL = `https://en.wikipedia.org/w/api.php?action=query&prop=pageprops&titles=${encodeURIComponent(pageTitle)}&format=json`;
        lookups.push(rp({
          uri: wikiURL,
          json: true
        }));
      });
    });

    // https://stackoverflow.com/a/39507833/4355916
    return Promise.all(lookups).then((responses) => (
      // Returned so that write errors reach the catch below.
      csvWriter.writeRecords(responses.map((response, i) => ({
        "name": names[i],
        "qid": response ? parseWikiResponse(response) : null
      })))
    ));
  }).catch(err => console.log(err));
}
// Script entry point: start the extraction as soon as this file is loaded.
getNamesAndQids()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment