Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
"use strict";
var preq = require('preq');
var P = require('bluebird');
/**
 * Fetches the revision ids listed for a page by the action API.
 *
 * @param {string} title - Page title as returned by the recentchanges list.
 * @returns {Promise<Array>} The `revid` values of the first page entry in the
 *     response, or an empty array when the response carries no usable
 *     page/revision data.
 */
function fetchRevisionIds(title) {
    return preq.get({
        // The title is embedded in a query string, so it must be escaped;
        // otherwise characters such as '&' or '#' would alter the request.
        uri: 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles='
            + encodeURIComponent(title) + '&rvlimit=500&format=json'
    })
    .then((res) => {
        // Guard every level: a response without `query` must not throw.
        const pages = res && res.body && res.body.query && res.body.query.pages;
        if (!pages) {
            return [];
        }
        const pageId = Object.keys(pages)[0];
        if (pageId === undefined || !pages[pageId] || !pages[pageId].revisions) {
            return [];
        }
        return pages[pageId].revisions.map((rev) => rev.revid);
    });
}

/**
 * Downloads the HTML of one revision from the REST API.
 *
 * @param {string} title - Page title.
 * @param {*} revision - Revision id taken from the revisions query.
 * @returns {Promise<?{revision: *, body: *}>} The revision id together with
 *     the response body, or `null` when the request failed.
 */
function fetchRevisionHtml(title, revision) {
    return preq.get({
        uri: 'https://en.wikipedia.org/api/rest_v1/page/html/'
            + encodeURIComponent(title) + '/' + revision
    })
    .then((res) => ({ revision, body: res.body }))
    // Best effort: a revision whose HTML cannot be fetched is skipped
    // instead of aborting the whole pass.
    .catch(() => null);
}

/**
 * Writes one revision's HTML to both local buckets.
 *
 * @param {string} title - Page title.
 * @param {{revision: *, body: *}} result - Revision id and its HTML body.
 * @returns {Promise} Settles once both PUT requests have completed.
 */
function storeRevision(title, result) {
    const path = 'parsoid.html/' + encodeURIComponent(title) + '/' + result.revision;
    const headers = { 'content-type': 'text/html' };
    return P.join(
        preq.put({
            uri: 'http://localhost:7231/en.wikipedia.org/sys/chunked_bucket/' + path,
            headers,
            body: result.body
        }),
        preq.put({
            uri: 'http://localhost:7231/en.wikipedia.org/sys/key_rev_value/' + path,
            headers,
            body: result.body
        })
    );
}

/**
 * Runs one import pass and then starts the next one, forever.
 *
 * A pass lists the recent changes, and for each changed title (one title at
 * a time) fetches its revision ids, downloads the HTML of every revision and
 * PUTs each successfully downloaded revision to both local buckets.
 *
 * @returns {Promise} A promise chained onto the next pass; since every pass
 *     (successful or failed) starts another one, it is not expected to settle.
 */
function load() {
    return preq.get({
        uri: 'https://en.wikipedia.org/w/api.php?action=query&list=recentchanges&rcprop=title&format=json'
    })
    .then((res) => {
        const titles = res.body.query.recentchanges.map((change) => change.title);
        return P.each(titles, (title) => {
            console.log(title);
            return fetchRevisionIds(title)
            .then((revisions) => P.all(
                revisions.map((revision) => fetchRevisionHtml(title, revision))
            ))
            .then((results) => {
                // Drop revisions whose download failed or returned no body,
                // so that no empty/undefined content is stored.
                const fetched = results.filter((r) => r !== null && r.body != null);
                return P.each(fetched, (result) => storeRevision(title, result));
            });
        });
    })
    .then(() => load())
    .catch((e) => {
        // Report the failure instead of discarding it, then start over.
        console.error(e);
        return load();
    });
}
// Start the first pass. load() re-invokes itself after every pass, whether
// it succeeded or failed, so no further call is needed here.
load();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment