Skip to content

Instantly share code, notes, and snippets.

@remotesynth
Last active December 5, 2015 15:29
Show Gist options
  • Save remotesynth/401af3e2a4525677830a to your computer and use it in GitHub Desktop.
Save remotesynth/401af3e2a4525677830a to your computer and use it in GitHub Desktop.
Parsing the locations James Bond has been to
var request = require("request"),
cheerio = require("cheerio"),
ProgressBar = require('progress'),
Knwl = require("knwl.js"),
apiURL = "http://jamesbond.wikia.com/api/v1/Articles/List?category=James_Bond_films&limit=50",
baseURL = "http://jamesbond.wikia.com";
console.log("Target: James Bond");

// Get the full list of movies from the API, then fetch each movie's wiki page,
// extract the place names mentioned in its main content, and print, for every
// place, the movies that mention it and how many times.
request(apiURL, function (error, response, body) {
    var allMovies,
        movies,
        // place name -> { movie title -> number of mentions }.
        // Created without a prototype because the keys come from scraped text:
        // a place called e.g. "constructor" must not resolve to an inherited
        // Object.prototype member.
        knownLocations = Object.create(null),
        completedRequests = 0,
        bar,
        knwlInstance;

    // Print every known place followed by the movies that mention it.
    function printKnownLocations() {
        console.log("Target known locations:");
        Object.keys(knownLocations).forEach(function (placeName) {
            var movieCounts = knownLocations[placeName];
            console.log(placeName);
            Object.keys(movieCounts).forEach(function (movieTitle) {
                console.log(" - " + movieTitle + " (" + movieCounts[movieTitle] + ")");
            });
        });
    }

    // Mark one page request as finished (whether it succeeded or failed) and
    // print the results once all of them are done. Failed requests must be
    // counted too, otherwise a single failure would suppress the final output.
    function finishRequest() {
        bar.tick();
        completedRequests++;
        if (completedRequests === movies.length) {
            printKnownLocations();
        }
    }

    if (error) {
        console.log("We’ve encountered an error: " + error);
        return;
    }

    // the API body is external input and may not be valid JSON
    try {
        allMovies = JSON.parse(body);
    } catch (parseError) {
        console.log("We’ve encountered an error: " + parseError);
        return;
    }
    if (!allMovies || !Array.isArray(allMovies.items)) {
        console.log("We’ve encountered an error: unexpected API response");
        return;
    }
    movies = allMovies.items;

    // nothing to request, so report the (empty) result right away
    if (movies.length === 0) {
        printKnownLocations();
        return;
    }

    // instantiate the places plugin
    knwlInstance = new Knwl('english');
    knwlInstance.register('places', require('knwl.js/default_plugins/places'));

    // processing takes a while, so load a progress bar to show the status
    bar = new ProgressBar("Researching :bar", { total: movies.length });

    // loop through each movie and get the contents of the wiki page
    movies.forEach(function (movie) {
        var movieURL = baseURL + movie.url;

        // request the wiki page
        request(movieURL, function (pageError, pageResponse, pageBody) {
            var $,
                movieDescription,
                places;

            if (pageError) {
                console.log("Couldn’t get page because of error: " + pageError);
                finishRequest();
                return;
            }

            // parse the main description contents
            $ = cheerio.load(pageBody);
            movieDescription = $("#mw-content-text").text();

            // search the contents for places; init() and get() run back to
            // back, so sharing one Knwl instance across callbacks is safe
            knwlInstance.init(movieDescription);
            places = knwlInstance.get('places');

            // collate all the places and how frequently each appears
            places.forEach(function (found) {
                var movieCounts = knownLocations[found.place];
                if (!movieCounts) {
                    movieCounts = Object.create(null);
                    knownLocations[found.place] = movieCounts;
                }
                movieCounts[movie.title] = (movieCounts[movie.title] || 0) + 1;
            });

            // if this is the last request result, this outputs the contents
            finishRequest();
        });
    });
});
@jeromelachaud
Copy link

very nice and fun experiment 👍
small typo here

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment