Last active
December 5, 2015 15:29
-
-
Save remotesynth/401af3e2a4525677830a to your computer and use it in GitHub Desktop.
Parsing locations James Bond has been
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var request = require("request"),
    cheerio = require("cheerio"),
    ProgressBar = require('progress'),
    Knwl = require("knwl.js"),
    apiURL = "http://jamesbond.wikia.com/api/v1/Articles/List?category=James_Bond_films&limit=50",
    baseURL = "http://jamesbond.wikia.com";

console.log("Target: James Bond");

/**
 * Adds the places found on one movie page to the shared tally.
 *
 * @param {Object} knownLocations - tally keyed by place name, then by movie
 *     title, holding the number of mentions; mutated in place.
 * @param {Array} places - results of `knwlInstance.get('places')`; each entry
 *     is read through its `place` property.
 * @param {string} movieTitle - title of the movie the places were found in.
 */
function recordPlaces(knownLocations, places, movieTitle) {
    places.forEach(function (place) {
        var perMovie = knownLocations[place.place];

        if (!perMovie) {
            // Prototype-less object: scraped text is used as the key, so a
            // place or title such as "constructor" must not hit Object.prototype.
            perMovie = knownLocations[place.place] = Object.create(null);
        }
        perMovie[movieTitle] = (perMovie[movieTitle] || 0) + 1;
    });
}

/**
 * Prints every known location followed by the movies that mention it and the
 * number of mentions per movie.
 *
 * @param {Object} knownLocations - tally built by recordPlaces.
 */
function printKnownLocations(knownLocations) {
    console.log("Target known locations:");
    Object.keys(knownLocations).forEach(function (location) {
        console.log(location);
        Object.keys(knownLocations[location]).forEach(function (locationMovie) {
            console.log(" - " + locationMovie + " (" + knownLocations[location][locationMovie] + ")");
        });
    });
}

// get the full list of movies from the API
request(apiURL, function (error, response, body) {
    var allMovies,
        movies,
        knownLocations = Object.create(null),
        completedRequests = 0,
        bar,
        knwlInstance = new Knwl('english');

    // instantiate the places plugin
    knwlInstance.register('places', require('knwl.js/default_plugins/places'));

    if (error) {
        console.log("We’ve encountered an error: " + error);
        return;
    }

    // the body comes from the network, so it may not be valid JSON
    try {
        allMovies = JSON.parse(body);
    } catch (parseError) {
        console.log("We’ve encountered an error: " + parseError);
        return;
    }

    if (!allMovies || !Array.isArray(allMovies.items)) {
        console.log("We’ve encountered an error: unexpected response from the API");
        return;
    }
    movies = allMovies.items;

    // nothing to research: report the (empty) result right away, since no
    // page callback would ever fire to do it
    if (movies.length === 0) {
        printKnownLocations(knownLocations);
        return;
    }

    // processing takes a while, so load a progress bar to show the status
    bar = new ProgressBar("Researching :bar", { total: movies.length });

    // Marks one movie page as handled (successfully or not) and prints the
    // report once every page has been accounted for.
    function finishRequest() {
        bar.tick();
        completedRequests++;
        if (completedRequests === movies.length) {
            printKnownLocations(knownLocations);
        }
    }

    // loop through each movie and get the contents of the wiki page
    movies.forEach(function (movie) {
        var movieURL = baseURL + movie.url;

        // request the wiki page
        request(movieURL, function (error, response, body) {
            var $,
                movieDescription;

            if (error) {
                console.log("Couldn’t get page because of error: " + error);
            } else {
                // parse the main description contents
                $ = cheerio.load(body);
                movieDescription = $("#mw-content-text").text();

                // search the contents for places; init and get run back to
                // back, so sharing one Knwl instance across callbacks is safe
                knwlInstance.init(movieDescription);

                // collate all the places and how frequently each appears
                recordPlaces(knownLocations, knwlInstance.get('places'), movie.title);
            }

            // A failed page must still count as completed; otherwise the
            // final report would never be printed after a single failure.
            finishRequest();
        });
    });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
very nice and fun experiment 👍
small typo here