-
-
Save cletusc/8d26902f46fe6b3ed850 to your computer and use it in GitHub Desktop.
IMDb Movie Parser started at http://userscripts.org/topics/132241
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript==
// @name IMDb Movie Parser
// @namespace http://userscripts.org/topics/132241
// @description Creates a textarea at the bottom of the page with formatted text.
// @include http://www.imdb.com/title/*
// @version 1.0.3
// @grant none
// ==/UserScript==
(function () { | |
'use strict'; | |
if (window.frameElement) { | |
return; | |
} | |
// Loop variable used when iterating over parameter names.
var param,
    // Collected page data, keyed by field name.
    movieData = {},
    // One extractor per field. Each extractor reads the current document and
    // returns a string, an array, an object (details), or undefined when the
    // element it needs is not on the page.
    movieParams = {
        // Address of the page being parsed.
        IMDbLink: function () {
            return location.href;
        },
        // Movie title taken from the page heading.
        Title: function () {
            var titleNode = $qs('h1 span[itemprop="name"], #tn15title h1');
            if (!titleNode) {
                return;
            }
            // Only when the matched heading itself contains a year link does
            // it hold more than the title; in that case the title is its
            // first child. (The lookup must be scoped to the heading, hence
            // the second argument.)
            if ($qs('a[href*="year"]', titleNode) && titleNode.firstChild) {
                titleNode = titleNode.firstChild;
            }
            return trimFix(titleNode.textContent);
        },
        // Text of the year link next to the title.
        Year: function () {
            var yearNode = $qs('h1 span[itemprop="name"] + span a, #tn15title a[href*="/year/"]');
            if (!yearNode) {
                return;
            }
            return trimFix(yearNode.textContent);
        },
        // Short description from the overview section.
        Description: function () {
            var descriptionNode = $qs('#overview-top [itemprop="description"]');
            if (!descriptionNode) {
                return;
            }
            return trimFix(descriptionNode.textContent);
        },
        // First paragraph of the storyline section.
        Storyline: function () {
            var storylineNode = $qs('#titleStoryLine [itemprop="description"] p');
            if (!storylineNode) {
                return;
            }
            return trimFix(storylineNode.textContent);
        },
        // URL of the primary poster image.
        Poster: function () {
            var posterNode = $qs('#img_primary img, #primary-poster');
            if (!posterNode) {
                return;
            }
            return posterNode.src;
        },
        // Array of {name, photo, link, character} records, one per cast row.
        Cast: function () {
            var cast = [];
            $qsa('.cast_list tr:not(:first-child), .cast tr').forEach(function (row) {
                var nameNode = $qs('[itemprop="actor"] a span, .nm', row),
                    photoNode = $qs('.primary_photo img, .hs img', row),
                    linkNode = $qs('[itemprop="actor"] a, .nm a', row),
                    characterNode = $qs('.character, .char', row);
                // A row without an actor name cannot be formatted; skip it
                // instead of throwing on a null lookup.
                if (!nameNode) {
                    return;
                }
                cast.push({
                    name: trimFix(nameNode.textContent),
                    // Optional parts fall back to an empty string.
                    photo: photoNode ? photoNode.src : '',
                    link: linkNode ? linkNode.href : '',
                    character: characterNode ? trimFix(characterNode.textContent) : ''
                });
            });
            return cast;
        },
        // Generic "heading: values" pairs. Returns an object mapping each
        // heading text (first colon removed) to an array whose items are
        // plain strings or {text, link} objects for linked values.
        details: function () {
            var details = {};
            $qsa('#overview-top h4, #titleDetails h4, #titleStoryLine h4, #tn15content h5').forEach(function (heading) {
                var values = [],
                    name = trimFix(heading.textContent.replace(':', '')),
                    node = heading.nextSibling,
                    text;
                // Walk the siblings that follow the heading.
                while (node) {
                    // Values wrapped in an .info-content container: descend
                    // and continue with the container's children.
                    if (node.classList && node.classList.contains('info-content')) {
                        node = node.firstChild;
                        continue;
                    }
                    // Only element (1) and text (3) nodes carry visible
                    // values; comment nodes and the like are ignored.
                    if (node.nodeType !== 1 && node.nodeType !== 3) {
                        node = node.nextSibling;
                        continue;
                    }
                    // Skip elements carrying one of these navigation/filler classes.
                    if (node.classList && (
                        node.classList.contains('see-more') ||
                        node.classList.contains('ghost') ||
                        node.classList.contains('tn15more')
                    )) {
                        node = node.nextSibling;
                        continue;
                    }
                    text = trimFix(node.textContent.replace('»', ''));
                    // Ignore empty text and bare separator characters.
                    if (text === '' || text === ',' || text === '|') {
                        node = node.nextSibling;
                        continue;
                    }
                    if (node.href) {
                        values.push({
                            text: text,
                            link: node.href
                        });
                    }
                    else {
                        values.push(text);
                    }
                    node = node.nextSibling;
                }
                details[name] = values;
            });
            return details;
        }
    };
// Run every extractor and store its result on movieData under the same key.
for (param in movieParams) {
    if (Object.prototype.hasOwnProperty.call(movieParams, param)) {
        movieData[param] = movieParams[param]();
    }
}
// Fold the generic details map into the top level (its entries win over
// same-named extractor results, since later merge arguments overwrite
// earlier ones), then drop the container key.
movieData = merge(movieData, movieData.details);
delete movieData.details;
console.log(movieData);
// The format displayed in the text box: BBCode-style markup, one entry per
// output line. Each %Name% token is later replaced with the movie data value
// stored under Name.
var format = [
    '[size=6]%Title%[/size]',
    '[img]%Poster%[/img]',
    '%IMDbLink%',
    '',
    '[b]Year:[/b] %Year%',
    '[b]Runtime:[/b] %Runtime%',
    '[b]Genre(s):[/b] %Genres%',
    '[b]Language(s):[/b] %Language%',
    '[b]Country:[/b] %Country%',
    '[b]Plot:[/b] [size=3]%Description%[/size]',
    '',
    '[b]Director(s):[/b]',
    '%Director%',
    '',
    '[b]Writer(s):[/b]',
    '%Writers%',
    '',
    '[b]Cast:[/b]',
    '%Cast%',
    ''
].join('\n');
// How certain tokens should be separated when their list of values is joined;
// tokens not named here use the 'default' entry.
var separators = {
    'Cast': '\n',
    'Director': '\n',
    'Writers': '\n',
    'default': ', '
};
// Tokens whose linked values are rendered as [url=...] links; linked values
// of every other token are reduced to their text.
var shouldLink = [
    'Director',
    'Writers'
];
// Preprocess movie data for our use.
// Replace each cast record with a single formatted line, in place.
if (Array.isArray(movieData.Cast)) {
    movieData.Cast.forEach(function (castMember, idx, arr) {
        var line;
        // Leave anything that is not a {name, link, character} record alone
        // (e.g. entries that are already strings).
        if (!castMember || typeof castMember !== 'object' || typeof castMember.name !== 'string') {
            return;
        }
        // Without a link there is nothing to wrap the name in.
        line = castMember.link ?
            '[url=' + castMember.link + ']' + castMember.name + '[/url]' :
            castMember.name;
        // Only append the separator when there is a character to show.
        if (castMember.character) {
            line += ' - ' + castMember.character;
        }
        arr[idx] = line;
    });
}
// Map detail names found on the page onto the token names used by the format.
if (movieData.Plot) {
    movieData.Description = movieData.Plot;
}
if (movieData.Genre) {
    movieData.Genres = movieData.Genre;
}
// Use user-written description if possible (takes precedence over Plot).
if (movieData.Storyline) {
    movieData.Description = movieData.Storyline;
}
// Array#forEach callback that flattens one entry of a value list into a
// string, in place. Must be invoked with the token name as `this`
// (forEach's thisArg).
//
// e   - current entry: a string (left as is) or a {text, link} object.
// idx - index of the entry.
// arr - the array being processed; modified in place.
//
// A {text, link} entry becomes '[url=link]text[/url]' when the token is
// listed in shouldLink, otherwise just its text. If the next entry is a
// non-empty string it is appended after a space and its slot is deleted,
// leaving a hole in the array.
function preprocessMovieDataParam(e, idx, arr) {
    // Normalise `this` so the lookup also works when it arrives boxed as a
    // String object (non-strict callers).
    var token = String(this),
        next;
    // Strings need no work; null/undefined and other primitives are ignored.
    if (!e || typeof e !== 'object') {
        return;
    }
    if (!(e.text && e.link)) {
        return;
    }
    if (shouldLink.indexOf(token) > -1) {
        arr[idx] = '[url=' + e.link + ']' + e.text + '[/url]';
    }
    else {
        arr[idx] = e.text;
    }
    next = arr[idx + 1];
    if (next && typeof next === 'string') {
        arr[idx] += ' ' + next;
        // Deleting (rather than splicing) keeps the indices of the remaining
        // entries stable while forEach is still iterating.
        delete arr[idx + 1];
    }
}
// Reduce every movie data value to a plain string so it can be substituted
// into the format.
for (param in movieData) {
    if (!Object.prototype.hasOwnProperty.call(movieData, param)) {
        continue;
    }
    if (typeof movieData[param] === 'string') {
        // Do any preprocessing of the string-based tokens.
        if (param === 'Poster') {
            // Drop everything between '@@' and the file extension of the
            // poster URL (presumably size/crop directives -- TODO confirm).
            movieData[param] = movieData[param].replace(/@@.+?(\.\w+)$/, '@@$1');
        }
        continue;
    }
    // Only lists can be flattened; undefined (field not found on the page)
    // and any other non-array value is left as is and will fall back to the
    // tokenizer default.
    if (!Array.isArray(movieData[param])) {
        continue;
    }
    movieData[param].forEach(preprocessMovieDataParam, param);
    // cleanArray drops empty entries and the holes left by the preprocessing.
    movieData[param] = cleanArray(movieData[param]).join(
        // Own-property check so names such as 'constructor' cannot pick up
        // an inherited Object.prototype member as separator.
        (Object.prototype.hasOwnProperty.call(separators, param) && separators[param]) ||
        separators['default']
    );
}
// Fill in the template; tokens without a string value become '-'.
format = tokenize(format, movieData, '-');
// Add textarea to display data.
var textarea = document.createElement('textarea');
textarea.style.width = '90%';
// Clicking the box selects its whole content.
textarea.addEventListener('click', function () {
    this.select();
}, false);
textarea.value = format;
// Prefer the page footer; fall back to the body when the page has no
// #footer element, instead of throwing on a null lookup.
($qs('#footer') || document.body).appendChild(textarea);
// Helper functions.
//------------------
// querySelector.
// Returns the first element matching `selector` inside `context`
// (the whole document when `context` is omitted), or null when nothing matches.
function $qs(selector, context) {
    return (context || document).querySelector(selector);
}
// querySelectorAll with slice.
// Returns a real Array (not a NodeList) of every element matching `selector`
// inside `context` (the whole document when `context` is omitted), so array
// methods such as forEach are available on the result.
function $qsa(selector, context) {
    return Array.prototype.slice.call((context || document).querySelectorAll(selector));
}
// Remove weird whitespace on textContent.
// Trims the string, removes literal backslash-n sequences (the two characters
// '\' and 'n'), and collapses every run of whitespace -- including real line
// breaks and tabs -- into a single space.
// null/undefined input yields an empty string.
function trimFix(str) {
    if (str === null || str === undefined) {
        return '';
    }
    return String(str).trim().replace(/\\n/g, '').replace(/\s+/g, ' ');
}
// Merge objects with later arguments overwriting previous.
// Copies the own enumerable properties of every argument into a fresh object
// (shallow copy) and returns it; the arguments themselves are not modified.
// null/undefined arguments contribute nothing.
function merge() {
    var merged = {};
    Array.prototype.slice.call(arguments).forEach(function (source) {
        var key;
        for (key in source) {
            // Called via Object.prototype so sources created with
            // Object.create(null) work too.
            if (Object.prototype.hasOwnProperty.call(source, key)) {
                merged[key] = source[key];
            }
        }
    });
    return merged;
}
// Tokenizer.
// Replaces every %Name% token in `str` with repl[Name] when that is an own
// property holding a string. Any other token is replaced with `def`, or left
// untouched when `def` is empty or omitted.
function tokenize(str, repl, def) {
    return str.replace(/%(.*?)%/g, function (token, name) {
        // Called via Object.prototype so inherited members (e.g. 'toString')
        // never count as replacements and null-prototype maps are accepted.
        if (Object.prototype.hasOwnProperty.call(repl, name) && typeof repl[name] === 'string') {
            return repl[name];
        }
        return def || token;
    });
}
// Array cleaner.
// Returns a new array holding only the truthy entries of `arr`; empty
// strings, undefined, null, 0 and holes in sparse arrays are dropped.
function cleanArray(arr) {
    return arr.filter(function (entry) {
        return Boolean(entry);
    });
}
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment