Skip to content

Instantly share code, notes, and snippets.

@cletusc
Last active December 25, 2015 10:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cletusc/8d26902f46fe6b3ed850 to your computer and use it in GitHub Desktop.
Save cletusc/8d26902f46fe6b3ed850 to your computer and use it in GitHub Desktop.
IMDb Movie Parser started at http://userscripts.org/topics/132241
// ==UserScript==
// @name IMDb Movie Parser
// @namespace http://userscripts.org/topics/132241
// @description Creates a textarea at the bottom of the page with formatted text.
// @include http://www.imdb.com/title/*
// @version 1.0.3
// @grant none
// ==/UserScript==
(function () {
'use strict';
// Stop immediately when this document is embedded in a frame
// (window.frameElement is set), so nothing below runs for embedded pages.
if (window.frameElement) {
return;
}
var param,
movieData = {},
movieParams = {
IMDbLink: function () {
return location.href;
},
Title: function () {
var e = $qs('h1 span[itemprop="name"], #tn15title h1');
if (!e) {return;}
if ($qs('a[href*="year"]'), e) {
e = e.firstChild;
}
return trimFix(e.textContent);
},
Year: function () {
var e = $qs('h1 span[itemprop="name"] + span a, #tn15title a[href*="/year/"]');
if (!e) {return;}
return trimFix(e.textContent);
},
Description: function () {
var e = $qs('#overview-top [itemprop="description"]');
if (!e) {return;}
return trimFix(e.textContent);
},
Storyline: function () {
var e = $qs('#titleStoryLine [itemprop="description"] p');
if (!e) {return;}
return trimFix(e.textContent);
},
Poster: function () {
var e = $qs('#img_primary img, #primary-poster');
if (!e) {return;}
return e.src;
},
Cast: function () {
var e = $qsa('.cast_list tr:not(:first-child), .cast tr'),
cast = [];
if (!e) {return;}
e.forEach(function (e) {
var person = {
name: trimFix($qs('[itemprop="actor"] a span, .nm', e).textContent),
photo: $qs('.primary_photo img, .hs img', e).src,
link: $qs('[itemprop="actor"] a, .nm a', e).href,
character: trimFix($qs('.character, .char', e).textContent)
};
cast.push(person);
});
return cast;
},
details: function () {
var e = $qsa('#overview-top h4, #titleDetails h4, #titleStoryLine h4, #tn15content h5'),
details = {};
if (!e) {return;}
e.forEach(function (e) {
var detail = [],
name = trimFix(e.textContent.replace(':', ''));
e = e.nextSibling;
while (e) {
if (e.classList && e.classList.contains('info-content')) {
e = e.firstChild;
continue;
}
if (e.classList && (e.classList.contains('see-more') || e.classList.contains('ghost') || e.classList.contains('tn15more'))) {
e = e.nextSibling;
continue;
}
var eText = trimFix(e.textContent.replace('»', ''));
if (name === "Plot") {
console.log({
e: e,
eText: eText
});
}
if (
eText === '' ||
eText === ',' ||
eText === '|'
) {
e = e.nextSibling;
continue;
}
if (e.href) {
detail.push({
text: eText,
link: e.href
});
}
else {
detail.push(eText);
}
e = e.nextSibling;
}
details[name] = detail;
});
return details;
}
};
// Evaluate every extractor and store its result on movieData under the same key.
Object.keys(movieParams).forEach(function (key) {
    movieData[key] = movieParams[key]();
});
// Lift the generic details to the top level (they win on key clashes),
// then drop the now-redundant container.
movieData = merge(movieData, movieData.details);
delete movieData.details;
console.log(movieData);
// The format displayed in the text box: BBCode-style lines joined with newlines.
// Every %Name% token is later replaced by tokenize() with the string stored
// under movieData[Name]; tokens without a string value are rendered as '-'.
// NOTE(review): tokens such as %Director% or %Language% only resolve when the
// page uses exactly that heading text - confirm against current page markup.
var format = [
'[size=6]%Title%[/size]',
'[img]%Poster%[/img]',
'%IMDbLink%',
'',
'[b]Year:[/b] %Year%',
'[b]Runtime:[/b] %Runtime%',
'[b]Genre(s):[/b] %Genres%',
'[b]Language(s):[/b] %Language%',
'[b]Country:[/b] %Country%',
'[b]Plot:[/b] [size=3]%Description%[/size]',
'',
'[b]Director(s):[/b]',
'%Director%',
'',
'[b]Writer(s):[/b]',
'%Writers%',
'',
'[b]Cast:[/b]',
'%Cast%',
'',
].join('\n');
// How the entries of an array-valued token are joined into one string.
// Tokens not listed here use the 'default' separator.
var separators = {
'Cast': '\n',
'Director': '\n',
'Writers': '\n',
'default': ', '
};
// Tokens whose {text, link} entries are rendered as [url=...] links.
// For every other token only the entry text is kept.
var shouldLink = [
'Director',
'Writers'
];
// Flatten every cast record into one line: "[url=link]name[/url] - character".
movieData.Cast = movieData.Cast.map(function (member) {
    return '[url=' + member.link + ']' + member.name + '[/url] - ' + member.character;
});
// Copy values found under alternate keys onto the names the format uses.
// The order matters: a Storyline, when present, replaces a Plot-based
// Description.
[
    ['Plot', 'Description'],
    ['Genre', 'Genres'],
    ['Storyline', 'Description']
].forEach(function (alias) {
    var source = alias[0],
        target = alias[1];
    if (movieData[source]) {
        movieData[target] = movieData[source];
    }
});
// forEach callback that rewrites one entry of a token's array in place.
// `this` is the token name (passed as the forEach thisArg).
// - String entries are left untouched.
// - {text, link} entries become "[url=link]text[/url]" when the token is listed
//   in shouldLink, otherwise just the text.
// - A string directly following such an entry is appended to it (separated by
//   a space) and its own slot is deleted, leaving a hole in the array.
function preprocessMovieDataParam(e, idx, arr) {
    if (typeof e === 'string' || !(e.text && e.link)) {
        return;
    }
    var isLinked = shouldLink.indexOf(this) > -1,
        rendered = isLinked ? '[url=' + e.link + ']' + e.text + '[/url]' : e.text,
        following = arr[idx + 1];
    if (following && typeof following === 'string') {
        rendered += ' ' + following;
        delete arr[idx + 1];
    }
    arr[idx] = rendered;
}
// Reduce every value on movieData to a display string, then fill in the format.
for (param in movieData) {
    if (!movieData.hasOwnProperty(param) || typeof movieData[param] === 'undefined') {
        continue;
    }
    if (typeof movieData[param] !== 'string') {
        // Array value: rewrite its entries, drop the holes/empties, join.
        movieData[param].forEach(preprocessMovieDataParam, param);
        movieData[param] = cleanArray(movieData[param]).join(separators[param] || separators['default']);
    }
    else if (param === 'Poster') {
        // String-based tokens need no work except the poster URL: everything
        // between "@@" and the file extension is dropped
        // (presumably a size/crop suffix - TODO confirm).
        movieData[param] = movieData[param].replace(/@@.+?(\.\w+)$/, '@@$1');
    }
}
format = tokenize(format, movieData, '-');
// Add textarea to display data.
var textarea = document.createElement('textarea');
textarea.style.width = '90%';
// Select the whole text on click so it can be copied in one go.
textarea.addEventListener('click', function () {
    this.select();
}, false);
// Prefer the page footer, but fall back to <body> when the page has no
// #footer: otherwise appendChild on null throws and the result is never shown.
var textareaHost = $qs('#footer') || document.body;
textareaHost.appendChild(textarea);
textarea.textContent = format;
// Helper functions.
//------------------
// querySelector shorthand: first match for `selector` inside `context`
// (the whole document when no context is given).
function $qs(selector, context) {
    var root = context || document;
    return root.querySelector(selector);
}
// querySelectorAll shorthand that returns a real Array (never null), so the
// result can be used with forEach and friends. Searches the whole document
// when no context is given.
function $qsa(selector, context) {
    var root = context || document;
    var matches = root.querySelectorAll(selector);
    return Array.prototype.slice.call(matches);
}
// Normalises textContent: trims both ends, removes literal backslash-n
// sequences (the two characters "\" and "n"), then collapses every run of
// whitespace into a single space.
function trimFix(str) {
    var trimmed = str.trim();
    var withoutEscapes = trimmed.replace(/\\n/g, '');
    return withoutEscapes.replace(/\s+/g, ' ');
}
// Merge objects with later arguments overwriting previous.
// Returns a new object holding the own enumerable properties of every
// argument; null/undefined arguments contribute nothing.
function merge() {
    var merged = {};
    Array.prototype.forEach.call(arguments, function (source) {
        for (var key in source) {
            // Borrow hasOwnProperty from Object.prototype: the sources can be
            // keyed by arbitrary scraped text (or have no prototype at all), so
            // calling source.hasOwnProperty directly is not safe.
            if (Object.prototype.hasOwnProperty.call(source, key)) {
                merged[key] = source[key];
            }
        }
    });
    return merged;
}
// Tokenizer.
// Replaces every %Name% in `str` with repl[Name] when that is an own,
// string-valued property. Any other token is replaced by `def`, or left
// untouched when no default is supplied.
function tokenize(str, repl, def) {
    return str.replace(/%(.*?)%/g, function (token, key) {
        // Borrowed hasOwnProperty: `repl` may itself carry a key named
        // "hasOwnProperty" because its keys come from scraped text.
        if (Object.prototype.hasOwnProperty.call(repl, key) && typeof repl[key] === 'string') {
            return repl[key];
        }
        // Any string default is honoured, including '' (which blanks unknown
        // tokens); other defaults keep the original truthiness rule.
        if (typeof def === 'string') {
            return def;
        }
        return def || token;
    });
}
// Array cleaner: returns a new array without holes and without falsy entries
// (undefined, null, '', 0, NaN, false).
function cleanArray(arr) {
    return arr.filter(Boolean);
}
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment