Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Query IMDb searches to build a feed of the newest releases you are interested in. Just use the IMDb website's advanced search syntax and automagically get notified of new movies in your feed reader; you don't need to visit IMDb manually anymore.
<?php
// Surface every notice, warning and error directly in the script output.
error_reporting (E_ALL);
ini_set ("display_errors", true);

// The feed base name is the script's file name without ".php", cut at the
// first "-" (strtok also skips any leading "-" characters).
$scriptname = basename ($_SERVER["SCRIPT_NAME"], ".php");
$BASE = strtok ($scriptname, "-");
/**
 * Blank out the timestamp inside every <updated> element of an Atom document.
 *
 * Used to compare two feed documents while ignoring their generation time.
 * The timestamp must look like the output of date("c"), i.e.
 * YYYY-MM-DDThh:mm:ss followed by a UTC offset. The offset may be positive
 * ("+02:00"), negative ("-05:00") or the literal "Z"; accepting only "+"
 * would make feeds generated in a negative-offset timezone always compare
 * as different.
 *
 * @param string $str Atom XML text.
 * @return string The same text with each matching <updated>...</updated> emptied.
 */
function clear_dates ($str) {
    return preg_replace (
        "/(<updated>)[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(?:[+\\-][0-9]{2}:[0-9]{2}|Z)(<\\/updated>)/",
        "\\1\\2",
        $str
    );
}
// Optional settings from feedproxy.conf in the working directory. $conf is
// always an array so later lookups never touch an undefined variable.
$conf = array ();
if (is_readable ("feedproxy.conf")) {
    $parsed = parse_ini_file ("feedproxy.conf", true);
    if ($parsed !== false) $conf = $parsed;
}

// Every command-line argument after the script name is collected into
// $params. It is initialised even when no argument is given, so the loop
// over it sees an empty array instead of an undefined variable.
$params = array ();
if (!empty ($_SERVER["argv"]) && is_array ($_SERVER["argv"])) $params = array_slice ($_SERVER["argv"], 1);

// One cURL handle is created here and reused for all requests.
$curl = curl_init ();
// A failed init yields false, which must not be handed to curl_errno()/curl_error().
if ($curl === false) die ("ERROR curl init\n");
if (curl_errno ($curl)) die ("ERROR curl init #".curl_errno ($curl)." ".curl_error ($curl)."\n");

$curlopts = array (
    CURLOPT_ENCODING => "", // empty string: offer every encoding cURL supports
    CURLOPT_USERAGENT => "feedproxy (+http://sourceforge.net/projects/feedproxy/)",
    CURLOPT_HTTPHEADER => array ("accept-language: en"),
    CURLOPT_FAILONERROR => true, // treat HTTP status >= 400 as a failed transfer
    CURLOPT_CONNECTTIMEOUT => 16,
    CURLOPT_TIMEOUT => 32,
    CURLOPT_RETURNTRANSFER => true, // curl_exec() returns the body instead of printing it
);
if (!curl_setopt_array ($curl, $curlopts) || curl_errno ($curl)) die ("ERROR curl setopt #".curl_errno ($curl)." ".curl_error ($curl)."\n");
// Fetch every requested IMDb search page and scrape its result list into
// $entries, keyed by the numeric part of the IMDb title id. Entries found by
// several queries are stored once (a later query overwrites an earlier one).

// Fields each entry is expected to carry besides its id. They are checked by
// name so that a field the scraper never found is reported as well, not only
// a field that was found but empty.
$expected_fields = array ("image", "score", "year", "genre", "title", "credit", "votes", "outline");
$entries = array ();
foreach ($params as $param) {
    $url = "http://www.imdb.com/search/title?count=100&$param";
    // Restart the execution time limit for each query.
    set_time_limit (64);
    if (!curl_setopt ($curl, CURLOPT_URL, $url) || curl_errno ($curl)) {
        echo "ERROR curl setopt #".curl_errno ($curl)." ".curl_error ($curl)."\n";
        continue;
    }
    if (!($curlret = curl_exec ($curl)) || curl_errno ($curl)) {
        echo "ERROR curl exec #".curl_errno ($curl)." ".curl_error ($curl)."\n";
        continue;
    }
    $dom = new DOMDocument ();
    $dom->strictErrorChecking = false;
    // Markup problems in the fetched page are deliberately silenced here.
    @$dom->loadHTML ($curlret);
    if (!($root = $dom->getElementById ("main"))) die ("ERROR cannot find dom parent\n");
    foreach ($root->getElementsByTagName ("div") as $parent) {
        if ($parent->getAttribute ("class") != "lister-list") continue;
        foreach ($parent->childNodes as $child) {
            // Only the <div class="lister-item mode-advanced"> children are result rows;
            // the nodeName test comes first because non-element nodes have no attributes.
            if ($child->nodeName != "div" || $child->getAttribute ("class") != "lister-item mode-advanced") continue;
            $entry = array ();
            foreach ($child->getElementsByTagName ("div") as $div) {
                if ($div->getAttribute ("class") == "lister-item-image float-left") {
                    foreach ($div->getElementsByTagName ("img") as $img) {
                        // Drop the optional size/crop suffixes of the image URL and insert "_SY640".
                        if (preg_match ("/^(.+?)(?:_[SU][XY][0-9]+)?(?:_CR[0-9]+\,[0-9]+\,[0-9]+\,[0-9]+)?((?:_AL)?_?\.jpe?g)/i", $img->getAttribute ("loadlate"), $reg)) $entry["image"] = $reg[1]."_SY640".$reg[2];
                    }
                }
                if ($div->getAttribute ("class") == "inline-block ratings-imdb-rating") {
                    $entry["score"] = trim ($div->nodeValue);
                }
            }
            foreach ($child->getElementsByTagName ("span") as $span) {
                if ($span->getAttribute ("class") == "lister-item-year text-muted unbold") {
                    // "(2016)" or "(2016 TV Movie)" etc. is reduced to the bare year.
                    $entry["year"] = preg_replace ("/\(([0-9]{4})(?:\s*(?:Documentary|TV Special|TV Movie|Video|TV Short))?\)/i", "\\1", $span->nodeValue);
                }
                if ($span->getAttribute ("class") == "genre") {
                    $entry["genre"] = trim ($span->nodeValue);
                }
            }
            foreach ($child->getElementsByTagName ("a") as $a) {
                // The first matching title link supplies both the id and the title.
                if (preg_match ("/^\/title\/tt([0-9]+)\/\?ref_\=adv_li_tt$/", $a->getAttribute ("href"), $reg)) {
                    if (empty ($entry["id"])) $entry["id"] = $reg[1];
                    if (empty ($entry["title"])) $entry["title"] = $a->nodeValue;
                }
            }
            foreach ($child->getElementsByTagName ("p") as $p) {
                if (preg_match ("/(writers?|directors?|stars?)\:/i", $p->nodeValue)) {
                    // Collapse whitespace, then put each "|"-separated credit group on its own line.
                    $entry["credit"] = preg_replace ("/\s*\|\s*/", "\n", trim (preg_replace ("/[\r\n\t\s ]+/", " ", $p->nodeValue)));
                }
                if (strpos ($p->getAttribute ("class"), "sort-num_votes-visible") !== false) {
                    if (preg_match ("/\s*Votes\:\s*([0-9\,]+)\s*\|/", $p->nodeValue, $reg)) $entry["votes"] = str_replace (",", "", $reg[1]);
                }
                if ($p->getAttribute ("class") == "text-muted" && $p->getElementsByTagName ("span")->length == 0) {
                    $entry["outline"] = preg_replace ("/(\.\.\.)\s*See\s*full\s*summary[^a-z]+$/", "\\1", trim ($p->nodeValue));
                }
            }
            if (empty ($entry["id"])) {
                echo ("ERROR missing id @$url\n");
                continue;
            }
            foreach ($expected_fields as $field) {
                if (empty ($entry[$field])) echo "WARNING missing $field at {$entry["id"]} on $url\n";
            }
            $entries[$entry["id"]] = $entry;
        }
    }
}
// Build the Atom document from the scraped $entries and store it as
// "$BASE-<sha1 of the query list>.atom" in the working directory. The file
// is only rewritten when something other than the timestamps has changed.
if (empty ($entries)) die ("ERROR no entries found\n");
$baseurl = !empty ($conf["base"]["url"]) ? $conf["base"]["url"] : "";
$basepath = $baseurl . "$BASE.atom";
$TITLE = "IMDb best";
$hash = sha1 (implode ("\n", $params));
$feedname = "$BASE-$hash.atom";
$feedpath = $baseurl . $feedname;
$now = date ("c");
$feedcont = <<<HEREDOC
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
<id>$feedpath</id>
<updated>$now</updated>
<title>$TITLE</title>
<link rel="alternate" href="http://www.imdb.com/search/title?count=100&amp;sort=release_date_us,desc" type="text/html" title="$TITLE" hreflang="en"/>
<link rel="self" href="$feedpath" type="application/atom+xml" title="$TITLE" hreflang="en"/>
<logo>http://ia.media-imdb.com/images/M/MV5BMjM1ODg1NzgxOV5BMl5BcG5nXkFtZTgwODIwNTg0MjE@._V1_.png</logo>
<icon>http://www.imdb.com/favicon.ico</icon>
<author>
<name>moli</name>
<uri>http://moli.hu</uri>
</author>
HEREDOC;
foreach ($entries as $id => $entry) {
    // "Title (year)", XML-escaped; used for both <title> and the link title attribute.
    $label = htmlspecialchars ((!empty ($entry["title"]) ? $entry["title"] : "???") . (!empty ($entry["year"]) ? " ({$entry["year"]})" : ""), ENT_QUOTES, "UTF-8");

    // Assemble the HTML fragment shown as the entry body. Scraped text is
    // escaped for HTML here; the finished fragment is escaped once more below
    // because <content type="html"> carries escaped markup.
    $html = "";
    if (!empty ($entry["score"]) || !empty ($entry["votes"])) {
        $rating = (!empty ($entry["score"]) ? $entry["score"] : "") . (!empty ($entry["votes"]) ? " ({$entry["votes"]})" : "");
        $html .= htmlspecialchars ($rating, ENT_QUOTES, "UTF-8") . "<br/>";
    }
    if (!empty ($entry["genre"])) $html .= htmlspecialchars ($entry["genre"], ENT_QUOTES, "UTF-8") . "<br/>";
    if (!empty ($entry["credit"])) $html .= "<br/>" . preg_replace ("/[\r\n]/", "<br/>", htmlspecialchars ($entry["credit"], ENT_QUOTES, "UTF-8")) . "<br/>";
    if (!empty ($entry["outline"])) $html .= "<br/>" . htmlspecialchars ($entry["outline"], ENT_QUOTES, "UTF-8") . "<br/>";
    $html .= "<br/><a href=\"https://rarbg.to/torrents.php?page=1&order=size&by=DESC&imdb=tt$id\">RARBG torrents</a><br/>";
    if (!empty ($entry["image"])) $html .= "<br/><img src=\"{$entry["image"]}\"/>";

    $feedcont .= " <entry>\n";
    $feedcont .= " <id>$basepath?".$id."</id>\n";
    // Constant per-entry timestamp (Unix time 1): the entry text never changes between runs.
    $feedcont .= " <updated>" . date ("c", 1) . "</updated>\n";
    $feedcont .= " <title type=\"text\">" . $label . "</title>\n";
    $feedcont .= " <content type=\"html\">" . htmlspecialchars ($html, ENT_QUOTES, "UTF-8") . "</content>\n";
    $feedcont .= " <link rel=\"alternate\" href=\"http://www.imdb.com/title/tt$id/\" type=\"text/html\" title=\"" . $label . "\" hreflang=\"en\"/>\n";
    $feedcont .= " </entry>\n";
}
$feedcont .= "</feed>\n";

// Load the previously written feed, if any. The read result is checked on
// its own so that a failed read is reported instead of silently ignored.
$oldfeedcont = "";
if (file_exists ($feedname) && filesize ($feedname)) {
    $oldfeedcont = file_get_contents ($feedname);
    if ($oldfeedcont === false) die ("ERROR backup load\n");
}
// Rewrite the file only when there is no previous feed or the documents
// differ once their <updated> timestamps are blanked out.
if (empty ($oldfeedcont) || clear_dates ($feedcont) !== clear_dates ($oldfeedcont)) {
    if (file_put_contents ($feedname, $feedcont) === false) die ("ERROR write feed\n");
}
?>
Owner

phpmoli commented Jul 24, 2016

usage in command line like: php feedimdb.php 'sort=release_date_us,desc&title_type=feature&num_votes=15000,&user_rating=6.7,' 'sort=release_date_us,desc&title_type=feature&genres=sci_fi&num_votes=15000,&user_rating=5,' 'sort=release_date_us,desc&title_type=feature&genres=animation&num_votes=1500,&user_rating=6.7,' 'sort=release_date_us,desc&title_type=documentary&genres=comedy&num_votes=150,'
The result of this example will be an Atom feed with 400 movies that you have to "read" in your feed reader. After this first "initialization step" you will only get notified of new releases.

Owner

phpmoli commented Jul 29, 2016

updated to the new imdb redesign.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment