Last active
September 9, 2015 10:44
-
-
Save adhocore/66582d7d8d4cf84a8940 to your computer and use it in GitHub Desktop.
A simple php cli tool to traverse movie directories and download the missing subtitles from subscene.com (suited for movies from yify torrents)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env php | |
<?php | |
/*
 * A simple PHP CLI tool that traverses movie directories
 * and downloads the missing subtitles from subscene.com
 * (especially suited for movies from YIFY torrents).
 *
 * Usage: php sub.php /path/to/movies/ [force] [caching]
 *
 * @author Jitendra Adhikari <jiten.adhikary@gmail.com>
 */
/* START user config */

// Directory (keep the trailing slash) where metadata and subtitles are cached for later use
$cach = 'D:/share/subtitles/';

/* END user config */

// Use `||`, not `or`: `or` has lower precedence than `=`, so with `or` the
// STDIN fallback would be evaluated but never stored in $isCli.
$isCli = 'cli' === php_sapi_name() || defined('STDIN');
if (!$isCli) {
    exit('To be run from Command Line Terminal only.');
}

if ($argc < 2) {
    $syntax = '<fullpath:string> <force:bool> <caching:bool>';
    exit(
"Usage 1:
php sub.php {$syntax}
Usage 2: (with sub.bat in path)
sub {$syntax}
");
}

// Pad the optional arguments so $argv[2] (force) and $argv[3] (caching)
// always exist; the array union keeps any value the user actually passed.
$argv += ['', '', 0, 0];

$path = $argv[1];                 // root directory to traverse
$subt = 'http://subscene.com';    // base URL of the subtitle site
$temp = $cach.'temp.zip';         // scratch file for downloaded archives
$opts = compact('subt', 'cach', 'temp');

// File extensions (lower case) that are treated as movies
$vids = [
    'mp4', 'avi', 'mkv', 'm4v', 'flv',
    'webm', 'mov', 'wmv', 'mpg', 'mpeg',
];

// Map of film folder name => status flag, loaded from a previous run when available
$proc = [];
if (is_file($cach.'meta.json')) {
    $proc = json_decode(file_get_contents($cach.'meta.json'), true);
    // A corrupt or empty meta file decodes to null (or a scalar); fall back
    // to an empty map so later array operations keep working.
    if (!is_array($proc)) {
        $proc = [];
    }
}
/**
 * Write the processing metadata to `meta.json` inside the cache directory.
 *
 * Entries with a falsy status are dropped by array_filter() before writing,
 * so only the truthy ones end up in the file. The write happens at most once
 * per process; every later call is a no-op.
 *
 * @param array  $proc Map of film name => status flag
 * @param string $cach Cache directory path, including the trailing slash
 *
 * @return void
 */
function saveMeta($proc, $cach)
{
    static $saved = false;

    if (!$saved) {
        $pending = array_filter($proc);
        $payload = json_encode($pending, JSON_PRETTY_PRINT);

        file_put_contents($cach.'meta.json', $payload);

        $saved = true;
    }
}
/**
 * Fetch a subtitle page, download the archive it links to and copy the
 * archive's first entry to the cache file and to the movie's .srt file.
 *
 * Prints ' :: SAVED' when both copies succeed.
 *
 * @param string $url  Location of the subtitle page, appended to $opts['subt']
 * @param array  $opts Keys read: 'subt' (base URL), 'temp' (path the archive
 *                     is written to), 'cacheFile' and 'srtFile' (destinations)
 *
 * @return bool True when the subtitle was written to both destinations
 */
function saveSrt($url, $opts)
{
    // Read the options explicitly: extract() with EXTR_OVERWRITE would let a
    // stray 'url' or 'saved' key silently replace the local variables.
    $subt      = $opts['subt'];
    $temp      = $opts['temp'];
    $cacheFile = $opts['cacheFile'];
    $srtFile   = $opts['srtFile'];

    // Best-effort fetch: a failed request simply means "not saved".
    $page = @file_get_contents($subt.$url);
    if ($page === false ||
        !preg_match('~<a href="(/subtitle/download[^"]+)"~s', $page, $link)
    ) {
        return false;
    }

    // Do not write (and then try to open) an empty archive when the download fails.
    $archive = @file_get_contents($subt.$link[1]);
    if ($archive === false || $archive === '') {
        return false;
    }
    if (file_put_contents($temp, $archive) === false) {
        return false;
    }

    $zip = new ZipArchive();
    if ($zip->open($temp) !== true) {
        return false;
    }

    $saved = false;
    // Only the first entry of the archive is used.
    $fn = $zip->getNameIndex(0);
    if ($fn &&
        copy("zip://{$temp}#{$fn}", $cacheFile) &&
        copy($cacheFile, $srtFile)
    ) {
        $saved = true;
        echo ' :: SAVED';
    }
    $zip->close();

    return $saved;
}
echo PHP_EOL;

// Counters: videos seen, errors, skipped, already present / restored, downloaded
$count = $eCount = $sCount = $oCount = $cCount = 0;

// Guard against a metadata file that did not decode to an array.
if (!is_array($proc)) {
    $proc = [];
}

// Listing-page patterns (loop-invariant): an English entry tagged YIFY, and any English entry.
$yify = '~<a href="([^"]+)">\s+<span.*?English.*?</span>\s+<span>\s+.*?YIFY\s+</span>\s+</a>~ms';
$othr = '~<a href="([^"]+)">\s+<span.*?English.*?</span>\s+<span>\s+.*?\s+</span>\s+</a>~ms';

$walker = new RecursiveIteratorIterator(
    new RecursiveDirectoryIterator($path),
    RecursiveIteratorIterator::SELF_FIRST
);

foreach ($walker as $file) {
    $ext = $file->getExtension();

    // Only regular files with a known video extension (matched case-insensitively).
    if (!$file->isFile() || !in_array(strtolower($ext), $vids, true)) {
        continue;
    }

    // The film is identified by the name of the folder containing the video.
    $pathname = $file->getPathname();
    $film     = basename(dirname($pathname));
    if (strlen($film) < 5) {
        continue;
    }

    // Swap only the trailing extension; a str_replace() over the whole path
    // would also rewrite ".ext" occurrences inside directory names.
    $srtFile   = substr($pathname, 0, -strlen($ext)).'srt';
    $cacheFile = $cach.basename($srtFile);

    ++$count;
    echo str_pad(substr($film, 0, 69).' ', 70, '-');

    // Subtitle already sits next to the movie: make sure the cache has a copy.
    if (is_file($srtFile)) {
        ++$oCount;
        if (!is_file($cacheFile) && !is_file($cach.$film)) {
            copy($srtFile, $cacheFile);
            echo ' :: CACHE', PHP_EOL;
        } else {
            echo ' :: OK', PHP_EOL;
        }
        continue;
    }

    // Skip when the third argument is 1, or when the film is already recorded
    // in the metadata and the second argument (force) is not 1.
    if ($argv[3] == 1 || (isset($proc[$film]) && $argv[2] != 1)) {
        ++$sCount;
        echo ' :: SKIP', PHP_EOL;
        continue;
    }

    $proc[$film] = 0;

    // Four-digit year in parentheses, e.g. "Some Film (2014)" => "2014".
    $year = preg_match('`\((\d{4})\)`', $film, $found) ? $found[1] : '';

    // Title is everything before " ("; without a parenthesised part use the
    // whole folder name instead of ending up with an empty search slug.
    $title = strstr($film, ' (', true);
    if ($title === false) {
        $title = $film;
    }

    // Build the URL slug: lower-cased words stripped of non-word characters,
    // joined by dashes. Words that end up empty are dropped to avoid "--".
    $words = array_map(function ($p) {
        return preg_replace('`[\W]+`', '', trim(strtolower($p)));
    }, preg_split('/\s+/', trim($title)));
    $name = implode('-', array_filter($words, 'strlen'));

    // Restore from the cache when a subtitle is stored there under either name.
    if (($a = is_file($cacheFile)) || is_file($cach.$film)) {
        if (!$a) {
            $cacheFile = $cach.$film;
        }
        if (copy($cacheFile, $srtFile)) {
            ++$oCount;
            echo ' :: SAVED';
        }
        echo PHP_EOL;
        continue;
    }

    $html2 = '';
    $saved = false;
    $html  = (string) @file_get_contents("http://subscene.com/subtitles/{$name}/english");
    $opts  = array_merge($opts, compact('srtFile', 'cacheFile'));

    // Four priority order:
    // #1. name/yifi
    // saveSrt() prepends the site URL itself, so pass the bare link here too.
    if (preg_match($yify, $html, $ancr)) {
        $saved = saveSrt($ancr[1], $opts);
    }

    // #2. name-year/yifi
    if (!$saved && $year) {
        $html2 = (string) @file_get_contents("http://subscene.com/subtitles/{$name}-{$year}/english");
        if (preg_match($yify, $html2, $ancr)) {
            $saved = saveSrt($ancr[1], $opts);
        }
    }

    // #3. name/othr
    if (!$saved && preg_match($othr, $html, $ancr)) {
        $saved = saveSrt($ancr[1], $opts);
    }

    // #4. name-year/othr
    if (!$saved && $year) {
        // Retry the year page if the earlier fetch came back empty.
        if (!$html2) {
            $html2 = (string) @file_get_contents("http://subscene.com/subtitles/{$name}-{$year}/english");
        }
        if (preg_match($othr, $html2, $ancr)) {
            $saved = saveSrt($ancr[1], $opts);
        }
    }

    if (!$saved) {
        ++$eCount;
        // Flag the failure; truthy entries are what saveMeta() persists.
        $proc[$film] = 1;
        echo ' :: ERROR';
    } else {
        ++$cCount;
    }
    echo PHP_EOL;
}

ksort($proc);
saveMeta($proc, $cach);

echo "\nOperation Completed Normally.
Total : {$count}
Okay : {$oCount}
Download : {$cCount}
Skip : {$sCount}
Error : {$eCount}
";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment