Skip to content

Instantly share code, notes, and snippets.

@ksc91u
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ksc91u/73f9ee5fd722c6eedd0c to your computer and use it in GitHub Desktop.
Save ksc91u/73f9ee5fd722c6eedd0c to your computer and use it in GitHub Desktop.
Extract subtitles from a zip/rar archive and convert them to UTF-8
#!/usr/bin/perl
use File::Basename;
use Encode;
require Encode::Detect;
use base qw(Encode::Encoding);
use Encode qw(find_encoding);
use Encode::Detect::Detector;
use URL::Encode qw(url_encode_utf8 url_encode);
# Entry point.
#
# Usage:
#   <script> VIDEO ARCHIVE         ARCHIVE ends in .rar or .zip
#   <script> VIDEO SUBTITLE...     one or more loose subtitle files
#
# VIDEO ($ARGV[0]) is only handed to moveTo(), which derives the target
# directory and the base name for the renamed subtitles from it.
# All intermediate work happens in /tmp/sub, which is removed at the end.
if (@ARGV < 2) {
    die "Usage: $0 <video-file> <archive.rar|archive.zip | subtitle-file ...>\n";
}
my ($video, @inputs) = @ARGV;

# Decide on the file *extension* only; an unanchored match would treat
# e.g. "Library.srt" (contains "rar") as an archive.
if ($inputs[0] =~ m/\.(?:rar|zip)\z/i) {
    extract($inputs[0]);
}
else {
    # List-form system() bypasses the shell, so quotes, spaces and other
    # metacharacters in file names are passed through literally.
    system('mkdir', '-p', '/tmp/sub');
    for my $subtitle (@inputs) {
        system('cp', '--', $subtitle, '/tmp/sub') == 0
            or warn "Could not copy '$subtitle' to /tmp/sub\n";
    }
}
#getSub($video);
conv();
moveTo($video);
system('rm', '-rf', '/tmp/sub');
# getSub($video_path)
#
# Searches shooter.cn for subtitles matching the "…SxxEyy" prefix of the
# video's file name and requests every ".xml" link found in the result page.
#
# NOTE(review): the downloaded documents are fetched but not stored or
# returned, exactly as before; the sub looks unfinished and its only call
# site is commented out.
#
# Returns nothing. Does nothing (apart from creating /tmp/sub) when the file
# name carries no SxxEyy marker.
sub getSub {
    my ($f) = @_;
    system('mkdir', '-p', '/tmp/sub');

    my ($filename) = fileparse($f);
    unless ($filename =~ m/(.*S\d\dE\d\d).*/) {
        # Without this guard a stale $1 from an earlier match would be used.
        warn "No SxxEyy episode marker in '$filename'\n";
        return;
    }
    my $episode = $1;
    print $episode;

    # Run curl through a list-form pipe so the URL never reaches a shell.
    my $fetch = sub {
        my ($url) = @_;
        open(my $curl, '-|', 'curl', '-k', $url) or return '';
        my $body = do { local $/; <$curl> };
        close $curl;
        return defined $body ? $body : '';
    };

    my $out = $fetch->("http://shooter.cn/search2/" . url_encode($episode));
    my @matches = $out =~ /class="introtitle".*?href="(.*?\.xml)"/g;
    for my $m (@matches) {
        $out = $fetch->("http://shooter.cn/$m");
    }
    return;
}
# moveTo($video_path)
#
# Moves every subtitle file (.ssa/.ass/.aas/.srt) from /tmp/sub into the
# directory of $video_path, renaming each one to
#   <video name without extension>.<second-to-last part>.<extension>
# where the parts come from splitting the subtitle's own name on ".".
# Example: video "Show.S01E01.mkv" + subtitle "whatever.chs.srt"
#          -> "Show.S01E01.chs.srt" next to the video.
#
# Dies if /tmp/sub cannot be opened; warns when an individual move fails.
sub moveTo {
    my ($video_path) = @_;
    my ($video_name, $target_dir) = fileparse($video_path);

    # Strip the extension, but only if there is one: popping the sole part
    # of an extension-less name would leave an empty base name.
    my @video_parts = split /\./, $video_name;
    pop @video_parts if @video_parts > 1;
    my $episode_name = join(".", @video_parts);

    opendir(my $dh, "/tmp/sub") or die "Error in opendir";
    my @subtitles = grep { /\.(ssa|ass|aas|srt)$/ } readdir($dh);
    closedir $dh;

    foreach my $subname (@subtitles) {
        my @parts = split /\./, $subname;
        my $new_name = join(".", $episode_name, $parts[-2], $parts[-1]);

        # List-form system(): no shell, so "$", backticks or quotes in a
        # subtitle's name (which may come from an untrusted archive) are
        # treated as plain characters.
        system('mv', '-f', '--', "/tmp/sub/$subname", "$target_dir/$new_name") == 0
            or warn "Could not move '$subname' to '$target_dir/$new_name'\n";
    }
    return;
}
# extract($archive)
#
# Unpacks $archive into /tmp/sub with the external "unar" tool, pulls
# subtitle files found in nested directories up into /tmp/sub itself, and
# finally deletes every file whose name contains "简体"
# (NOTE(review): presumably the unwanted Simplified-Chinese variants).
#
# All external commands except the final constant "rm" are run in list form
# so the archive path is never interpreted by a shell.
sub extract {
    my ($z) = @_;
    system('mkdir', '-p', '/tmp/sub');
    #if($z=~m/\.rar$/){
    # system("unrar e $z /tmp/sub");
    #}elsif($z=~m/\.zip$/){
    # system("unzip $z -d /tmp/sub");
    #}else{
    # system("tar xvf $z -C /tmp/sub");
    #}
    system('unar', '-no-directory', '-output-directory', '/tmp/sub', $z) == 0
        or warn "unar failed for '$z'\n";

    # Flatten: move subtitles from any sub-directory (depth >= 2) to the top
    # of /tmp/sub. Letting find run mv itself copes with directory names that
    # contain whitespace. ".ssa" is included because moveTo() handles it too.
    system(
        'find', '/tmp/sub', '-mindepth', '2', '-type', 'f',
        '(', '-name', '*.ass', '-o', '-name', '*.ssa',
             '-o', '-name', '*.srt', '-o', '-name', '*.aas', ')',
        '-exec', 'mv', '{}', '/tmp/sub/', ';',
    );

    # Constant command string: the shell is needed here only for the glob.
    system("rm -f /tmp/sub/*简体*");
    #system("rm -f /tmp/sub/*gb*");
    #system("rm -f /tmp/sub/*lol*");
    return;
}
# guess_encoding($filename)
#
# Reads the whole file as raw bytes and asks detect() (imported from
# Encode::Detect::Detector) for its character encoding.
#
# Returns the detected encoding name, or "ISO8859-1" when detection yields
# nothing or a name shorter than three characters.
# Dies if the file cannot be opened.
sub guess_encoding {
    my ($filename) = @_;

    # Three-argument open: the file name is never parsed for a mode or pipe
    # character. ":raw" keeps the bytes untouched for the detector.
    open(my $fh, '<:raw', $filename) or die "Couldn't open file: $!";
    my $string = do { local $/; <$fh> };
    close $fh;
    $string = '' unless defined $string;

    my $encoding = detect($string);
    if (!defined($encoding) || length($encoding) < 3) {
        return "ISO8859-1";
    }
    return $encoding;
}
# conv()
#
# Converts every subtitle file (.ssa/.ass/.aas/.srt) in /tmp/sub to UTF-8 in
# place. The source encoding comes from guess_encoding(); any name containing
# "BIG" is normalised to "BIG5-HKSCS". The conversion itself is done by the
# external "iconv" program, writing to the scratch file /tmp/sub/123.
#
# The original file is replaced only when iconv succeeded; on failure the
# file is left untouched and a warning is printed.
# Dies if /tmp/sub cannot be opened.
sub conv {
    opendir(my $dh, "/tmp/sub") or die "Error in opendir";
    # ".ssa" is included so that everything moveTo() later moves is converted.
    my @subtitles = grep { /\.(ssa|ass|aas|srt)$/ } readdir($dh);
    closedir $dh;

    my $scratch = "/tmp/sub/123";
    foreach my $name (@subtitles) {
        my $path     = "/tmp/sub/$name";
        my $encoding = uc(guess_encoding($path));
        if ($encoding =~ /BIG/) {
            $encoding = "BIG5-HKSCS";
        }
        print "Encoding ... $encoding\n";

        my $converted = 0;
        if (open(my $out, '>:raw', $scratch)) {
            # List-form pipe open: iconv is executed directly, so neither the
            # file name nor the encoding name is seen by a shell.
            if (open(my $iconv, '-|', 'iconv', '-f', $encoding, '-t', 'utf-8', $path)) {
                binmode $iconv;
                my $buffer;
                while (read($iconv, $buffer, 65536)) {
                    print {$out} $buffer;
                }
                # close() on a pipe is false when the child exited non-zero.
                $converted = close($iconv) ? 1 : 0;
            }
            $converted = 0 unless close($out);
        }

        if ($converted) {
            rename($scratch, $path)
                or warn "Could not replace '$path': $!\n";
        }
        else {
            # Keep the original rather than overwrite it with partial output.
            warn "iconv failed for '$path' (from $encoding); file left unchanged\n";
            unlink $scratch;
        }
    }
    return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment