Skip to content

Instantly share code, notes, and snippets.

Last active August 29, 2015 14:07
Show Gist options
  • Save ksc91u/73f9ee5fd722c6eedd0c to your computer and use it in GitHub Desktop.
Save ksc91u/73f9ee5fd722c6eedd0c to your computer and use it in GitHub Desktop.
Extract subtitles from a zip archive and convert them to UTF-8
use File::Basename;
use Encode;
require Encode::Detect;
use base qw(Encode::Encoding);
use Encode qw(find_encoding);
use Encode::Detect::Detector;
use URL::Encode qw(url_encode_utf8 url_encode);
# Top-level driver. The first command-line argument is stored in $dest and the
# second in $zip; every argument from index 1 onwards is copied into the
# /tmp/sub scratch directory with `cp`, and the scratch directory is removed
# with `rm -rf`.
# NOTE(review): the closing brace of the for loop is not visible in this copy
# of the file, so it is unclear whether the `rm -rf` runs inside or after the
# loop, and whether further statements (e.g. calls to the subs defined below)
# originally sat between the copy and the cleanup -- confirm against the
# original script.
$dest = $ARGV[0];
# $zip is assigned here but not referenced again in the visible code.
$zip = $ARGV[1];
# Repeats the assignment made two statements earlier.
$dest = $ARGV[0];
system("mkdir /tmp/sub");
# Starts at index 1, so $ARGV[0] (the destination) is not copied.
for($j=1; $j<=$#ARGV; $j++){
system("cp '$ARGV[$j]' /tmp/sub");
system("rm -rf /tmp/sub");
# getSub($f): takes a file path, builds a URL from a URL-encoded capture and
# fetches it with `curl -k`, then fetches every link found in the response
# whose anchor follows class="introtitle" and whose href ends in ".xml".
# NOTE(review): this sub looks truncated in this copy of the file -- no closing
# braces are visible, $1 is read although no regex match precedes it here, and
# the URL prefix concatenated in front of url_encode() is an empty string.
# Treat the comments below as a description of the visible statements only.
sub getSub{
system("mkdir /tmp/sub");
my $f = shift;
# Split the path into file name, directory part and suffix (package globals).
($filename, $directories, $suffix) = fileparse($f);
# Overwrites the file name from fileparse with capture group 1 of whatever
# regex matched last; that match is not visible in this block.
$filename = $1;
print $1;
$url = "".url_encode($1);
# -k makes curl skip TLS certificate verification.
$out = `curl -k $url`;
# Collect the href of every ".xml" link that follows class="introtitle".
my @matches = $out =~ /class="introtitle".*?href="(.*?\.xml)"/g;
for $m (@matches){
# NOTE(review): there is no separator between "-k" and $m in this command;
# presumably a host prefix was lost from the literal -- confirm.
$out=`curl -k$m`;
# moveTo($f): move every subtitle file found in /tmp/sub next to the file $f,
# renaming each one after $f.
#
#   $f - path of the target (episode) file. Its directory is the destination
#        and its name, minus the last dot-separated component, is the prefix
#        of each new subtitle name.
#
# A subtitle named "<anything>.<tag>.<ext>" becomes "<episode>.<tag>.<ext>",
# i.e. only the last two dot-separated components of the subtitle name are
# kept. Files are selected by extension: ssa, ass, aas or srt.
#
# Dies if /tmp/sub cannot be opened. Returns the result of closedir.
sub moveTo {
    my $f = shift;

    my ($filename, $directories) = fileparse($f);
    my @filename_parts = split /\./, $filename;

    # Drop the extension, but keep the name itself when the file has no
    # extension at all (otherwise the episode name would become empty and
    # the subtitles would be renamed to hidden ".tag.ext" files).
    pop @filename_parts if @filename_parts > 1;
    my $episode_name = join '.', @filename_parts;

    opendir(my $dh, '/tmp/sub') or die "Error in opendir";
    my @files = grep { /\.(ssa|ass|aas|srt)$/ } readdir $dh;

    for my $subname (@files) {
        my @subname_parts = split /\./, $subname;
        my $last = $#subname_parts;
        my $sub_name = join '.',
            $episode_name, $subname_parts[ $last - 1 ], $subname_parts[$last];

        # List-form system bypasses the shell, so quotes, spaces, "$" or
        # backticks in subtitle or directory names cannot break (or inject
        # into) the command. "--" guards against paths starting with "-".
        system( 'mv', '-f', '--', "/tmp/sub/$subname",
            "$directories/$sub_name" );
    }

    return closedir $dh;
}
# extract($z): unpack the archive $z into /tmp/sub and flatten the result.
#
#   $z - path of the archive; it is handed to `unar`.
#
# Steps:
#   1. make sure /tmp/sub exists;
#   2. unpack with `unar -no-directory -output-directory /tmp/sub`;
#   3. move subtitle files (ass, srt, aas, ssa) found in any sub-directory up
#      into /tmp/sub itself;
#   4. delete files whose name contains the marker in the final `rm` pattern.
#
# Returns the exit status of the final `rm` command.
sub extract {
    my $z   = shift;
    my $dir = '/tmp/sub';

    # Only create the scratch directory when it is missing; an unconditional
    # `mkdir` prints an error every time the directory already exists.
    mkdir $dir unless -d $dir;

    # Alternative extractors that were tried before settling on unar:
    # system("unrar e $z /tmp/sub");
    # system("unzip $z -d /tmp/sub");
    # system("tar xvf $z -C /tmp/sub");

    # List form: the archive path reaches unar as a single argument even when
    # it contains spaces or shell metacharacters.
    system( 'unar', '-no-directory', '-output-directory', $dir, $z );

    # Flatten: pull subtitle files out of nested directories. Running find
    # with -exec (instead of looping over backtick output in the shell) keeps
    # working when directory names contain spaces. ".ssa" is included so that
    # every extension moveTo picks up is also flattened.
    system(
        'find', $dir, '-mindepth', '2', '-type', 'f',
        '(',
        '-name', '*.ass', '-o', '-name', '*.srt', '-o',
        '-name', '*.aas', '-o', '-name', '*.ssa',
        ')',
        '-exec', 'mv', '{}', "$dir/", ';'
    );

    # Further clean-up patterns that are currently disabled:
    #system("rm -f /tmp/sub/*gb*");
    #system("rm -f /tmp/sub/*lol*");

    # The glob needs the shell, so this one stays a single-string command.
    return system("rm -f /tmp/sub/*简体*");
}
# guess_encoding($filename): return the name of the character encoding that
# detect() reports for the contents of $filename.
#
#   $filename - path of the file to inspect; it is read in full as raw bytes.
#
# Returns the detected encoding name, or "ISO8859-1" when detect() returns
# undef or a name shorter than three characters.
# Dies if the file cannot be opened.
sub guess_encoding {
    my $filename = shift;

    # Three-argument open with a lexical handle: the file name can no longer
    # be misread as an open mode, and the handle does not leak as a global.
    open my $fh, '<:raw', $filename
        or die "Couldn't open file: $!";

    # Slurp the whole file; $/ is only undefined inside this do block.
    my $string = do { local $/; <$fh> };
    close $fh;

    my $encoding = detect($string);
    return "ISO8859-1" if !defined($encoding) || length($encoding) < 3;
    return $encoding;
}
# conv(): convert every subtitle file in /tmp/sub to UTF-8 in place.
#
# For each file ending in ssa, ass, aas or srt the encoding is taken from
# guess_encoding() (upper-cased; any name containing "BIG" is mapped to
# "BIG5-HKSCS") and the file is run through `iconv -f <encoding> -t utf-8`.
# ".ssa" is included so that every extension moveTo picks up is converted.
#
# A file is only replaced when iconv exits successfully; on failure a warning
# is printed and the original file is left untouched.
#
# Dies if /tmp/sub cannot be opened or the converted output cannot be written.
# Returns the result of closedir.
sub conv {
    my $dir = '/tmp/sub';
    my $tmp = "$dir/123";    # scratch file for the converted output

    opendir(my $dh, $dir) or die "Error in opendir";
    my @files = grep { /\.(ssa|ass|aas|srt)$/ } readdir $dh;

    for my $filename (@files) {
        my $path     = "$dir/$filename";
        my $encoding = uc guess_encoding($path);
        $encoding = "BIG5-HKSCS" if $encoding =~ /BIG/;
        print "Encoding ... $encoding\n";

        # List-form pipe open: no shell is involved, so file names with
        # quotes, spaces or "$" are passed to iconv verbatim.
        my $iconv;
        unless ( open $iconv, '-|', 'iconv', '-f', $encoding, '-t', 'utf-8',
            $path )
        {
            warn "Cannot run iconv for $filename: $!\n";
            next;
        }
        binmode $iconv;
        my $utf8 = do { local $/; <$iconv> };

        # close() on a pipe is false when the command exited non-zero.
        # Skipping here is what keeps a failed conversion from overwriting
        # the original subtitle with empty or partial output.
        unless ( close $iconv ) {
            warn "iconv failed for $filename (from $encoding); "
                . "leaving it unchanged\n";
            next;
        }

        open my $out, '>:raw', $tmp or die "Cannot write $tmp: $!";
        print {$out} $utf8;
        close $out or die "Cannot write $tmp: $!";
        rename $tmp, $path or die "Cannot replace $path: $!";
    }

    return closedir $dh;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment