Skip to content

Instantly share code, notes, and snippets.

@ryoi432
Created January 25, 2015 13:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryoi432/6eb27fc69b26384d1259 to your computer and use it in GitHub Desktop.
Save ryoi432/6eb27fc69b26384d1259 to your computer and use it in GitHub Desktop.
addbookmarktitle
#!/usr/bin/perl
use warnings;
use strict;
use utf8;
use LWP::UserAgent;
use Mozilla::CA;
use Encode;
use Encode::Guess;
# Rewrite a bookmark export so that each link's text becomes the <title>
# of the page it points to.
#
# Usage: addbookmarktitle INPUT_FILE OUTPUT_FILE
#
# Every input line of the form  PREFIX<A HREF="URL"ATTRS>TEXT</A>  triggers
# a GET of URL.  When the response is successful and contains a non-empty
# <title>, TEXT is replaced by that title (re-encoded as UTF-8 when the
# page encoding can be guessed).  In every other case the line is copied
# to OUTPUT_FILE unchanged.  Progress is reported on STDOUT.
die "Usage: $0 INPUT_FILE OUTPUT_FILE\n" if @ARGV < 2;
my $InputFile = $ARGV[0];
my $OutputFile = $ARGV[1];

# Three-argument open: the file names are never interpreted as open modes.
open(my $InputHandle, '<', $InputFile)
    or die "Cannot open '$InputFile' for reading: $!\n";
open(my $OutputHandle, '>', $OutputFile)
    or die "Cannot open '$OutputFile' for writing: $!\n";

# Unbuffered STDOUT so the "N. URL" prefix shows up before the fetch starts.
$| = 1;

my $n = 1;    # running number of the links processed, for the progress log
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/5.0");
$ua->ssl_opts(SSL_ca_file => Mozilla::CA::SSL_ca_file());
$ua->timeout(10);

while (my $line = <$InputHandle>) {
    chomp($line);

    # $mae = text before the anchor, $ushiro = attributes after HREF.
    my ($mae, $url, $ushiro)
        = $line =~ /^(.*?)<A HREF=\"(.*?)\"(.*?)>.*?<\/A>$/;

    # Not a bookmark line: copy it through untouched.
    if (!defined $url) {
        print {$OutputHandle} "$line\n";
        next;
    }

    print "$n. $url ";
    my $res = $ua->get($url);

    if ($res->is_success) {
        my $title = _extract_title($res->content);
        if (defined $title) {
            print "[$title]\n";
            print {$OutputHandle} "$mae<A HREF=\"$url\"$ushiro>$title</A>\n";
        }
        else {
            print "notitle\n";
            print {$OutputHandle} "$line\n";
        }
    }
    else {
        print $res->status_line, "\n";
        print {$OutputHandle} "$line\n";
    }
    $n++;
}

close($InputHandle);
# Buffered write errors only surface at close, so check it.
close($OutputHandle) or die "Cannot close '$OutputFile': $!\n";
exit;

# Return the page title found in the raw response body $content, as UTF-8
# bytes when the encoding could be guessed, otherwise as the raw bytes.
# Returns nothing (undef in scalar context) when there is no <title>
# element or the title is empty after whitespace normalisation.
sub _extract_title {
    my ($content) = @_;
    return unless defined $content;

    # /i: tag case varies; /s: titles may span several lines;
    # [^>]*: tolerate attributes on the <title> tag.
    my ($raw) = $content =~ m{<title\b[^>]*>(.*?)</title>}is;
    return unless defined $raw;

    # Guess on the whole body (more evidence than the title alone); fall
    # back to cp932 alone when the first set of suspects is ambiguous.
    my $decoder = guess_encoding($content,
        qw/shiftjis euc-jp iso-2022-jp 7bit-jis ascii/);
    $decoder = guess_encoding($content, qw/cp932/) unless ref($decoder);

    my $text = ref($decoder) ? $decoder->decode($raw) : $raw;

    # Collapse whitespace runs so the bookmark stays on a single line.
    # An explicit ASCII class keeps this safe on undecoded byte strings.
    $text =~ s/[ \t\r\n\f]+/ /g;
    $text =~ s/^ //;
    $text =~ s/ $//;
    return unless length $text;

    return ref($decoder) ? encode('utf-8', $text) : $text;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment