Skip to content

Instantly share code, notes, and snippets.

@briandfoy
Created August 30, 2015 16:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save briandfoy/9caa576adc8e15e48b13 to your computer and use it in GitHub Desktop.
Save briandfoy/9caa576adc8e15e48b13 to your computer and use it in GitHub Desktop.
#!/Users/brian/bin/perls/perl5.22.0
use open qw(:std :utf8);
use v5.10;
use Mojo::UserAgent;
use Devel::Peek qw(Dump);
use Encode qw(decode find_encoding);
use HTML::HeadParser;
my $ua = Mojo::UserAgent->new;
# The problem URL
my $url = 'http://blogs.perl.org/users/patch/2015/07/noirin-plunkett.html';
my $tx = $ua->get( $url );
my $res = $tx->res;
# blogs.perl.org doesn't return an encoding in the HTTP header
my $type_in_header = $res->headers->header('content-type');
say "Got type [$type_in_header] from header";
my $charset_in_header;
if( ( $charset_in_header ) = $type_in_header =~ /;\s+charset=(\S+)/ ) {
say "Charset from header is $charset_in_header";
}
my $ct = $res->content->charset;
say "Charset from method before meta inspection is [$ct]";
# so let's look in the <head>
process_meta( $tx );
$ct = $res->content->charset;
say "Charset from method after meta inspection is [$ct]";
# create an encoding object from the first defined value
my $encoding = find_encoding( $ct );
die "Could not discover encoding\n" unless $encoding;
# Now, take the octets from the raw response and decode them into
# its Perl string form. ->body is the raw octets.
my $decoded_body = $encoding->decode( $res->body );
# Now it should be okay inside Perl
# Note that ->text assumes UTF-8 unless there's something else
say $res->text;
sub process_meta {
my( $tx ) = @_;
my $headers = $tx->res->headers;
my( %meta ) =
$tx
->res
->dom
->find( 'head meta[http-equiv]' )
->map( sub {
$headers->header( $_->{'http-equiv'}, $_->{'content'} );
} );
$tx;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment