Skip to content

Instantly share code, notes, and snippets.

@conmame
Created February 12, 2013 01:28
Show Gist options
  • Save conmame/4759295 to your computer and use it in GitHub Desktop.
Save conmame/4759295 to your computer and use it in GitHub Desktop.
sub _parse_response
{
my $this = shift;
my $res = shift;
my $req = shift;
my $full_ch_name = $req->{full_ch_name};
$DEBUG and $this->_debug($req, "_parse_response.");
my $result = {
result => undef,
status_code => undef,
is_success => undef,
title => undef,
content_type => undef,
content_length => undef,
decoded_content => undef,
fetch_length => undef,
};
if( !ref($res) )
{
my $DEFAULT_LANG = 'ja';
my $lang = $this->config->lang || $DEFAULT_LANG;
my $msgmap = {};
if( $lang eq 'ja' )
{
$msgmap = {
error => 'エラー',
timeout => 'タイムアウト',
'no host to connect' => 'サーバが見つかりません',
'接続を拒否されました' => 'サーバに接続できませんでした',
'Connection refused' => 'サーバに接続できませんでした',
};
};
$result->{result} = "(".($msgmap->{error}||'error').") $req->{url} ".($msgmap->{$res}||$res);
return $result;
}
my $protocol = $res->{Protocol};
my $status_code = $res->{Code} || 0;
my $status_msg = $res->{Message};
my $headers = $res->{Header}; # hash-ref.
my $content = $res->{Content};
$result->{fetch_length} = defined($content) ? length($content) : undef;
defined($content) or $content = '';
my @opts;
$result->{status_code} = $status_code;
$result->{content_length} = $headers->{'Content-Length'};
if( !defined($result->{content_length}) && $res->{StreamState} eq 'finished' )
{
$result->{content_length} = length($content);
}
$DEBUG and $this->_debug($full_ch_name, "debug: fetch ".length($content)." bytes");
# extract Cookies;
if( $headers->{'Set-Cookie'} )
{
$this->_extract_cookies($req);
}
if( my $loc = $headers->{Location} )
{
$DEBUG and $this->_debug($full_ch_name, "debug: has Location header: $loc");
if( $loc =~ m{^\s*(\w+://[-.\w]+\S*)\s*$} )
{
$result->{redirect} = substr($loc, 0, length($1)); # keep taintness.
}elsif( $loc =~ m{^\s*((/?).*?(?:[#?].*)?)\s*$} )
{
my $path = substr($loc, 0, length($1)); # keep taintness.
my $is_abs = $2;
my $new_url = $req->{url};
$new_url =~ s{[#?].*}{};
if( $is_abs )
{
$new_url =~ s{^(\w+://[^/]+)/.*}{$1} or die "invalid req url(abs): $new_url";
}else
{
$new_url =~ s{^(\w+://[^/]+.*/).*}{$1} or die "invalid req url(rel): $new_url";
}
$result->{redirect} = $new_url . $path;
}else
{
$DEBUG and $this->_debug($full_ch_name, "debug: broken location url: $loc");
}
$DEBUG && $result->{redirect} and $this->_debug($full_ch_name, "debug: Location redirect: $result->{redirect}");
}
if( int($status_code / 100) != 2 && !$result->{redirect} )
{
$result->{title} = $status_msg;
push(@opts, "http status $status_code");
}
# detect refresh tag.
my $content2 = $content;
$content2 =~ s/<!--.*?-->//g;
if( $content2 =~ m{
<META(?:\s[^>]*?)?\s
(?:HTTP-EQUIV\s*=\s*(["'])refresh\1(?:\s[^>]*?)?\sCONTENT\s*=\s*(["'])(\d+)\s*;\s*URL=([^"'<]+)\2|
CONTENT\s*=\s*(["'])(\d+)\s*;\s*URL=([^"'<]+)\5(?:\s[^>]*?)?\sHTTP-EQUIV\s*=\s*(["'])refresh\8)
(?:\s[^>]*|/)?>
}ix )
{
my $after = $3 || $6;
my $url = $4 || $7;
$DEBUG and $this->_debug($full_ch_name, "debug: meta.refresh found: $after; $url");
$result->{redirect} = $url;
}
# detect encoding.
my $enc = 'auto';
if( $headers->{'Content-Type'} && $headers->{'Content-Type'} =~ /;\s*charset=(\S+)/ )
{
my $e = lc($1);
$enc = $e =~ /s\w*jis/ ? 'sjis'
: $e =~ /euc/ ? 'euc'
: $e =~ /utf-?8/ ? 'utf8'
: $e =~ /iso-2022-jp/ ? 'jis'
: $e =~ /\bjis\b/ ? 'jis'
: $enc;
$DEBUG and $this->_debug($full_ch_name, "debug: charset $enc from http-header ($e)");
}
if( $enc eq 'auto' && $content2 =~ m{
<meta(?:\s[^>]*?)?\s
(?:http-equiv\s*=\s*(["'])Content-Type\1(?:\s[^>]*?)?\scontent\s*=\s*(["'])\w+/\w+(?:\+\w+)*\s*;\s*charset=([-\w]+)\2|
content\s*=\s*(["'])\w+/\w+(?:\+\w+)*\s*;\s*charset=([-\w]+)\4(?:\s[^>]+?)?\shttp-equiv\s*=\s*(["'])Content-Type\6)
(?:\s[^>]*|/)?>
}ix )
{
my $e = lc($3 || $5);
$enc = $e =~ /s\w*jis/ ? 'sjis'
: $e =~ /euc/ ? 'euc'
: $e =~ /utf-?8/ ? 'utf8'
: $e =~ /iso-2022-jp/ ? 'jis'
: $e =~ /\bjis\b/ ? 'jis'
: $enc;
$DEBUG and $this->_debug($full_ch_name, "debug: charset $enc from meta ($e)");
}
if( $enc eq 'auto' )
{
my $guessed = $ENCODER->new->getcode($content);
$enc = $guessed ne 'unknown' ? $guessed : 'sjis';
$DEBUG and $this->_debug($full_ch_name, "debug: charset $enc from guess ($guessed)");
}
# drop broken utf-8 sequences.
if( $enc eq 'utf8' && $content =~ s{([\xe0-\xef][\x80-\xbf]?)(?=[\x00-\x7e])}{join('',map{sprintf("[%02x]",$_)}unpack("C*",$1))}eg )
{
$DEBUG and $this->_debug($full_ch_name, "debug: broken utf-8 found and fixed");
my $url = $req->{url};
$this->_log("broken utf-8 on $url (enc=$enc)");
$DEBUG and $this->_debug($req, "broken utf-8 on $url (enc=$enc)");
}
# decode.
$content = $ENCODER->new($content, $enc)->utf8;
$content2 = $ENCODER->new($content2, $enc)->utf8;
$result->{decoded_content} = $content;
my ($title) = $content2 =~ m{<title(?:\s[^<>]*)?>\s*(.*?)\s*</title\s*>}is;
$DEBUG && !$title and $this->_debug($full_ch_name, "debug: no title elements in document");
if( defined($title) )
{
$title = $this->_fixup_title($title);
$result->{title} = $title;
}else
{
$title = $result->{title};
}
my ($ctype) = split(/[ ;]/, $headers->{'Content-Type'}, 2);
$ctype ||= 'unknown/unkown';
$result->{content_type} = $ctype;
$DEBUG and $this->_debug($full_ch_name, "debug: content-type: $ctype");
my $reply = defined($title) ? $title : '';
if( $reply eq '' )
{
$DEBUG and $this->_debug($full_ch_name, "debug: check icecast");
if( my $icy_name = $headers->{'Icy-Name'} )
{
# Icecast.
my $desc = $headers->{'Icy-Description'};
my $bitrate = $headers->{'Icy-Br'};
$reply = $icy_name;
if( defined($bitrate) )
{
$reply .= " [${bitrate}k]";
}
if( defined($desc) && $desc ne $icy_name )
{
$reply .= " - $desc";
}
$reply = $ENCODER->new($reply,'auto')->utf8;
}
}
if( $ctype eq 'audio/x-mpegurl' && ($res->{StreamState} eq 'finished' || $res->{StreamState} eq 'body') )
{
if( $content =~ m{^(\w+://[-.\w:]+\S*)\s*\z} )
{
$result->{redirect} = substr($content, 0, length($1)); # keep taintness.
}
}
if( !$reply && $ctype eq 'audio/mpeg' && ($res->{StreamState} eq 'finished' || $res->{StreamState} eq 'body') )
{
if( $content =~ m{^ID3} && $HAS_TOOLS_ID3TAG )
{
# from raw content.
my $info = Tools::ID3Tag->extract($res->{Content});
#$DEBUG and $this->_debug($req, "ID3Tag.size = ".($info->{size} || '-')."/".length($content));
#$DEBUG and $this->_debug($req, "ID3Tag.version = ".($info->{version} || '-'));
#$DEBUG and $this->_debug($req, "ID3Tag.title = ".($info->{title} || '-'));
#$DEBUG and $this->_debug($req, "ID3Tag.album = ".($info->{album} || '-'));
#$DEBUG and $this->_debug($req, "ID3Tag.artist = ".($info->{artist} || '-'));
$reply = $info->{title} || 'no title';
if( $info->{album} )
{
$reply .= " / $info->{album}";
}
if( $info->{artist} )
{
$reply .= " ($info->{artist})";
}
}
}
if( $reply eq '' || $ctype !~ /html/ )
{
push(@opts, $ctype);
}
if( $ctype =~ m{^(?:image|video)/} && $HAS_IMAGE_EXIFTOOL )
{
$DEBUG and $this->_debug($full_ch_name, "debug: check image");
my @tags = qw(Title ImageSize Headline);
my $info = Image::ExifTool::ImageInfo(\$res->{Content}, @tags);
my $x = sub{ my $x=shift;$x=~s/([^ -~])/sprintf('[%02x]',unpack("C",$1))/ge;$x};
$DEBUG and $this->_debug($full_ch_name, "debug: - ".$x->(join(", ", %$info)));
if( $reply eq '' )
{
my ($key) = grep{$info->{$_}} qw(Title Headline);
my $decoded_key = $info->{"$key (1)"} && "$key (1)";
my $val = $info->{$decoded_key} || $info->{$key};
my $guessed = $decoded_key ? 'decoded' : $ENCODER->getcode($val);
my $enc = $guessed eq 'unknown' ? 'sjis' : $guessed;
$DEBUG and $this->_debug($full_ch_name, "debug: - $key ($enc/$guessed) ".$x->($val));
$reply ||= $decoded_key ? $info->{$decoded_key} : $ENCODER->new($val, $enc)->utf8;
}
if( $info->{ImageSize} )
{
push(@opts, $info->{ImageSize});
}
}
if( $reply eq '' || $ctype !~ /html/ )
{
my $len = $result->{content_length};
if( defined($len) )
{
$len =~ s/(?<=\d)(?=(\d\d\d)+(?!\d))/,/g;
$len = "$len bytes";
push(@opts, $len);
}
}
if( $req->{redirected} )
{
my $redirs = $req->{redirected}==1 ? 'redir' : 'redirs';
push(@opts, "$req->{redirected} $redirs");
}
if( $reply eq '' && $ctype =~ /text/ )
{
$reply = '(untitled)';
}
if( @opts )
{
$reply eq '' or $reply .= ' ';
$reply .= "(".join("; ", @opts).")";
}
$result->{is_success} = 1;
$result->{result} = $reply;
$result;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment