Created
February 12, 2013 01:28
-
-
Save conmame/4759295 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
sub _parse_response | |
{ | |
my $this = shift; | |
my $res = shift; | |
my $req = shift; | |
my $full_ch_name = $req->{full_ch_name}; | |
$DEBUG and $this->_debug($req, "_parse_response."); | |
my $result = { | |
result => undef, | |
status_code => undef, | |
is_success => undef, | |
title => undef, | |
content_type => undef, | |
content_length => undef, | |
decoded_content => undef, | |
fetch_length => undef, | |
}; | |
if( !ref($res) ) | |
{ | |
my $DEFAULT_LANG = 'ja'; | |
my $lang = $this->config->lang || $DEFAULT_LANG; | |
my $msgmap = {}; | |
if( $lang eq 'ja' ) | |
{ | |
$msgmap = { | |
error => 'エラー', | |
timeout => 'タイムアウト', | |
'no host to connect' => 'サーバが見つかりません', | |
'接続を拒否されました' => 'サーバに接続できませんでした', | |
'Connection refused' => 'サーバに接続できませんでした', | |
}; | |
}; | |
$result->{result} = "(".($msgmap->{error}||'error').") $req->{url} ".($msgmap->{$res}||$res); | |
return $result; | |
} | |
my $protocol = $res->{Protocol}; | |
my $status_code = $res->{Code} || 0; | |
my $status_msg = $res->{Message}; | |
my $headers = $res->{Header}; # hash-ref. | |
my $content = $res->{Content}; | |
$result->{fetch_length} = defined($content) ? length($content) : undef; | |
defined($content) or $content = ''; | |
my @opts; | |
$result->{status_code} = $status_code; | |
$result->{content_length} = $headers->{'Content-Length'}; | |
if( !defined($result->{content_length}) && $res->{StreamState} eq 'finished' ) | |
{ | |
$result->{content_length} = length($content); | |
} | |
$DEBUG and $this->_debug($full_ch_name, "debug: fetch ".length($content)." bytes"); | |
# extract Cookies; | |
if( $headers->{'Set-Cookie'} ) | |
{ | |
$this->_extract_cookies($req); | |
} | |
if( my $loc = $headers->{Location} ) | |
{ | |
$DEBUG and $this->_debug($full_ch_name, "debug: has Location header: $loc"); | |
if( $loc =~ m{^\s*(\w+://[-.\w]+\S*)\s*$} ) | |
{ | |
$result->{redirect} = substr($loc, 0, length($1)); # keep taintness. | |
}elsif( $loc =~ m{^\s*((/?).*?(?:[#?].*)?)\s*$} ) | |
{ | |
my $path = substr($loc, 0, length($1)); # keep taintness. | |
my $is_abs = $2; | |
my $new_url = $req->{url}; | |
$new_url =~ s{[#?].*}{}; | |
if( $is_abs ) | |
{ | |
$new_url =~ s{^(\w+://[^/]+)/.*}{$1} or die "invalid req url(abs): $new_url"; | |
}else | |
{ | |
$new_url =~ s{^(\w+://[^/]+.*/).*}{$1} or die "invalid req url(rel): $new_url"; | |
} | |
$result->{redirect} = $new_url . $path; | |
}else | |
{ | |
$DEBUG and $this->_debug($full_ch_name, "debug: broken location url: $loc"); | |
} | |
$DEBUG && $result->{redirect} and $this->_debug($full_ch_name, "debug: Location redirect: $result->{redirect}"); | |
} | |
if( int($status_code / 100) != 2 && !$result->{redirect} ) | |
{ | |
$result->{title} = $status_msg; | |
push(@opts, "http status $status_code"); | |
} | |
# detect refresh tag. | |
my $content2 = $content; | |
$content2 =~ s/<!--.*?-->//g; | |
if( $content2 =~ m{ | |
<META(?:\s[^>]*?)?\s | |
(?:HTTP-EQUIV\s*=\s*(["'])refresh\1(?:\s[^>]*?)?\sCONTENT\s*=\s*(["'])(\d+)\s*;\s*URL=([^"'<]+)\2| | |
CONTENT\s*=\s*(["'])(\d+)\s*;\s*URL=([^"'<]+)\5(?:\s[^>]*?)?\sHTTP-EQUIV\s*=\s*(["'])refresh\8) | |
(?:\s[^>]*|/)?> | |
}ix ) | |
{ | |
my $after = $3 || $6; | |
my $url = $4 || $7; | |
$DEBUG and $this->_debug($full_ch_name, "debug: meta.refresh found: $after; $url"); | |
$result->{redirect} = $url; | |
} | |
# detect encoding. | |
my $enc = 'auto'; | |
if( $headers->{'Content-Type'} && $headers->{'Content-Type'} =~ /;\s*charset=(\S+)/ ) | |
{ | |
my $e = lc($1); | |
$enc = $e =~ /s\w*jis/ ? 'sjis' | |
: $e =~ /euc/ ? 'euc' | |
: $e =~ /utf-?8/ ? 'utf8' | |
: $e =~ /iso-2022-jp/ ? 'jis' | |
: $e =~ /\bjis\b/ ? 'jis' | |
: $enc; | |
$DEBUG and $this->_debug($full_ch_name, "debug: charset $enc from http-header ($e)"); | |
} | |
if( $enc eq 'auto' && $content2 =~ m{ | |
<meta(?:\s[^>]*?)?\s | |
(?:http-equiv\s*=\s*(["'])Content-Type\1(?:\s[^>]*?)?\scontent\s*=\s*(["'])\w+/\w+(?:\+\w+)*\s*;\s*charset=([-\w]+)\2| | |
content\s*=\s*(["'])\w+/\w+(?:\+\w+)*\s*;\s*charset=([-\w]+)\4(?:\s[^>]+?)?\shttp-equiv\s*=\s*(["'])Content-Type\6) | |
(?:\s[^>]*|/)?> | |
}ix ) | |
{ | |
my $e = lc($3 || $5); | |
$enc = $e =~ /s\w*jis/ ? 'sjis' | |
: $e =~ /euc/ ? 'euc' | |
: $e =~ /utf-?8/ ? 'utf8' | |
: $e =~ /iso-2022-jp/ ? 'jis' | |
: $e =~ /\bjis\b/ ? 'jis' | |
: $enc; | |
$DEBUG and $this->_debug($full_ch_name, "debug: charset $enc from meta ($e)"); | |
} | |
if( $enc eq 'auto' ) | |
{ | |
my $guessed = $ENCODER->new->getcode($content); | |
$enc = $guessed ne 'unknown' ? $guessed : 'sjis'; | |
$DEBUG and $this->_debug($full_ch_name, "debug: charset $enc from guess ($guessed)"); | |
} | |
# drop broken utf-8 sequences. | |
if( $enc eq 'utf8' && $content =~ s{([\xe0-\xef][\x80-\xbf]?)(?=[\x00-\x7e])}{join('',map{sprintf("[%02x]",$_)}unpack("C*",$1))}eg ) | |
{ | |
$DEBUG and $this->_debug($full_ch_name, "debug: broken utf-8 found and fixed"); | |
my $url = $req->{url}; | |
$this->_log("broken utf-8 on $url (enc=$enc)"); | |
$DEBUG and $this->_debug($req, "broken utf-8 on $url (enc=$enc)"); | |
} | |
# decode. | |
$content = $ENCODER->new($content, $enc)->utf8; | |
$content2 = $ENCODER->new($content2, $enc)->utf8; | |
$result->{decoded_content} = $content; | |
my ($title) = $content2 =~ m{<title(?:\s[^<>]*)?>\s*(.*?)\s*</title\s*>}is; | |
$DEBUG && !$title and $this->_debug($full_ch_name, "debug: no title elements in document"); | |
if( defined($title) ) | |
{ | |
$title = $this->_fixup_title($title); | |
$result->{title} = $title; | |
}else | |
{ | |
$title = $result->{title}; | |
} | |
my ($ctype) = split(/[ ;]/, $headers->{'Content-Type'}, 2); | |
$ctype ||= 'unknown/unkown'; | |
$result->{content_type} = $ctype; | |
$DEBUG and $this->_debug($full_ch_name, "debug: content-type: $ctype"); | |
my $reply = defined($title) ? $title : ''; | |
if( $reply eq '' ) | |
{ | |
$DEBUG and $this->_debug($full_ch_name, "debug: check icecast"); | |
if( my $icy_name = $headers->{'Icy-Name'} ) | |
{ | |
# Icecast. | |
my $desc = $headers->{'Icy-Description'}; | |
my $bitrate = $headers->{'Icy-Br'}; | |
$reply = $icy_name; | |
if( defined($bitrate) ) | |
{ | |
$reply .= " [${bitrate}k]"; | |
} | |
if( defined($desc) && $desc ne $icy_name ) | |
{ | |
$reply .= " - $desc"; | |
} | |
$reply = $ENCODER->new($reply,'auto')->utf8; | |
} | |
} | |
if( $ctype eq 'audio/x-mpegurl' && ($res->{StreamState} eq 'finished' || $res->{StreamState} eq 'body') ) | |
{ | |
if( $content =~ m{^(\w+://[-.\w:]+\S*)\s*\z} ) | |
{ | |
$result->{redirect} = substr($content, 0, length($1)); # keep taintness. | |
} | |
} | |
if( !$reply && $ctype eq 'audio/mpeg' && ($res->{StreamState} eq 'finished' || $res->{StreamState} eq 'body') ) | |
{ | |
if( $content =~ m{^ID3} && $HAS_TOOLS_ID3TAG ) | |
{ | |
# from raw content. | |
my $info = Tools::ID3Tag->extract($res->{Content}); | |
#$DEBUG and $this->_debug($req, "ID3Tag.size = ".($info->{size} || '-')."/".length($content)); | |
#$DEBUG and $this->_debug($req, "ID3Tag.version = ".($info->{version} || '-')); | |
#$DEBUG and $this->_debug($req, "ID3Tag.title = ".($info->{title} || '-')); | |
#$DEBUG and $this->_debug($req, "ID3Tag.album = ".($info->{album} || '-')); | |
#$DEBUG and $this->_debug($req, "ID3Tag.artist = ".($info->{artist} || '-')); | |
$reply = $info->{title} || 'no title'; | |
if( $info->{album} ) | |
{ | |
$reply .= " / $info->{album}"; | |
} | |
if( $info->{artist} ) | |
{ | |
$reply .= " ($info->{artist})"; | |
} | |
} | |
} | |
if( $reply eq '' || $ctype !~ /html/ ) | |
{ | |
push(@opts, $ctype); | |
} | |
if( $ctype =~ m{^(?:image|video)/} && $HAS_IMAGE_EXIFTOOL ) | |
{ | |
$DEBUG and $this->_debug($full_ch_name, "debug: check image"); | |
my @tags = qw(Title ImageSize Headline); | |
my $info = Image::ExifTool::ImageInfo(\$res->{Content}, @tags); | |
my $x = sub{ my $x=shift;$x=~s/([^ -~])/sprintf('[%02x]',unpack("C",$1))/ge;$x}; | |
$DEBUG and $this->_debug($full_ch_name, "debug: - ".$x->(join(", ", %$info))); | |
if( $reply eq '' ) | |
{ | |
my ($key) = grep{$info->{$_}} qw(Title Headline); | |
my $decoded_key = $info->{"$key (1)"} && "$key (1)"; | |
my $val = $info->{$decoded_key} || $info->{$key}; | |
my $guessed = $decoded_key ? 'decoded' : $ENCODER->getcode($val); | |
my $enc = $guessed eq 'unknown' ? 'sjis' : $guessed; | |
$DEBUG and $this->_debug($full_ch_name, "debug: - $key ($enc/$guessed) ".$x->($val)); | |
$reply ||= $decoded_key ? $info->{$decoded_key} : $ENCODER->new($val, $enc)->utf8; | |
} | |
if( $info->{ImageSize} ) | |
{ | |
push(@opts, $info->{ImageSize}); | |
} | |
} | |
if( $reply eq '' || $ctype !~ /html/ ) | |
{ | |
my $len = $result->{content_length}; | |
if( defined($len) ) | |
{ | |
$len =~ s/(?<=\d)(?=(\d\d\d)+(?!\d))/,/g; | |
$len = "$len bytes"; | |
push(@opts, $len); | |
} | |
} | |
if( $req->{redirected} ) | |
{ | |
my $redirs = $req->{redirected}==1 ? 'redir' : 'redirs'; | |
push(@opts, "$req->{redirected} $redirs"); | |
} | |
if( $reply eq '' && $ctype =~ /text/ ) | |
{ | |
$reply = '(untitled)'; | |
} | |
if( @opts ) | |
{ | |
$reply eq '' or $reply .= ' '; | |
$reply .= "(".join("; ", @opts).")"; | |
} | |
$result->{is_success} = 1; | |
$result->{result} = $reply; | |
$result; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment