Skip to content

Instantly share code, notes, and snippets.

@xaicron
Created March 28, 2009 06:53
Show Gist options
  • Save xaicron/87042 to your computer and use it in GitHub Desktop.
Save xaicron/87042 to your computer and use it in GitHub Desktop.
youtube videos download script
#!C:/Perl/bin/perl
# Youtubeから最高画質の動画をダウンロードする
# fmt35とかもいける
use strict;
use warnings;
use utf8;
use Encode;
use LWP::UserAgent;
use File::Basename;
use Web::Scraper;
use URI;
use JSON qw/from_json/;
# Encoding used for the name of the file written to disk.
my $enc = 'cp932';
# The only command-line argument is the URL of the page to scrape.
# The trailing newline keeps perl from appending "at ... line N" to the usage text.
my $url = shift @ARGV
    or die "Usage: $0 youtube_url\n";
# Scraping: collect the HTML of every <script> directly under <head> (as a
# list in "scripts") and the text of the title heading (as "title").
my $uri = URI->new($url);
my $scraper = scraper {
    process '/html/head/script', 'scripts[]' => 'html';
    process '//*[@id="watch-vid-title"]/h1', title => 'TEXT';
};
my $result = $scraper->scrape($uri) or die "Oops!";
# Extract the JSON object literal assigned to "var swfArgs" in the scraped scripts.
# HTML::Entities is called by its fully-qualified name below, so load it
# explicitly instead of relying on another module having pulled it in.
require HTML::Entities;
my $json;
# "scripts" is missing when the scraper matched no <script> element; fall back
# to an empty list so the explicit check below reports the problem.
my @script_lines = split qq{\n}, join q{}, @{ $result->{scripts} || [] };
for my $line (@script_lines) {
    # The braces are escaped so they are always treated as literal characters.
    if ($line =~ /^\s*var\s*swfArgs\s*=\s*(\{.*\});/) {
        $json = HTML::Entities::decode_entities($1);
        last;
    }
}
# Fail with a clear message instead of handing undef to from_json.
die "swfArgs not found in the page\n" unless defined $json;
my $swfArgs = from_json($json);
# Without these two values the download URL built below cannot work.
for my $key (qw(video_id t)) {
    die "swfArgs has no $key\n" unless defined $swfArgs->{$key};
}
# Choose the fmt to request: the first comma-separated fmt_map entry of the
# form "<fmt>/<number>..." whose second number is not 0.
# NOTE(review): the entries are presumably listed best quality first, and a
# second number of 0 seems to mean the format does not exist -- confirm.
my $fmt;
my $fmt_map = defined $swfArgs->{fmt_map} ? $swfArgs->{fmt_map} : '';
for my $map (split /,/, $fmt_map) {
    # Capture into lexicals and skip entries that do not parse, so a stale or
    # undefined $1 from a failed match can never end up in the URL.
    my ($id, $flag) = $map =~ m{^(\d+)/(\d+)}
        or next;
    next if $flag eq '0';
    $fmt = "&fmt=$id";
    last;
}
# Fall back to fmt=18 when no usable entry was found.
$fmt = '&fmt=18' unless $fmt;
# File extension: fmt 18 and 22 are saved as .mp4, everything else as .flv.
# The match is anchored so that e.g. fmt=118 is not mistaken for fmt=18.
my $suffix = $fmt =~ /\A&fmt=(?:18|22)\z/ ? '.mp4' : '.flv';
# Download URL
my $video_url = sprintf "http://www.youtube.com/get_video?video_id=%s&t=%s%s",
    $swfArgs->{video_id}, $swfArgs->{t}, $fmt;
# Name of the file to save: the page title, or the video id when no title was scraped.
my $title = defined $result->{title} ? $result->{title} : '';
# Replace characters that are not allowed in Windows file names.
$title =~ s{[\\/:*?"<>|\x00-\x1f]}{_}g;
$title = $swfArgs->{video_id} unless length $title;
# Characters that $enc cannot represent become "_" instead of the default
# substitution character, which for cp932 is "?" and is illegal in file names.
my $filename = encode($enc, $title . $suffix, sub { '_' });
# Stop if the target file already exists.
die "File exists ($filename)" if -f $filename;
# Download while displaying progress.
open my $wfh, '>', $filename or die "$filename: $!";
binmode $wfh;
print "$video_url\n";
print "Downloading -> $filename\n";
# Progress lines end in "\r" rather than "\n"; enable autoflush on STDOUT so
# they are displayed as they are printed.
local $| = 1;
my $res = LWP::UserAgent->new->get(
    $video_url,
    ':content_cb' => sub {
        my ($chunk, $response) = @_;
        # A failed write aborts the transfer; LWP records the message in the
        # X-Died response header, which is checked after the request returns.
        print {$wfh} $chunk or die "$filename: $!";
        my $size = tell $wfh;
        if (my $total = $response->header('Content-Length')) {
            printf "%d/%d (%f%%)\r", $size, $total, $size / $total * 100;
        }
        else {
            printf "%d/Unknown bytes\r", $size;
        }
    },
);
# Buffered write errors only surface at close, so its result matters too.
my $closed = close $wfh;
warn "$filename: $!\n" unless $closed;
print "\n", $res->status_line, "\n";
my $died = $res->header('X-Died');
warn "Download aborted: $died\n" if defined $died;
# The download succeeded only if the HTTP request succeeded, the callback did
# not die, and the file was closed cleanly.
my $ok = $res->is_success && !defined $died && $closed;
# Remove the partial or empty file on failure.
if (!$ok) {
    unlink $filename or warn "Could not remove $filename: $!\n";
}
# Exit status 0 on success, 1 on failure (previously always 1).
exit($ok ? 0 : 1);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment