Skip to content

Instantly share code, notes, and snippets.

@a-square
Created May 5, 2012 07:49
Show Gist options
  • Save a-square/2600751 to your computer and use it in GitHub Desktop.
Save a-square/2600751 to your computer and use it in GitHub Desktop.
A blip.tv downloader
#!/usr/bin/perl
##########################################################################
# example: perl get-blip.pl http://spoonyexperiment.com/.../
#
# if it doesn't work, make sure you copied the url from the addres bar,
# not a link to avoid getting a redirection page
##########################################################################
use strict;
use URI;
use URI::Escape;
use IO::Socket;
use LWP::Simple;
# getlow(url)
# low-level HTTP GET that emulates a typical Google Chrome request
sub getlow {
# extract the host and path parts from the url
my $uri = URI->new(shift);
$uri =~ /http:\/\/(.+?)\//;
my $host = $1;
my $path = $uri->path;
# form a request (by hand to trick some overly diligent anti-spam systems)
my $request = <<END;
GET $path HTTP/1.1
Host: $host
Connection: keep-alive
Cache-Control: max-age=0
Accept: application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.634.0 Safari/534.16
Accept-Language: en-US,en;q=0.8
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3
END
# follow the HTTP line ending convention
$request =~ s/\n/\r\n/g;
# add an empty line at the end to signify the end of the headers section
$request .= "\r\n";
# connect to the host
my $sock = IO::Socket::INET->new (
PeerAddr => $host,
PeerPort => '80',
Proto => 'tcp',
) or die "Couldn't create a socket: $!\n";
# send the request
print $sock $request;
# get the result piece by piece (as required by TCP)
my $result = '';
while (<$sock>) {
$result .= $_;
}
# close the connection
close $sock;
return $result;
}
# save(url)
# uses LWP to mirror the file by its original filename in the current dir
sub save {
my $url = shift;
$url =~ /^.+\/(.+)$/;
my $fname = $1;
mirror($url, $fname);
}
##############################################################################
# the main routine
##############################################################################
# get the page from the supplied url and find all the embeddings
my $r = getlow(shift);
my @videos = ($r =~ /<embed[^>]+src="(.+?)"/g);
foreach (@videos) {
# make sure to only download from blip
if ($_ =~ /blip.tv/) {
my $url = $_;
print $url, "\n";
# there are two types of blip urls, convert to the nicer one
if ($url =~ /http:\/\/a.blip.tv\/api.swf\#(.+)$/) {
$url = "http://blip.tv/play/$1";
print ">> $url", "\n";
}
# get that url (no reason for using getlow here, I'm just lazy)
my $vr = getlow($url);
# peel yet another layer of indirection to get an XML document
# that describes what resources should the blip.tv player
# choose from
$vr =~ /file=([^&\r]+)/;
$vr = getlow(uri_unescape($1));
# download all the files that have movie extensions
# a factoid: recently Spoony started including mp3 versions of
# his reviews, which might be handy when dling his vlog entries
my @files = ($vr =~ /url="([^\"]+\.(?:mp4|m4v|mov|flv|mkv))"/);
foreach (@files) {
print '---', $_, "\n";
save($_);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment