Skip to content

Instantly share code, notes, and snippets.

@malcom
Created September 9, 2009 15:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save malcom/183781 to your computer and use it in GitHub Desktop.
Save malcom/183781 to your computer and use it in GitHub Desktop.
Google Code Downloader
#!/usr/bin/perl
#
# GCD - Google Code Downloader
# Download source files and rebuild project structure from Google Code Search cache.
#
# usage: ./gcd url path
#
# Version: 0.3
# Licence: GPL
#
# Copyright (c) 2007-2008 Marcin 'Malcom' Malich, <me@malcom.pl>
#
# http://projects.malcom.pl/tools/gcd.xhtml
#
use strict;
use warnings;
use LWP::Simple;
use HTML::Entities;
# Script entry point: print the banner, validate the command line, mirror the
# tree starting at the given URL and report how many files were written.

# Make every print end with a newline. This is a global setting, so it also
# applies to the prints issued inside fun().
$\ = "\n";

print "\n".'GCD - GoogleCode Downloader v0.3';
print '(c) 2008 MalCom <me@malcom.pl>'."\n";

# Both the start URL and the output path are required. Test for
# "defined and non-empty" rather than truthiness so that a value of "0"
# (e.g. an output directory literally named 0) is accepted.
if (grep { !defined $_ || $_ eq '' } @ARGV[0, 1]) {
    print 'Fault of required arguments!';
    print 'usage: ./gcd url path';
    exit 1;    # non-zero status so calling scripts can detect the usage error
}

# Number of downloaded files; incremented by fun().
my $c = 0;

fun($ARGV[0]);

print 'Downloaded '.$c.' files.';
# Recursively mirror one directory listing page into the local output tree.
#
# Arguments (positional):
#   1. URL of the listing page to fetch.
#   2. Optional directory path relative to the output root ($ARGV[1]);
#      omitted for the top-level call.
#
# Creates the target directory, fetches the page and walks its file list:
# directory entries are processed recursively, file entries are downloaded,
# stripped of HTML markup and written below the output root. The file-scope
# counter $c is incremented for every file that was created.
#
# Dies if the target directory cannot be created. Download and file creation
# failures are reported on STDOUT and the affected entry is skipped.
# Returns nothing.
sub fun {
    my ($page_url, $rel_dir) = @_;

    my ($path, $kat);
    if (defined $rel_dir && $rel_dir ne '') {
        $path = $ARGV[1].'/'.$rel_dir;
        $kat  = $rel_dir.'/';
    } else {
        $path = $ARGV[1];
        $kat  = '';
    }

    # An already existing directory is fine. The low-precedence "or" matters:
    # "mkdir $path || die ..." binds the || to $path and can never die.
    -d $path or mkdir($path) or die "Couldn't create folder $path: $!";

    my $p = get($page_url);
    if (!defined $p) {
        print "Couldn't download $page_url";
        return;
    }

    # Without a file list there is nothing to do for this page.
    my ($listing) = $p =~ m{<div id=filelist><pre class=dirlistpre>(.*?)</pre></div>}s;
    return unless defined $listing;

    for my $line (split /\n/, $listing) {
        my ($fn, $href, $is_dir);
        if ($line =~ m{<a href="(.*)" class="direlem">(.*)</a>(.?)}) {
            # All links to directories and files of the current directory
            # carry the "direlem" class, except "..", which is not needed.
            ($href, $fn) = ($1, $2);
            $is_dir = $3 eq '/';    # a '/' right after the link marks a directory
        } elsif ($line =~ m{<b>(.*)</b>}) {
            # The current page itself contains the source of this file.
            $fn = $1;
        } else {
            next;
        }

        # The name comes from remote HTML and is joined into a local path:
        # refuse anything that could point outside of $path.
        if ($fn eq '' || $fn eq '.' || $fn eq '..' || $fn =~ m{[/\\]}) {
            print "Skipping suspicious name: $fn";
            next;
        }

        my $url;
        if (defined $href) {
            $url = 'http://www.google.com'.$href;
            $url =~ s/&amp;/&/g;
        }

        if ($is_dir) {
            fun($url, $kat.$fn);
            next;
        }

        my $html = defined $url ? get($url) : $p;
        if (!defined $html) {
            print "Couldn't download $kat$fn";
            next;
        }

        print 'Add file: '.$kat.$fn;

        my $file = $path.'/'.$fn;
        open(my $fh, '>', $file) or do {
            print "Couldn't create file $path/$fn";
            next;
        };

        if ($html =~ m{<div id="code"><pre>(.*)</pre></div>}s) {
            my $code = $1;
            $code =~ s/<.*?>//gs;     # strip HTML tags
            $code =~ s/[\n\r]+$//;    # and trailing newline characters
            # $\ is set to "\n" at file scope, so this print ends the file
            # with exactly one newline.
            print {$fh} decode_entities($code);
        }

        # Buffered write errors only surface when the handle is closed.
        close($fh) or print "Couldn't finish writing $file: $!";

        $c++;
    }

    return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment