isaaclw/parse_download.pl

## parse_download.pl
#!/usr/bin/perl
# Download a page, then download every file referenced on it whose link matches
# the given regular expression, storing the files in the given folder.
use strict;
use URI::URL;

# Characters allowed inside a quoted attribute value: anything except a quote.
my $urlchars = q{[^'"]};

# The pattern is handed to parse() as a plain string, so the backslash has to
# be doubled to survive double-quote interpolation. A single "\." collapses to
# a bare "." which matches any character rather than a literal dot.
parse("http://www.test.com/", "data-bt=\"($urlchars*?\\.torrent)\"", "humble");

# parse($start_url, $regex, $save)
#
# Fetches $start_url with wget, scans the page line by line for $regex, and
# downloads every distinct match into the directory $save.
#
#   $start_url - page to scan; also the base used to make matched links absolute
#   $regex     - pattern (string) whose first capture group is the link to fetch;
#                it is applied case-insensitively
#   $save      - directory handed to wget as --directory-prefix
#
# Returns nothing. Problems running wget are reported with warn() rather than
# die() so that one failed download does not abort the remaining ones.
sub parse {
    my ($start_url, $regex, $save) = @_;

    print "Checking page $start_url\n";

    # List-form pipe open: wget is run without a shell and its output is read
    # directly, so no fixed, predictable temp file in /tmp is needed.
    open(my $page, '-|', 'wget', '-O', '-', '--quiet', $start_url)
        or do {
            warn "Cannot run wget for $start_url: $!\n";
            return;
        };

    my %seen;
    my @urls;
    while (my $line = <$page>) {
        chomp($line);

        # /g in a while loop collects every match on the line, not just the
        # first; $1 is only read after a successful match.
        while ($line =~ /$regex/gsi) {
            my $file = $1;
            next unless defined $file && length $file;

            my $url = url($file, $start_url)->abs->as_string;

            # Keep first-seen order while dropping duplicates.
            push @urls, $url unless $seen{$url}++;
        }
    }
    close($page)
        or warn "wget did not finish cleanly for $start_url (status $?)\n";

    foreach my $url (@urls) {
        print " d: $url\n";

        # List-form system: URLs scraped from the page never reach a shell, so
        # characters such as $, ` or ; in a link cannot be executed.
        system('wget', '--no-verbose', '--timestamping',
               "--directory-prefix=$save", $url) == 0
            or warn "Download failed for $url (status $?)\n";
    }

    return;
}
	#!/usr/bin/perl
	# Download a page, then download every file referenced on it whose link matches
	# the given regular expression, storing the files in the given folder.
	use strict;
	use URI::URL;

	# Characters allowed inside a quoted attribute value: anything except a quote.
	my $urlchars = q{[^'"]};

	# The pattern is handed to parse() as a plain string, so the backslash has to
	# be doubled to survive double-quote interpolation. A single "\." collapses to
	# a bare "." which matches any character rather than a literal dot.
	parse("http://www.test.com/", "data-bt=\"($urlchars*?\\.torrent)\"", "humble");

	# parse($start_url, $regex, $save)
	#
	# Fetches $start_url with wget, scans the page line by line for $regex, and
	# downloads every distinct match into the directory $save.
	#
	#   $start_url - page to scan; also the base used to make matched links absolute
	#   $regex     - pattern (string) whose first capture group is the link to fetch;
	#                it is applied case-insensitively
	#   $save      - directory handed to wget as --directory-prefix
	#
	# Returns nothing. Problems running wget are reported with warn() rather than
	# die() so that one failed download does not abort the remaining ones.
	sub parse {
	    my ($start_url, $regex, $save) = @_;

	    print "Checking page $start_url\n";

	    # List-form pipe open: wget is run without a shell and its output is read
	    # directly, so no fixed, predictable temp file in /tmp is needed.
	    open(my $page, '-|', 'wget', '-O', '-', '--quiet', $start_url)
	        or do {
	            warn "Cannot run wget for $start_url: $!\n";
	            return;
	        };

	    my %seen;
	    my @urls;
	    while (my $line = <$page>) {
	        chomp($line);

	        # /g in a while loop collects every match on the line, not just the
	        # first; $1 is only read after a successful match.
	        while ($line =~ /$regex/gsi) {
	            my $file = $1;
	            next unless defined $file && length $file;

	            my $url = url($file, $start_url)->abs->as_string;

	            # Keep first-seen order while dropping duplicates.
	            push @urls, $url unless $seen{$url}++;
	        }
	    }
	    close($page)
	        or warn "wget did not finish cleanly for $start_url (status $?)\n";

	    foreach my $url (@urls) {
	        print " d: $url\n";

	        # List-form system: URLs scraped from the page never reach a shell, so
	        # characters such as $, ` or ; in a link cannot be executed.
	        system('wget', '--no-verbose', '--timestamping',
	               "--directory-prefix=$save", $url) == 0
	            or warn "Download failed for $url (status $?)\n";
	    }

	    return;
	}