Skip to content

Instantly share code, notes, and snippets.

@lopnor
Created September 16, 2010 08:46
Show Gist options
  • Save lopnor/582147 to your computer and use it in GitHub Desktop.
Save lopnor/582147 to your computer and use it in GitHub Desktop.
#!perl
use strict;
use warnings;
use Web::Scraper;
use URI;
use YAML;
# Constant-true callback used as the value spec of every rule below.
# NOTE(review): presumably this makes the scrape result carry one key per
# microformat whose selector matched -- confirm against Web::Scraper docs.
my $found = sub { 1 };

# Turn on autoflush for the currently selected output handle.
$| = 1;

# Root-class CSS selector => microformat name, kept in a fixed order.
my @microformats = (
    [ '.vevent'     => 'hCalendar' ],
    [ '.vcard'      => 'hCard'     ],
    [ '.hentry'     => 'hAtom'     ],
    [ '.hrecipe'    => 'hRecipe'   ],
    [ '.hresume'    => 'hResume'   ],
    [ '.hnews'      => 'hNews'     ],
    [ '.xfolkentry' => 'xFolk'     ],
    [ '.hmedia'     => 'hMedia'    ],
    [ '.haudio'     => 'hAudio'    ],
    [ '.hproduct'   => 'hProduct'  ],
);

# One scraper that registers a rule for every known microformat.
my $scraper = scraper {
    for my $rule (@microformats) {
        my ($selector, $format) = @{$rule};
        process($selector, $format, $found);
    }
};
# Main loop: read one URL per line from the file named on the command
# line, scrape each URL, and print "URL<TAB>key" on STDOUT for every key
# in the scrape result. Progress and failures are reported on STDERR.
my $file = shift;
defined $file or die "usage: $0 URL_LIST_FILE\n";
open my $fh, '<', $file or die "Cannot open '$file' for reading: $!\n";

my $i = 0;
while (my $line = <$fh>) {
    chomp $line;

    # Numbered progress line; goes to STDERR so STDOUT stays pure data.
    warn sprintf("%08d:%s\n", ++$i, $line);

    # Best-effort: a URL that cannot be scraped is reported and skipped
    # rather than aborting the whole run.
    my $result = eval { $scraper->scrape(URI->new($line)) };
    if (!$result) {
        warn "skipped $line: $@" if $@;
        next;
    }

    # Sort the keys so the output order is stable between runs.
    print join("\t", $line, $_), "\n" for sort keys %{$result};
}
close $fh;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment