Skip to content

Instantly share code, notes, and snippets.

@dnmfarrell
Created September 13, 2014 13:31
Show Gist options
  • Save dnmfarrell/998b9199007589199dce to your computer and use it in GitHub Desktop.
Save dnmfarrell/998b9199007589199dce to your computer and use it in GitHub Desktop.
Perl script that downloads country SWIFT codes from www.bankswiftcode.org and outputs them in a pipe-delimited format
#!/usr/bin/env perl
# downloads country SWIFT codes from www.bankswiftcode.org and outputs them in a pipe-delimited format
# LICENSED Artistic License 2.0 http://opensource.org/licenses/Artistic-2.0
use strict;
use warnings;
use HTTP::Tiny;
use XML::LibXML;
# The country is the only required argument, e.g. "united-states".
die 'Please provide country' unless @ARGV;

# Lower-case the argument once; it drives both the URL and the page count.
my $country  = lc $ARGV[0];
my $base_url = "http://www.bankswiftcode.org/$country";

# Parser instance configured to recover from poorly formatted HTML.
# NOTE(review): the visible code never reads $xml -- parse_html() calls
# XML::LibXML->load_html as a class method -- so confirm whether this
# instance is still needed before removing it.
my $xml = XML::LibXML->new( recover => 2, silent => 1 );

# Present a desktop Chrome user-agent string instead of HTTP::Tiny's default.
my $ua = HTTP::Tiny->new( agent =>
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'
);

# USA has 22 pages, other countries 1.
my $end = $country eq 'united-states' ? 22 : 1;

for my $page ( 1 .. $end ) {

    # Page 1 lives at the bare country URL; later pages append "-<n>".
    my $url      = $page == 1 ? $base_url : "${base_url}-${page}";
    my $response = $ua->get($url);

    unless ( $response->{success} ) {

        # Report on STDERR so the message never ends up inside the
        # pipe-delimited data written to STDOUT.
        my $message = "Error: $response->{status} $response->{reason}";

        # HTTP::Tiny reports its own failures (DNS, connect, timeout) as
        # status 599 and puts the actual error text in the content field.
        if ( $response->{status} == 599 && length $response->{content} ) {
            ( my $detail = $response->{content} ) =~ s/\s+\z//;
            $message .= " ($detail)";
        }
        warn "$message\n";
        last;
    }

    if ( length $response->{content} ) {
        my $rows = parse_html( $response->{content} );

        # Guard against an undefined result so print never warns.
        print $rows if defined $rows;
    }

    # Wait 2-6 seconds between requests to avoid being blocked by the target
    # server; there is nothing left to wait for after the final page.
    sleep( 2 + int rand 5 ) if $page < $end;
}
# Convert the results table of a downloaded page into pipe-delimited text.
#
# Parameters:
#   $html - the page markup as a string.
#
# Returns:
#   A string holding one line per <tr> found under a <tbody> of the
#   <table id="t2"> element. Each line is the text of that row's <td> cells
#   joined with "|" and terminated by "\n". The empty string is returned when
#   $html is undefined, contains no such table, or the table has no such rows.
sub parse_html {
    my ($html) = @_;
    return '' unless defined $html;

    # Cut the table out of the page so only that fragment reaches the parser.
    return '' unless $html =~ m{(<table id="t2">.*?</table>)}s;
    my $table = $1;

    # recover => 2 asks the parser to repair malformed markup silently
    # instead of dying on it.
    my $dom = XML::LibXML->load_html( string => $table, recover => 2 );

    my $output = '';
    for my $tr ( $dom->findnodes('//tbody/tr') ) {
        my @cells;
        for my $td ( $tr->getChildrenByTagName('td') ) {

            # textContent yields the decoded text of the cell whether or not
            # the <td> carries attributes or wraps its value in other tags.
            my $text = $td->textContent;
            $text = '' unless defined $text;

            # Collapse whitespace so a cell can never span several output
            # lines, then trim both ends.
            $text =~ s/\s+/ /g;
            $text =~ s/\A | \z//g;
            push @cells, $text;
        }
        $output .= join( '|', @cells ) . "\n";
    }
    return $output;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment