@JeffreyVdb
Last active August 29, 2015 14:01
Perl async URL getter for refreshing Drupal cronjob links
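Both scripts below read URLs, one per line, from standard input or from files named on the command line, skip any line that does not start with "http", and fetch the URLs from a pool of worker threads (4 by default). Output goes to stdout unless a logfile is passed. A typical run of the Perl version could look like the following; the script and input file names here are only illustrative:

  perl cron-refresh.pl --threads 8 --logfile /var/log/drupal-cron.log < cron-urls.txt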
#!/usr/bin/env perl
use Modern::Perl;
use autodie;
use warnings;
use threads;
use Thread::Queue qw( );
use Getopt::Long;
require LWP::UserAgent;
use Benchmark;

my $numThreads = 4;
my $logFile    = undef;

GetOptions(
    "threads=i" => \$numThreads,
    "logfile=s" => \$logFile
);

# Append to the logfile, if one was given, by reopening STDOUT
open(STDOUT, '>>', $logFile) or die "can't open logfile" if defined $logFile;

say '=== UPDATE START ===';
# visit url and log results
sub visit_url {
    my ($url) = @_;

    sub logStatus {
        my $message = shift;
        my $id      = threads->tid();
        say "<thread $id:" . localtime() . ">\t$message";
    }

    my $ua = LWP::UserAgent->new(max_redirect => 0);
    $ua->agent('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36');

    # fetch url
    my $response      = $ua->get($url);
    my $rCode         = $response->code;
    my $contentLength = length($response->decoded_content);
    logStatus "[" . ($response->is_success ? 'SUCCESS' : 'FAILURE') . "][HTTP $rCode, "
        . "LEN $contentLength] $url";
}
# main program flow
{
    my $q = Thread::Queue->new();
    my @threads;

    # start worker threads; each one pulls URLs off the queue until it sees undef
    for (1 .. $numThreads) {
        push @threads, async {
            while (my $url = $q->dequeue()) {
                visit_url($url);
            }
        };
    }

    # enqueue every line that looks like a URL (from stdin or file arguments)
    while (<>) {
        chomp;
        next if ! /^http.*$/;
        $q->enqueue($_);
    }

    # clear threads: one undef per worker ends its loop, then wait for all of them
    $q->enqueue(undef) for 1 .. $numThreads;
    $_->join() foreach @threads;
}

say '=== UPDATE END ===';
say '';
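The Ruby port below does the same job: ARGF is filtered down to lines starting with "http", the URLs are handed to a pool of worker threads, and every response is logged through Logger (to stdout, or to the file given with -l/--logfile, rotated weekly). Again with illustrative file names, an invocation could be:

  ruby cron-refresh.rb -t 8 -l /var/log/drupal-cron.log cron-urls.txt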
#!/usr/bin/env ruby
require 'thread'
require 'optparse'
require 'ostruct'
require 'pp'
require 'logger'
require 'net/http'

queue   = Queue.new
threads = []

options         = OpenStruct.new
options.logfile = STDOUT
options.threads = 4

parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{$0} [options]"

  opts.on("-t", "--threads N", Integer, "number of threads to use") do |num_threads|
    options.threads = num_threads
  end

  opts.on("-l", "--logfile FILE", "the file to log to") do |logfile|
    options.logfile = logfile
  end

  opts.on_tail('-h', '--help', 'show this message') do
    puts opts
    exit
  end
end
parser.parse!

# create a logger
logger = Logger.new(options.logfile, 'weekly')
logger.info('program started')
def visit_url(uri, logger)
  res = Net::HTTP.get_response(uri)
  case res
  when Net::HTTPSuccess
    logger.info(uri.to_s) { 'http success' }
    # warn when the response body is not empty
    unless res.body.size == 0
      logger.warn(uri.to_s) { 'got output' }
    end
  else
    logger.error(uri.to_s) { "gave http code #{res.code}" }
  end
end
# add work to queue (stdin or file arguments; only lines that start with http)
ARGF.each do |line|
  if /^http/.match(line)
    queue << line.chomp
  end
end

# start threads; each worker drains the queue with a non-blocking pop
options.threads.times do
  threads << Thread.new do
    until queue.empty?
      url = queue.pop(true) rescue nil
      if url
        uri = URI(url)
        visit_url(uri, logger)
      end
    end
  end
end

# wait until threads ended
threads.each { |t| t.join }