Last active
December 9, 2018 07:00
-
-
Save nobrowser/b697e748caa9a72d4bfa41be12595550 to your computer and use it in GitHub Desktop.
Perl code to download netnews articles via NNTP in batch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env perl | |
use strict; | |
use warnings; | |
use Net::NNTP; | |
use Getopt::Long qw(:config no_permute); | |
use Fcntl; | |
$main::usage = <<'EOF' | |
usage: perlsuck [ OPTIONS ... ] | |
options: | |
--auth login with the NNTP_USER and NNTP_PASS environs; SSL only | |
--server NAME host/domain name of nntp server; default: ENV{NNTPSERVER} | |
--ssl encrypt connection and use port 563 | |
EOF | |
; | |
$main::article_serial = 1; | |
$main::conn = undef; | |
sub max { | |
return ($_[0] > $_[1] ? $_[0] : $_[1]); | |
} | |
sub spool_article { | |
my $lines = shift; | |
for (@{$lines}) { | |
local $/ = | |
(substr($_, -2) eq "\r\n" && "\r\n") | |
|| (substr($_, -1) eq "\n" && "\n") | |
|| (substr($_, -1) eq "\r" && "\r"); | |
chomp; $_ .= "\n"; | |
} | |
our $article_serial; | |
my $article_name = join('/', 'spool', ''. $article_serial); | |
sysopen(my $fh, $article_name, O_WRONLY|O_CREAT|O_EXCL) | |
or die("failed to open $article_name: $!"); | |
$fh->print(@{$lines}) | |
or die("failed to write $article_name: $!"); | |
$fh->flush(); | |
$fh->close(); | |
$article_serial++; | |
} | |
sub fetch_article { | |
my $artnum = shift; | |
our $conn; | |
my $lines = $conn->article($artnum); | |
my $code = 0+ $conn->code(); | |
return ($code, $lines); | |
} | |
sub fetch_article_range { | |
my ($first, $last) = @_; | |
my $last_fetched = undef; | |
ARTICLES_RANGE: | |
for my $next ($first .. $last) { | |
my ($code, $lines) = fetch_article($next); | |
# if fetching from a range bounded above by server, and server gives a 423, | |
# the article will never appear (probably cancelled), so keep moving | |
next ARTICLES_RANGE if $code == 423; | |
last ARTICLES_RANGE unless $code < 400 && defined $lines; | |
spool_article($lines); | |
$last_fetched = $next; | |
} | |
return $last_fetched; | |
} | |
sub fetch_circled_articles { | |
my ($first, $last) = @_; | |
my $next = $first; | |
my $last_fetched = undef; | |
my ($code, $lines) = fetch_article($next); | |
ARTICLES_CIRCLED: | |
while ($code < 400 && defined $lines) | |
{ | |
spool_article($lines); | |
$last_fetched = $next++; | |
($code, $lines) = fetch_article($next); | |
} | |
return $last_fetched if $code != 423; | |
return fetch_article_range(1, $last) || $last_fetched; | |
} | |
sub read_subscriptions { | |
my (@new_subs, @old_subs, @disabled_subs); | |
open(my $fh, '<subscriptions') | |
or die("failed to open subscriptions file: $!"); | |
SUBS: | |
while (!$fh->eof()) { | |
defined($_ = <$fh>) | |
or die("failed to read subscriptions file: $!"); | |
my @parts = split; | |
next SUBS if $_ =~ /^\s*(#.*)?$/; | |
die("invalid subscriptions line: $_") | |
unless $#parts >= 1 && $parts[1] =~ /^([-#]?)([0-9]+)$/; | |
my $group_data = join(' ', $parts[0], ''.$2); | |
push(@disabled_subs, $group_data) if $1 eq '#'; | |
push(@new_subs, $group_data) if $1 eq '-'; | |
push(@old_subs, $group_data) if $1 eq ''; | |
} | |
$fh->close(); | |
return (\@new_subs, \@old_subs, \@disabled_subs); | |
} | |
sub write_subscriptions { | |
my $updated_subs = shift; | |
open(my $fh, '>subscriptions.new') | |
or die("failed to open subscriptions file: $!"); | |
$fh->print(@{$updated_subs}) | |
or die("failed to write subscriptions file: $!"); | |
$fh->flush(); | |
$fh->close(); | |
} | |
sub open_group { | |
my $group = shift; | |
our $conn; | |
my ($count, $server_first, $server_last, $server_name) = $conn->group($group); | |
$server_name eq $group or die("failed to open group $group: $!"); | |
return ($server_first, $server_last, $server_last < $server_first); | |
} | |
sub read_new_group { | |
my ($group, $howfew) = @_; | |
my ($server_first, $server_last, $server_circle) = open_group($group); | |
my $iwant = $server_last - $howfew + 1; | |
my $available = ($server_circle ? 1 : $server_first); | |
return fetch_article_range(max($iwant, $available), $server_last); | |
} | |
sub read_old_group { | |
my ($group, $ihave) = @_; | |
my ($server_first, $server_last, $server_circle) = open_group($group); | |
my $pending_circle = $server_last < $ihave; | |
return fetch_article_range($server_first, $server_last) | |
if ($pending_circle && !$server_circle); | |
return fetch_article_range($ihave+1, $server_last) | |
if (!$pending_circle && $server_circle); | |
my $available = max($ihave + 1, $server_first); | |
return fetch_article_range($available, $server_last) | |
if (!$pending_circle && !$server_circle); | |
return fetch_circled_articles($available, $server_last); | |
} | |
sub main { | |
our $conn; | |
my $ssl = 0; | |
my $auth = 0; | |
my $server = $ENV{'NNTPSERVER'} || 'news.eternal-september.org'; | |
our $usage; | |
GetOptions("auth" => \$auth, | |
"ssl" => \$ssl, | |
"server=s" => \$server) | |
or die($usage); | |
my ($new_subs, $old_subs, $disabled_subs) = read_subscriptions(); | |
$conn = Net::NNTP->new($server, SSL => $ssl, Reader => 1, Debug => 1); | |
defined $conn | |
or die("failed to connect: $!"); | |
if ($ssl && $auth) { | |
$conn->authinfo($ENV{'NNTP_USER'}, $ENV{'NNTP_PASS'}) | |
or die("failed to authenticate: $!"); | |
} | |
my @updated_subs; | |
NEW_GROUP: | |
for (@{$new_subs}) { | |
my ($group, $howfew) = split; | |
my $last_fetched = read_new_group($group, $howfew); | |
push(@updated_subs, sprintf("%s -%d\n", $group, $howfew)), next NEW_GROUP | |
unless $last_fetched; | |
push(@updated_subs, sprintf("%s %d\n", $group, $last_fetched)); | |
} | |
OLD_GROUP: | |
for (@{$old_subs}) { | |
my ($group, $ihave) = split; | |
my $last_fetched = read_old_group($group, $ihave); | |
push(@updated_subs, sprintf("%s %d\n", $group, $ihave)), next OLD_GROUP | |
unless $last_fetched; | |
push(@updated_subs, sprintf("%s %d\n", $group, $last_fetched)); | |
} | |
DISABLED_GROUP: | |
for (@{$disabled_subs}) { | |
my ($group, $article) = split; | |
push(@updated_subs, sprintf("%s #%d\n", $group, $article)); | |
} | |
$conn->quit(); | |
STDERR->flush(); | |
write_subscriptions(\@updated_subs); | |
} | |
# Since this script will generally run in the front of a pipeline, | |
# it is not enough to set an exit status because that cannot be detected | |
# by a Bourne shell. Write an indication to a file as well. | |
sub write_timestamp { | |
unlink('timestamp'); | |
open(my $fh, '>timestamp'); | |
$fh->printf("%d\n", time()); | |
$fh->flush(); | |
$fh->close(); | |
} | |
unlink('timestamp'); | |
main(); | |
write_timestamp(); | |
__END__ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment