Skip to content

Instantly share code, notes, and snippets.

@philpennock
Last active February 27, 2017 17:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save philpennock/84265d7fc47f3d64784e6b5645915256 to your computer and use it in GitHub Desktop.
Save philpennock/84265d7fc47f3d64784e6b5645915256 to your computer and use it in GitHub Desktop.
An old script for building CDB files for Exim, registed as `bi_command` in exim.conf
#!/usr/bin/env perl
# BUG: "#" can not appear in a value
BEGIN { @INC = grep {$_ ne '.'} @INC };
use warnings;
use strict;
# XXX: make these overrideable with getopt, together with "rebuild all", etc.
my $flat_dir = '/etc/mail/flat';
my $cdb_dir = '/etc/mail/cdb';
my @cdb_group_candidates = qw( exim _exim );
my $cdb_group = '';
# {{{1IPAddressRegexps
my $IPV4_OCTET = qr/(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])/;
my $IPV4_REGEXP = qr/(?:(?:${IPV4_OCTET}\.){3}${IPV4_OCTET})/o;
# Allow a.b.c.d/n a.b.c/n a.b/n a/n but not a.b.c./n (no trailing dot):
my $IPV4_NETBLOCK_REGEXP = qr{(?: (?:
$IPV4_OCTET (?: \. $IPV4_OCTET){0,3}
) / (?:[1-9]|[12][0-9]|3[0-2])
)}ox;
# {{{2IPv6RegexpCommentary
# RFC 3986 states:
# IPv6address = 6( h16 ":" ) ls32
# / "::" 5( h16 ":" ) ls32
# / [ h16 ] "::" 4( h16 ":" ) ls32
# / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
# / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
# / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
# / [ *4( h16 ":" ) h16 ] "::" ls32
# / [ *5( h16 ":" ) h16 ] "::" h16
# / [ *6( h16 ":" ) h16 ] "::"
#
# ls32 = ( h16 ":" h16 ) / IPv4address
# ; least-significant 32 bits of address
#
# h16 = 1*4HEXDIG
# ; 16 bits of address represented in hexadecimal
#
# Note that we need to allow:
# 1:2:3:4:5:6:7:8 -- 7 colons
# ::2:3:4:5:6:7:8 -- 8 colons, skips leading 0
# 1::3:4:5:6:7:8 -- 7 colons again; or fewer
# 1:2:3:4:5:6::8 -- 7 or fewer
# 1:2:3:4:5:6:7:: -- 8 colons, skips trailing 0
# so there can be 8 colons only if two are doubled and are an affix.
# Otherwise there's always 7 colons at most.
#
# RFC 4291: IPv6 Addressing Architecture
# The use of "::" indicates one or more groups of 16 bits of zeros.
# The "::" can only appear once in an address. The "::" can also be
# used to compress leading or trailing zeros in an address.
# That's "1 or more", not "2 or more", so in effect when it's an affix
# there's a degenerate case where a colon (:) just replaces a zero (0).
#
# Bugs encountered after original writing:
# * Was missing two /o optimisations
# * Had extra | at end of the last line, permitting empty string to match
#
# }}}2IPv6RegexpCommentary
my $IPV6_H16 = qr/(?:[0-9a-fA-F]{1,4})/;
my $IPV6_LS32 = qr/(?:(?:${IPV6_H16}:${IPV6_H16})|${IPV4_REGEXP})/o;
my $IPV6_REGEXP = qr/(?:
(?:(?: (?:${IPV6_H16}:){6} )${IPV6_LS32}) |
(?:(?: :: (?:${IPV6_H16}:){5} )${IPV6_LS32}) |
(?:(?: (?: ${IPV6_H16} )? :: (?:${IPV6_H16}:){4} )${IPV6_LS32}) |
(?:(?: (?: (?:${IPV6_H16}:){0,1} ${IPV6_H16} )? :: (?:${IPV6_H16}:){3} )${IPV6_LS32}) |
(?:(?: (?: (?:${IPV6_H16}:){0,2} ${IPV6_H16} )? :: (?:${IPV6_H16}:){2} )${IPV6_LS32}) |
(?:(?: (?: (?:${IPV6_H16}:){0,3} ${IPV6_H16} )? :: ${IPV6_H16}: )${IPV6_LS32}) |
(?:(?: (?: (?:${IPV6_H16}:){0,4} ${IPV6_H16} )? :: )${IPV6_LS32}) |
(?:(?: (?: (?:${IPV6_H16}:){0,5} ${IPV6_H16} )? :: )${IPV6_H16} ) |
(?:(?: (?: (?:${IPV6_H16}:){0,6} ${IPV6_H16} )? :: ) )
)/ox;
my $IPV6_NETBLOCK_REGEXP = qr{(?:
$IPV6_REGEXP / (?:[1-9]|[1-9][0-9]|1[01][0-9]|12[0-8])
)}ox;
my $IP_NETBLOCK_REGEXP = qr/(?: $IPV4_NETBLOCK_REGEXP | $IPV6_NETBLOCK_REGEXP )/ox;
# }}}1IPAddressRegexps
# {{{1EmailRegexps
# See RFC 2821 (not all comments are terminology from there)
# Also atext and qtext from RFC 2822
# NB: this relies upon ASCII ordering; should not pose a problem
my $RE_ATEXT = qr/[A-Za-z0-9!#\$\%\&\'\*\+\/=\?\^_\`\{\|\}\~\-]/;
my $RE_QTEXT = qr/[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]/;
my $EMAIL_LHS_REGEXP = qr/
# Local-part
(?:
(?:
# Dot-string
(?:$RE_ATEXT)+ (?: \. $RE_ATEXT+)*
) | (?:
# Quoted-string
\" (?: \s* (?:
(?: $RE_QTEXT+ ) |
(?: \\ [\x01-\x09\x0b\x0c\x0e-\x7f] )
) )* \"
)
)
/ox;
my $EMAIL_DOMAIN_REGEXP = qr/
# Domain
(?:
(?:
# regular domain
(?:[A-Za-z0-9] (?: [A-Za-z0-9-]*[A-Za-z0-9] )?)
(?: \. [A-Za-z0-9] (?: [A-Za-z0-9-]*[A-Za-z0-9] )?)+
) | (?:
# address-literals
\[
(?: $IPV4_REGEXP | (?: [Ii][Pp][vV]6: $IPV6_REGEXP ) )
# NOTSUPP: General-address-literal
# G-A-L is a hook for future literal addresses
# and only specifies tag:content
\]
)
)
/ox;
my $EMAIL_REGEXP =
qr/ (?: (?:$EMAIL_LHS_REGEXP) \@ (?:$EMAIL_DOMAIN_REGEXP) ) /ox;
my $EMAIL_BRIEF_ALLOWED_REGEXP = qr/ (?:
$EMAIL_LHS_REGEXP
(?: \@ $EMAIL_DOMAIN_REGEXP )?
) /ox;
# }}}1EmailRegexps
my $HOSTNAME_REGEXP = qr/ \S+ /x;
# ######################################################################
# MAIN PROGRAM
use CDB_File;
use File::Spec;
use Getopt::Long;
use IO::File;
use NetAddr::IP qw(:lower);
use Pod::Usage;
sub ewarn; # errno
sub gwarn; # generic
sub lwarn; # lineno
sub rebuild_cdb;
my $exitval = 0;
(my $progname = $0) =~ s!^.*/!!;
my $require_perms = 1;
my ($help, $man) = (0, 0);
GetOptions(
"flat-dir=s" => \$flat_dir,
"cdb-dir=s" => \$cdb_dir,
"cdb-group=s" => \$cdb_group,
"help|?" => \$help,
"man" => \$man,
) or pod2usage(2);
pod2usage(1) if $help;
pod2usage(-existstatus => 0, -verbose => 2) if $man;
my $privfail = sub { die @_ };
if ($> != 0) {
warn "${progname}: Not invoked as root, success optional\n";
# we don't bump exit val, so don't use gwarn
$privfail = sub { warn @_ };
}
if ($cdb_group eq '') {
foreach my $grpname (@cdb_group_candidates) {
my $t = getgrnam($grpname);
if (defined $t) {
$cdb_group = $grpname;
last;
}
}
die "${progname}: no candidate CDB group name found on system, aborting!\n" unless $cdb_group ne '';
}
# For IPv6, allow enumeration to handle up to 2^20 results per block
NetAddr::IP::netlimit 20;
opendir(FLATD, $flat_dir) or die "opendir($flat_dir) failed: $!\n";
my @flat_configs = grep {$_ !~ /^\./} readdir(FLATD);
closedir(FLATD);
if ($cdb_dir eq $flat_dir) {
@flat_configs = grep {$_ !~ /\.cdb(?:\.\d+)?$/} @flat_configs;
}
unless (@flat_configs) {
die "${progname}: no input files found\n";
}
if ($cdb_group =~ /\D+/) {
my $tmpscalar = getgrnam($cdb_group);
if (not defined $tmpscalar) {
die "${progname}: no such group $cdb_group, aborting!\n";
}
$cdb_group = $tmpscalar;
}
my @cdbdir_stat = stat($cdb_dir);
unless (@cdbdir_stat) {
die "${progname}: missing directory $cdb_dir\n";
}
if ($cdbdir_stat[5] != $cdb_group) {
my $count = chown -1, $cdb_group, $cdb_dir;
$privfail->("chgrp($cdb_dir) failed: $!\n") unless $count == 1;
}
umask 0027;
foreach my $fn (@flat_configs) {
my $flat = File::Spec->catfile($flat_dir, $fn);
my $cdb = File::Spec->catfile($cdb_dir, "$fn.cdb");
my @st_flat = stat($flat);
unless (@st_flat) {
ewarn qq{Failed to stat "$flat"};
next;
}
my @st_cdb = stat($cdb);
if (@st_cdb == 0 or $st_flat[9] >= $st_cdb[9]) {
print "Updating: $fn\n";
rebuild_cdb($flat, $cdb);
my @new_st_cdb = stat($cdb);
if (@new_st_cdb) {
if ($new_st_cdb[5] != $cdb_group) {
my $count = chown -1, $cdb_group, $cdb;
$privfail->("chgrp($cdb) failed: $!\n") unless $count == 1;
}
} else {
ewarn qq{Failed to stat "$cdb"};
}
}
}
exit $exitval;
# ######################################################################
# SUBS
sub lwarn
{
++$exitval if $exitval < 99;
warn qq/${progname}: $_[1]\n\tline $. of $_[0]\n/;
}
sub ewarn
{
++$exitval if $exitval < 99;
warn qq/${progname}: $_[0]: $!\n/;
}
sub gwarn
{
++$exitval if $exitval < 99;
warn qq/${progname}: $_[0]\n/;
}
sub ldie
{
die qq/${progname}: $_[1]\n\tline $. of $_[0]\n/;
}
sub cdb_insert
{
my ($origin, $cdb, $key, $value, $options) = @_;
return $cdb->insert($key, $value) unless $options->{netblock4_autosize} or $options->{netblock6_autosize};
my $targ;
my $block = NetAddr::IP->new($key);
if ($block->version() == 6) {
$targ = $options->{netblock6_autosize};
return $cdb->insert($key, $value) if $targ == 0;
ldie $origin, "IPv6 netblock6_autosize $targ out of range" unless $targ > 0 and $targ <= 128;
} elsif ($block->version() == 4) {
$targ = $options->{netblock4_autosize};
return $cdb->insert($key, $value) if $targ == 0;
ldie $origin, "IPv4 netblock4_autosize $targ out of range" unless $targ > 0 and $targ <= 32;
} else {
ldie $origin, "Unknown address family for [$key]";
}
ldie $origin, "Not an IP netblock key: $key" unless defined $block;
my $c = 0;
my $items;
eval {
$items = $block->splitref($targ);
};
# module change; now dies, used to return undef; handle both
if ($@) {
lwarn $origin, "Unable to split [$key] to netblocks sized $targ";
return $cdb->insert($key, $value);
}
unless (defined $items) {
# Eg, asked for /19, got given a /20, complain but process
lwarn $origin, "Unable to split [$key] to netblocks sized $targ";
return $cdb->insert($key, $value);
}
foreach my $n (@{$items}) {
$cdb->insert("$n", $value);
++$c;
}
return $c;
}
sub rebuild_cdb
{
my ($flat, $cdb, $def_key) = @_;
$def_key = 'email' unless defined $def_key;
my $def_is_lhs = 0;
my $default_value = '';
my %key_regexps = (
email => $EMAIL_BRIEF_ALLOWED_REGEXP,
ipv4_netblock => $IPV4_NETBLOCK_REGEXP,
ipv6_netblock => $IPV6_NETBLOCK_REGEXP,
ip_netblock => $IP_NETBLOCK_REGEXP,
ipv4_address => $IPV4_REGEXP,
ipv6_address => $IPV6_REGEXP,
host => $HOSTNAME_REGEXP,
);
my %opts = (
lowerkey => 0,
key => lc $def_key,
netblock4_autosize => 0,
netblock6_autosize => 0,
);
my %opttype = (
lowerkey => 'bool',
key => 'keyregexp',
netblock4_autosize => 'int',
netblock6_autosize => 'int',
);
my %boolvals = (
true => 1,
yes => 1,
on => 1,
ja => 1,
false => 0,
'no' => 0,
off => 0,
nee => 0,
);
unless (exists $key_regexps{$opts{key}}) {
gwarn "Key type \"$opts{key}\" not valid";
return;
}
my $fh = new IO::File $flat, '<' or do {
ewarn "read-open($flat) failed";
return;
};
my $c = new CDB_File ($cdb, "$cdb.$$") or do {
ewarn "CDB file creation ($cdb) failed";
$fh->close();
return;
};
while (<$fh>) {
chomp;
s/(?:^|\s)#(?!%).*\z//;
s/^\s+//; s/\s+\z//;
next if /^$/;
if (/^\#?% (\S+) (?:\s+ (.+))?\z/x) {
my ($k, $v) = (lc($1), $2);
unless (exists $opts{$k}) {
lwarn $flat, "unknown option {$k}";
next;
}
if ($opttype{$k} eq 'bool') {
$v = 1 unless defined $v;
my $i = lc $v;
if ($v =~ /^\d+\z/) {
# cool
} elsif (exists $boolvals{$i}) {
$v = $boolvals{$i};
} else {
lwarn $flat, "unparsed boolean value for $k: $v";
next;
}
} elsif ($opttype{$k} eq 'int') {
$v = 0 unless defined $v;
$v = int $v; # user may see perl complaint; okay
} elsif ($opttype{$k} eq 'string') {
$v = '' unless defined $v;
} elsif ($opttype{$k} eq 'keyregexp') {
my $valid = join ' ', sort keys %key_regexps;
unless (defined $v) {
lwarn $flat, "need a key type for $k; valid: $valid";
next;
}
my $i = lc $v;
unless (exists $key_regexps{$i}) {
lwarn $flat, "key type \"$v\" not valid; allowed: $valid";
next;
}
$v = $i;
} else {
die qq/${progname}: internal error, unhandled option type '$opttype{$k}' for option $k, line $. of $flat\n/;
}
$opts{$k} = $v;
next;
}
if (/^:LHS\z/i) {
$def_is_lhs = 1;
undef $default_value;
next;
}
if (/^:(?:-|NO|NONE)\z/) {
undef $default_value;
$def_is_lhs = 0;
next;
}
if (/^:(?:EMPTY|BLANK)\z/) {
$default_value = '';
$def_is_lhs = 0;
next;
}
if (/^ : \s* (.+) \z/x) {
$default_value = $1;
next;
}
my $key_matcher = $key_regexps{$opts{key}};
if (/^($key_matcher)\z/) {
my $lhs = $1;
$lhs = lc($lhs) if $opts{lowerkey};
if (defined $default_value) {
cdb_insert($flat, $c, $lhs, $default_value, \%opts);
} elsif ($def_is_lhs) {
cdb_insert($flat, $c, $lhs, $lhs, \%opts);
} else {
lwarn $flat, 'no default value available';
$fh->close();
undef $c;
return;
}
next;
}
if (/^ ($key_matcher)
(?: \s+ (?! : ) | (?:
\s* : \s*
)) (\S.*?) \z /x) {
my ($k, $v) = ($1, $2);
$k = lc($k) if $opts{lowerkey};
cdb_insert($flat, $c, $k, $v, \%opts);
next;
}
lwarn $flat, "General parse error {$_}\n" .
"\tKey-type=[$opts{key}]\n";
$fh->close();
undef $c;
return;
}
$fh->close() or do {
ewarn "read-close($flat) failed";
undef $c; # untested interaction
return;
};
$c->finish() or do {
ewarn qq{finalisation failed for "$cdb"};
return;
}
}
__END__
=head1 NAME
update_mail_cdbs - Update mail-system CDB files
=head1 SYNOPSIS
update_mail_cdbs [devel-options]
=head1 OPTIONS
=over
=item B<--flat-dir>
Override the directory where flat files live.
=item B<--cdb-dir>
Override the directory where CDB files should be put.
=item B<--cdb-group>
Override the default group ownership of generated CDB files.
=back
=head1 DESCRIPTION
Take a bunch of flat files and generate CDB files from the contents,
with rules optimised for use in a mail-system.
This is appropriate for setting as C<bi_command> in Exim's configuration file.
=head1 FILE FORMAT
Files contain entries, one per line, mostly defining key-value pairs.
Leading and trailing whitespace is ignored.
Blank lines are ignored.
Directives are lines starting either C<%> or C<#%>.
Comments are lines starting with C<#> other than those starting C<#%>.
Lines starting C<:> change the default value for lines which only have keys.
Some special keywords are accepted, otherwise the line gives the literal value.
Items are formed as C<key : value> with optional whitespace and optional colon.
If the value needs to start with a colon, then use a colon as a separator too.
Keys are validated using one of a set of named validators;
the default is C<email>.
Directives and defaults apply to subsequent lines;
you can switch part-way through a file, though that's rarely a good idea.
=head2 Directives
Boolean values can be given in a few forms and if omitted is treated as true.
Thus a boolean directive on its own just means "turn this on".
=over
=item C<%lowerkey> I<bool>
Lower-case the key, for lookups.
=item C<%key> I<validator>
The key must pass the named validator;
default is C<email>, list of available validators is below.
=item C<%netblock4_autosize> I<int>
IPv4 netblock keys for netblocks larger than this size will be
split into multiple keys, each for a netblock of this size.
=item C<%netblock6_autosize> I<int>
IPv6 netblock keys for netblocks larger than this size will be
split into multiple keys, each for a netblock of this size.
=back
=head2 Validators
The C<%key> directive selects amongst these validators; the default is C<email>.
=over
=item C<email>
=item C<ipv4_netblock>
=item C<ipv6_netblock>
=item C<ip_netblock>
=item C<ipv4_address>
=item C<ipv6_address>
=item C<host>
=back
=head2 Default Values
The default default value is the empty string; thus entries mean that the key
will exist but the value will be empty.
Lines starting with a colon define the default value.
A few special keywords define behavior, else everything after the colon is the
new default value.
=over
=item C<:LHS>
The key will be used as the default value.
=item C<:NO> or C<:NONE>
There is no default value and lines lacking a default will become syntax errors.
=item C<:EMPTY> or C<:BLANK>
Use an empty string as the default value.
=back
=head1 EXAMPLES
A simple alias file:
%lowerkey
fred : fred@example.org
frederic : fred
wilma: wilma@example.org
A list of domain-mappings, for domain canonicalization:
:LHS
example.org
example.com
: example.net
example.net
foo.example.net
bar.example.net
:NONE
baz.example.org: example.org
A list of (bogon) netblocks which are to be stored in the CDB as C</24> entries;
we use C<#%> so that the directives look like comments to most other parsers:
#%key ipv4_netblock
#%netblock4_autosize 24
0.0.0.0/8
10.0.0.0/8
100.64.0.0/10
127.0.0.0/8
169.254.0.0/16
172.16.0.0/12
192.0.0.0/24
192.0.2.0/24
192.168.0.0/16
198.18.0.0/15
198.51.100.0/24
203.0.113.0/24
224.0.0.0/3
=head1 CAVEATS
There is currently no validation of the values, only of the keys.
=head1 AUTHOR
Phil Pennock. PGP KeyID 0x4D1E900E14C1CC04.
=cut
# vim: set foldmethod=marker :
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment