philpennock/update_mail_cdbs.pl

## update_mail_cdbs.pl
#!/usr/bin/env perl

# BUG: "#" can not appear in a value

BEGIN { @INC = grep {$_ ne '.'} @INC };
use warnings;
use strict;

# XXX: make these overrideable with getopt, together with "rebuild all", etc.
my $flat_dir = '/etc/mail/flat';
my $cdb_dir = '/etc/mail/cdb';
my @cdb_group_candidates = qw( exim _exim );
my $cdb_group = '';

# {{{1IPAddressRegexps
my $IPV4_OCTET = qr/(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])/;
my $IPV4_REGEXP = qr/(?:(?:${IPV4_OCTET}\.){3}${IPV4_OCTET})/o;
# Allow a.b.c.d/n a.b.c/n a.b/n a/n but not a.b.c./n (no trailing dot):
my $IPV4_NETBLOCK_REGEXP = qr{(?: (?:
	$IPV4_OCTET (?: \. $IPV4_OCTET){0,3}
	) / (?:[1-9]|[12][0-9]|3[0-2])
	)}ox;
# {{{2IPv6RegexpCommentary
# RFC 3986 states:
#       IPv6address =                            6( h16 ":" ) ls32
#                   /                       "::" 5( h16 ":" ) ls32
#                   / [               h16 ] "::" 4( h16 ":" ) ls32
#                   / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
#                   / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
#                   / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
#                   / [ *4( h16 ":" ) h16 ] "::"              ls32
#                   / [ *5( h16 ":" ) h16 ] "::"              h16
#                   / [ *6( h16 ":" ) h16 ] "::"
#
#       ls32        = ( h16 ":" h16 ) / IPv4address
#                   ; least-significant 32 bits of address
#
#       h16         = 1*4HEXDIG
#                   ; 16 bits of address represented in hexadecimal
#
# Note that we need to allow:
#   1:2:3:4:5:6:7:8   -- 7 colons
#   ::2:3:4:5:6:7:8   -- 8 colons, skips leading 0
#   1::3:4:5:6:7:8    -- 7 colons again; or fewer
#   1:2:3:4:5:6::8    -- 7 or fewer
#   1:2:3:4:5:6:7::   -- 8 colons, skips trailing 0
# so there can be 8 colons only if two are doubled and are an affix.
# Otherwise there's always 7 colons at most.
#
# RFC 4291: IPv6 Addressing Architecture
#   The use of "::" indicates one or more groups of 16 bits of zeros.
#   The "::" can only appear once in an address.  The "::" can also be
#   used to compress leading or trailing zeros in an address.
# That's "1 or more", not "2 or more", so in effect when it's an affix
# there's a degenerate case where a colon (:) just replaces a zero (0).
#
# Bugs encountered after original writing:
#  * Was missing two /o optimisations
#  * Had extra | at end of the last line, permitting empty string to match
#
# }}}2IPv6RegexpCommentary
my $IPV6_H16 = qr/(?:[0-9a-fA-F]{1,4})/;
my $IPV6_LS32 = qr/(?:(?:${IPV6_H16}:${IPV6_H16})|${IPV4_REGEXP})/o;
my $IPV6_REGEXP = qr/(?:
  (?:(?:                                             (?:${IPV6_H16}:){6} )${IPV6_LS32}) |
  (?:(?:                                          :: (?:${IPV6_H16}:){5} )${IPV6_LS32}) |
  (?:(?: (?:                       ${IPV6_H16} )? :: (?:${IPV6_H16}:){4} )${IPV6_LS32}) |
  (?:(?: (?: (?:${IPV6_H16}:){0,1} ${IPV6_H16} )? :: (?:${IPV6_H16}:){3} )${IPV6_LS32}) |
  (?:(?: (?: (?:${IPV6_H16}:){0,2} ${IPV6_H16} )? :: (?:${IPV6_H16}:){2} )${IPV6_LS32}) |
  (?:(?: (?: (?:${IPV6_H16}:){0,3} ${IPV6_H16} )? ::    ${IPV6_H16}:     )${IPV6_LS32}) |
  (?:(?: (?: (?:${IPV6_H16}:){0,4} ${IPV6_H16} )? ::                     )${IPV6_LS32}) |
  (?:(?: (?: (?:${IPV6_H16}:){0,5} ${IPV6_H16} )? ::                     )${IPV6_H16} ) |
  (?:(?: (?: (?:${IPV6_H16}:){0,6} ${IPV6_H16} )? ::                     )            )
  )/ox;
my $IPV6_NETBLOCK_REGEXP = qr{(?:
	$IPV6_REGEXP / (?:[1-9]|[1-9][0-9]|1[01][0-9]|12[0-8])
	)}ox;
my $IP_NETBLOCK_REGEXP = qr/(?: $IPV4_NETBLOCK_REGEXP | $IPV6_NETBLOCK_REGEXP )/ox;
# }}}1IPAddressRegexps
# {{{1EmailRegexps
# See RFC 2821 (not all comments are terminology from there)
# Also atext and qtext from RFC 2822
# NB: this relies upon ASCII ordering; should not pose a problem
my $RE_ATEXT    = qr/[A-Za-z0-9!#\$\%\&\'\*\+\/=\?\^_\`\{\|\}\~\-]/;
my $RE_QTEXT    = qr/[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]/;
my $EMAIL_LHS_REGEXP    = qr/
		     # Local-part
		     (?:
		      (?:
			# Dot-string
			(?:$RE_ATEXT)+ (?: \. $RE_ATEXT+)*
		      ) | (?:
			# Quoted-string
			\" (?: \s* (?:
			 (?: $RE_QTEXT+ ) |
			 (?: \\ [\x01-\x09\x0b\x0c\x0e-\x7f] )
			) )* \"
		      )
		     )
		     /ox;
my $EMAIL_DOMAIN_REGEXP = qr/
		     # Domain
		     (?:
			(?:
			 # regular domain
			 (?:[A-Za-z0-9] (?: [A-Za-z0-9-]*[A-Za-z0-9] )?)
			 (?: \. [A-Za-z0-9] (?: [A-Za-z0-9-]*[A-Za-z0-9] )?)+
			) | (?:
			     # address-literals
			     \[
			       (?: $IPV4_REGEXP | (?: [Ii][Pp][vV]6: $IPV6_REGEXP ) )
			       # NOTSUPP: General-address-literal
			       # G-A-L is a hook for future literal addresses
			       # and only specifies tag:content
			     \]
			)
		     )
		     /ox;
my $EMAIL_REGEXP =
	qr/ (?: (?:$EMAIL_LHS_REGEXP) \@ (?:$EMAIL_DOMAIN_REGEXP) ) /ox;

my $EMAIL_BRIEF_ALLOWED_REGEXP = qr/ (?:
	$EMAIL_LHS_REGEXP
	(?: \@ $EMAIL_DOMAIN_REGEXP )?
	) /ox;
# }}}1EmailRegexps

my $HOSTNAME_REGEXP = qr/ \S+ /x;


# ######################################################################
# MAIN PROGRAM

use CDB_File;
use File::Spec;
use Getopt::Long;
use IO::File;
use NetAddr::IP qw(:lower);
use Pod::Usage;

sub ewarn;	# errno
sub gwarn;	# generic
sub lwarn;	# lineno
sub rebuild_cdb;

my $exitval = 0;
(my $progname = $0) =~ s!^.*/!!;
my $require_perms = 1;

my ($help, $man) = (0, 0);
GetOptions(
	"flat-dir=s"		=> \$flat_dir,
	"cdb-dir=s"		=> \$cdb_dir,
	"cdb-group=s"		=> \$cdb_group,
	"help|?"		=> \$help,
	"man"			=> \$man,
) or pod2usage(2);
pod2usage(1) if $help;
pod2usage(-existstatus => 0, -verbose => 2) if $man;

my $privfail = sub { die @_ };

if ($> != 0) {
	warn "${progname}: Not invoked as root, success optional\n";
	# we don't bump exit val, so don't use gwarn
	$privfail = sub { warn @_ };
}

if ($cdb_group eq '') {
	foreach my $grpname (@cdb_group_candidates) {
		my $t = getgrnam($grpname);
		if (defined $t) {
			$cdb_group = $grpname;
			last;
		}
	}
	die "${progname}: no candidate CDB group name found on system, aborting!\n" unless $cdb_group ne '';
}

# For IPv6, allow enumeration to handle up to 2^20 results per block
NetAddr::IP::netlimit 20;

opendir(FLATD, $flat_dir) or die "opendir($flat_dir) failed: $!\n";
my @flat_configs = grep {$_ !~ /^\./} readdir(FLATD);
closedir(FLATD);
if ($cdb_dir eq $flat_dir) {
	@flat_configs = grep {$_ !~ /\.cdb(?:\.\d+)?$/} @flat_configs;
}

unless (@flat_configs) {
	die "${progname}: no input files found\n";
}

if ($cdb_group =~ /\D+/) {
	my $tmpscalar = getgrnam($cdb_group);
	if (not defined $tmpscalar) {
		die "${progname}: no such group $cdb_group, aborting!\n";
	}
	$cdb_group = $tmpscalar;
}

my @cdbdir_stat = stat($cdb_dir);
unless (@cdbdir_stat) {
	die "${progname}: missing directory $cdb_dir\n";
}

if ($cdbdir_stat[5] != $cdb_group) {
	my $count = chown -1, $cdb_group, $cdb_dir;
	$privfail->("chgrp($cdb_dir) failed: $!\n") unless $count == 1;
}

umask 0027;

foreach my $fn (@flat_configs) {
	my $flat = File::Spec->catfile($flat_dir, $fn);
	my $cdb = File::Spec->catfile($cdb_dir, "$fn.cdb");
	my @st_flat = stat($flat);
	unless (@st_flat) {
		ewarn qq{Failed to stat "$flat"};
		next;
	}
	my @st_cdb = stat($cdb);

	if (@st_cdb == 0 or $st_flat[9] >= $st_cdb[9]) {
		print "Updating: $fn\n";
		rebuild_cdb($flat, $cdb);
		my @new_st_cdb = stat($cdb);
		if (@new_st_cdb) {
			if ($new_st_cdb[5] != $cdb_group) {
				my $count = chown -1, $cdb_group, $cdb;
				$privfail->("chgrp($cdb) failed: $!\n") unless $count == 1;
			}
		} else {
			ewarn qq{Failed to stat "$cdb"};
		}
	}
}

exit $exitval;

# ######################################################################
# SUBS

sub lwarn
{
	++$exitval if $exitval < 99;
	warn qq/${progname}: $_[1]\n\tline $. of $_[0]\n/;
}

sub ewarn
{
	++$exitval if $exitval < 99;
	warn qq/${progname}: $_[0]: $!\n/;
}

sub gwarn
{
	++$exitval if $exitval < 99;
	warn qq/${progname}: $_[0]\n/;
}

sub ldie
{
	die qq/${progname}: $_[1]\n\tline $. of $_[0]\n/;
}

sub cdb_insert
{
	my ($origin, $cdb, $key, $value, $options) = @_;
	return $cdb->insert($key, $value) unless $options->{netblock4_autosize} or $options->{netblock6_autosize};
	my $targ;
	my $block = NetAddr::IP->new($key);
	if ($block->version() == 6) {
		$targ = $options->{netblock6_autosize};
		return $cdb->insert($key, $value) if $targ == 0;
		ldie $origin, "IPv6 netblock6_autosize $targ out of range" unless $targ > 0 and $targ <= 128;
	} elsif ($block->version() == 4) {
		$targ = $options->{netblock4_autosize};
		return $cdb->insert($key, $value) if $targ == 0;
		ldie $origin, "IPv4 netblock4_autosize $targ out of range" unless $targ > 0 and $targ <= 32;
	} else {
		ldie $origin, "Unknown address family for [$key]";
	}
	ldie $origin, "Not an IP netblock key: $key" unless defined $block;
	my $c = 0;
	my $items;
	eval {
		$items = $block->splitref($targ);
	};
# module change; now dies, used to return undef; handle both
	if ($@) {
		lwarn $origin, "Unable to split [$key] to netblocks sized $targ";
		return $cdb->insert($key, $value);
	}
	unless (defined $items) {
		# Eg, asked for /19, got given a /20, complain but process
		lwarn $origin, "Unable to split [$key] to netblocks sized $targ";
		return $cdb->insert($key, $value);
	}
	foreach my $n (@{$items}) {
		$cdb->insert("$n", $value);
		++$c;
	}
	return $c;
}

sub rebuild_cdb
{
	my ($flat, $cdb, $def_key) = @_;
	$def_key = 'email' unless defined $def_key;

	my $def_is_lhs = 0;
	my $default_value = '';
	my %key_regexps = (
		email		=> $EMAIL_BRIEF_ALLOWED_REGEXP,
		ipv4_netblock	=> $IPV4_NETBLOCK_REGEXP,
		ipv6_netblock	=> $IPV6_NETBLOCK_REGEXP,
		ip_netblock	=> $IP_NETBLOCK_REGEXP,
		ipv4_address	=> $IPV4_REGEXP,
		ipv6_address	=> $IPV6_REGEXP,
		host		=> $HOSTNAME_REGEXP,
	);
	my %opts = (
		lowerkey => 0,
		key => lc $def_key,
		netblock4_autosize => 0,
		netblock6_autosize => 0,
	);
	my %opttype = (
		lowerkey => 'bool',
		key => 'keyregexp',
		netblock4_autosize => 'int',
		netblock6_autosize => 'int',
	);
	my %boolvals = (
		true	=> 1,
		yes	=> 1,
		on	=> 1,
		ja	=> 1,
		false	=> 0,
		'no'	=> 0,
		off	=> 0,
		nee	=> 0,
	);

	unless (exists $key_regexps{$opts{key}}) {
		gwarn "Key type \"$opts{key}\" not valid";
		return;
	}

	my $fh = new IO::File $flat, '<' or do {
		ewarn "read-open($flat) failed";
		return;
	};
	my $c = new CDB_File ($cdb, "$cdb.$$") or do {
		ewarn "CDB file creation ($cdb) failed";
		$fh->close();
		return;
	};

	while (<$fh>) {
		chomp;
		s/(?:^|\s)#(?!%).*\z//;
		s/^\s+//; s/\s+\z//;
		next if /^$/;

		if (/^\#?% (\S+) (?:\s+ (.+))?\z/x) {
			my ($k, $v) = (lc($1), $2);
			unless (exists $opts{$k}) {
				lwarn $flat, "unknown option {$k}";
				next;
			}
			if ($opttype{$k} eq 'bool') {
				$v = 1 unless defined $v;
				my $i = lc $v;
				if ($v =~ /^\d+\z/) {
					# cool
				} elsif (exists $boolvals{$i}) {
					$v = $boolvals{$i};
				} else {
					lwarn $flat, "unparsed boolean value for $k: $v";
					next;
				}
			} elsif ($opttype{$k} eq 'int') {
				$v = 0 unless defined $v;
				$v = int $v; # user may see perl complaint; okay
			} elsif ($opttype{$k} eq 'string') {
				$v = '' unless defined $v;
			} elsif ($opttype{$k} eq 'keyregexp') {
				my $valid = join ' ', sort keys %key_regexps;
				unless (defined $v) {
					lwarn $flat, "need a key type for $k; valid: $valid";
					next;
				}
				my $i = lc $v;
				unless (exists $key_regexps{$i}) {
					lwarn $flat, "key type \"$v\" not valid; allowed: $valid";
					next;
				}
				$v = $i;
			} else {
				die qq/${progname}: internal error, unhandled option type '$opttype{$k}' for option $k, line $. of $flat\n/;
			}
			$opts{$k} = $v;
			next;
		}
		if (/^:LHS\z/i) {
			$def_is_lhs = 1;
			undef $default_value;
			next;
		}
		if (/^:(?:-|NO|NONE)\z/) {
			undef $default_value;
			$def_is_lhs = 0;
			next;
		}
		if (/^:(?:EMPTY|BLANK)\z/) {
			$default_value = '';
			$def_is_lhs = 0;
			next;
		}

		if (/^ : \s* (.+) \z/x) {
			$default_value = $1;
			next;
		}

		my $key_matcher = $key_regexps{$opts{key}};

		if (/^($key_matcher)\z/) {
			my $lhs = $1;
			$lhs = lc($lhs) if $opts{lowerkey};
			if (defined $default_value) {
				cdb_insert($flat, $c, $lhs, $default_value, \%opts);
			} elsif ($def_is_lhs) {
				cdb_insert($flat, $c, $lhs, $lhs, \%opts);
			} else {
				lwarn $flat, 'no default value available';
				$fh->close();
				undef $c;
				return;
			}
			next;
		}

		if (/^ ($key_matcher)
			(?: \s+ (?! : ) | (?:
			\s* : \s*
			)) (\S.*?) \z /x) {
			my ($k, $v) = ($1, $2);
			$k = lc($k) if $opts{lowerkey};
			cdb_insert($flat, $c, $k, $v, \%opts);
			next;
		}

		lwarn $flat, "General parse error {$_}\n" .
				"\tKey-type=[$opts{key}]\n";
		$fh->close();
		undef $c;
		return;
	}

	$fh->close() or do {
		ewarn "read-close($flat) failed";
		undef $c; # untested interaction
		return;
	};
	$c->finish() or do {
		ewarn qq{finalisation failed for "$cdb"};
		return;
	}
}

__END__
=head1 NAME

update_mail_cdbs - Update mail-system CDB files

=head1 SYNOPSIS

update_mail_cdbs [devel-options]

=head1 OPTIONS

=over

=item B<--flat-dir>

Override the directory where flat files live.

=item B<--cdb-dir>

Override the directory where CDB files should be put.

=item B<--cdb-group>

Override the default group ownership of generated CDB files.

=back

=head1 DESCRIPTION

Take a bunch of flat files and generate CDB files from the contents,
with rules optimised for use in a mail-system.

This is appropriate for setting as C<bi_command> in Exim's configuration file.

=head1 FILE FORMAT

Files contain entries, one per line, mostly defining key-value pairs.

Leading and trailing whitespace is ignored.
Blank lines are ignored.

Directives are lines starting either C<%> or C<#%>.

Comments are lines starting with C<#> other than those starting C<#%>.

Lines starting C<:> change the default value for lines which only have keys.
Some special keywords are accepted, otherwise the line gives the literal value.

Items are formed as C<key : value> with optional whitespace and optional colon.
If the value needs to start with a colon, then use a colon as a separator too.

Keys are validated using one of a set of named validators;
the default is C<email>.

Directives and defaults apply to subsequent lines;
you can switch part-way through a file, though that's rarely a good idea.

=head2 Directives

Boolean values can be given in a few forms and if omitted is treated as true.
Thus a boolean directive on its own just means "turn this on".

=over

=item C<%lowerkey> I<bool>

Lower-case the key, for lookups.

=item C<%key> I<validator>

The key must pass the named validator;
default is C<email>, list of available validators is below.

=item C<%netblock4_autosize> I<int>

IPv4 netblock keys for netblocks larger than this size will be
split into multiple keys, each for a netblock of this size.

=item C<%netblock6_autosize> I<int>

IPv6 netblock keys for netblocks larger than this size will be
split into multiple keys, each for a netblock of this size.

=back

=head2 Validators

The C<%key> directive selects amongst these validators; the default is C<email>.

=over

=item C<email>

=item C<ipv4_netblock>

=item C<ipv6_netblock>

=item C<ip_netblock>

=item C<ipv4_address>

=item C<ipv6_address>

=item C<host>

=back

=head2 Default Values

The default default value is the empty string; thus entries mean that the key
will exist but the value will be empty.

Lines starting with a colon define the default value.
A few special keywords define behavior, else everything after the colon is the
new default value.

=over

=item C<:LHS>

The key will be used as the default value.

=item C<:NO> or C<:NONE>

There is no default value and lines lacking a default will become syntax errors.

=item C<:EMPTY> or C<:BLANK>

Use an empty string as the default value.

=back

=head1 EXAMPLES

A simple alias file:

  %lowerkey

  fred : fred@example.org
  frederic : fred
  wilma: wilma@example.org

A list of domain-mappings, for domain canonicalization:

  :LHS
  example.org
  example.com

  : example.net
  example.net
  foo.example.net
  bar.example.net

  :NONE
  baz.example.org: example.org

A list of (bogon) netblocks which are to be stored in the CDB as C</24> entries;
we use C<#%> so that the directives look like comments to most other parsers:

  #%key ipv4_netblock
  #%netblock4_autosize 24
  0.0.0.0/8
  10.0.0.0/8
  100.64.0.0/10
  127.0.0.0/8
  169.254.0.0/16
  172.16.0.0/12
  192.0.0.0/24
  192.0.2.0/24
  192.168.0.0/16
  198.18.0.0/15
  198.51.100.0/24
  203.0.113.0/24
  224.0.0.0/3

=head1 CAVEATS

There is currently no validation of the values, only of the keys.

=head1 AUTHOR

Phil Pennock.  PGP KeyID 0x4D1E900E14C1CC04.

=cut

# vim: set foldmethod=marker :
	#!/usr/bin/env perl

	# BUG: "#" can not appear in a value

	BEGIN { @INC = grep {$_ ne '.'} @INC };
	use warnings;
	use strict;

	# XXX: make these overrideable with getopt, together with "rebuild all", etc.
	my $flat_dir = '/etc/mail/flat';
	my $cdb_dir = '/etc/mail/cdb';
	my @cdb_group_candidates = qw( exim _exim );
	my $cdb_group = '';

	# {{{1IPAddressRegexps
	my $IPV4_OCTET = qr/(?:25[0-5]\|(?:[1-9]\|1[0-9]\|2[0-4])?[0-9])/;
	my $IPV4_REGEXP = qr/(?:(?:${IPV4_OCTET}\.){3}${IPV4_OCTET})/o;
	# Allow a.b.c.d/n a.b.c/n a.b/n a/n but not a.b.c./n (no trailing dot):
	my $IPV4_NETBLOCK_REGEXP = qr{(?: (?:
	$IPV4_OCTET (?: \. $IPV4_OCTET){0,3}
	) / (?:[1-9]\|[12][0-9]\|3[0-2])
	)}ox;
	# {{{2IPv6RegexpCommentary
	# RFC 3986 states:
	# IPv6address = 6( h16 ":" ) ls32
	# / "::" 5( h16 ":" ) ls32
	# / [ h16 ] "::" 4( h16 ":" ) ls32
	# / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
	# / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
	# / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
	# / [ *4( h16 ":" ) h16 ] "::" ls32
	# / [ *5( h16 ":" ) h16 ] "::" h16
	# / [ *6( h16 ":" ) h16 ] "::"
	#
	# ls32 = ( h16 ":" h16 ) / IPv4address
	# ; least-significant 32 bits of address
	#
	# h16 = 1*4HEXDIG
	# ; 16 bits of address represented in hexadecimal
	#
	# Note that we need to allow:
	# 1:2:3:4:5:6:7:8 -- 7 colons
	# ::2:3:4:5:6:7:8 -- 8 colons, skips leading 0
	# 1::3:4:5:6:7:8 -- 7 colons again; or fewer
	# 1:2:3:4:5:6::8 -- 7 or fewer
	# 1:2:3:4:5:6:7:: -- 8 colons, skips trailing 0
	# so there can be 8 colons only if two are doubled and are an affix.
	# Otherwise there's always 7 colons at most.
	#
	# RFC 4291: IPv6 Addressing Architecture
	# The use of "::" indicates one or more groups of 16 bits of zeros.
	# The "::" can only appear once in an address. The "::" can also be
	# used to compress leading or trailing zeros in an address.
	# That's "1 or more", not "2 or more", so in effect when it's an affix
	# there's a degenerate case where a colon (:) just replaces a zero (0).
	#
	# Bugs encountered after original writing:
	# * Was missing two /o optimisations
	# * Had extra \| at end of the last line, permitting empty string to match
	#
	# }}}2IPv6RegexpCommentary
	my $IPV6_H16 = qr/(?:[0-9a-fA-F]{1,4})/;
	my $IPV6_LS32 = qr/(?:(?:${IPV6_H16}:${IPV6_H16})\|${IPV4_REGEXP})/o;
	my $IPV6_REGEXP = qr/(?:
	(?:(?: (?:${IPV6_H16}:){6} )${IPV6_LS32}) \|
	(?:(?: :: (?:${IPV6_H16}:){5} )${IPV6_LS32}) \|
	(?:(?: (?: ${IPV6_H16} )? :: (?:${IPV6_H16}:){4} )${IPV6_LS32}) \|
	(?:(?: (?: (?:${IPV6_H16}:){0,1} ${IPV6_H16} )? :: (?:${IPV6_H16}:){3} )${IPV6_LS32}) \|
	(?:(?: (?: (?:${IPV6_H16}:){0,2} ${IPV6_H16} )? :: (?:${IPV6_H16}:){2} )${IPV6_LS32}) \|
	(?:(?: (?: (?:${IPV6_H16}:){0,3} ${IPV6_H16} )? :: ${IPV6_H16}: )${IPV6_LS32}) \|
	(?:(?: (?: (?:${IPV6_H16}:){0,4} ${IPV6_H16} )? :: )${IPV6_LS32}) \|
	(?:(?: (?: (?:${IPV6_H16}:){0,5} ${IPV6_H16} )? :: )${IPV6_H16} ) \|
	(?:(?: (?: (?:${IPV6_H16}:){0,6} ${IPV6_H16} )? :: ) )
	)/ox;
	my $IPV6_NETBLOCK_REGEXP = qr{(?:
	$IPV6_REGEXP / (?:[1-9]\|[1-9][0-9]\|1[01][0-9]\|12[0-8])
	)}ox;
	my $IP_NETBLOCK_REGEXP = qr/(?: $IPV4_NETBLOCK_REGEXP \| $IPV6_NETBLOCK_REGEXP )/ox;
	# }}}1IPAddressRegexps
	# {{{1EmailRegexps
	# See RFC 2821 (not all comments are terminology from there)
	# Also atext and qtext from RFC 2822
	# NB: this relies upon ASCII ordering; should not pose a problem
	my $RE_ATEXT = qr/[A-Za-z0-9!#\$\%\&\'\*\+\/=\?\^_\`\{\\|\}\~\-]/;
	my $RE_QTEXT = qr/[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]/;
	my $EMAIL_LHS_REGEXP = qr/
	# Local-part
	(?:
	(?:
	# Dot-string
	(?:$RE_ATEXT)+ (?: \. $RE_ATEXT+)*
	) \| (?:
	# Quoted-string
	\" (?: \s* (?:
	(?: $RE_QTEXT+ ) \|
	(?: \\ [\x01-\x09\x0b\x0c\x0e-\x7f] )
	) )* \"
	)
	)
	/ox;
	my $EMAIL_DOMAIN_REGEXP = qr/
	# Domain
	(?:
	(?:
	# regular domain
	(?:[A-Za-z0-9] (?: [A-Za-z0-9-]*[A-Za-z0-9] )?)
	(?: \. [A-Za-z0-9] (?: [A-Za-z0-9-]*[A-Za-z0-9] )?)+
	) \| (?:
	# address-literals
	\[
	(?: $IPV4_REGEXP \| (?: [Ii][Pp][vV]6: $IPV6_REGEXP ) )
	# NOTSUPP: General-address-literal
	# G-A-L is a hook for future literal addresses
	# and only specifies tag:content
	\]
	)
	)
	/ox;
	my $EMAIL_REGEXP =
	qr/ (?: (?:$EMAIL_LHS_REGEXP) \@ (?:$EMAIL_DOMAIN_REGEXP) ) /ox;

	my $EMAIL_BRIEF_ALLOWED_REGEXP = qr/ (?:
	$EMAIL_LHS_REGEXP
	(?: \@ $EMAIL_DOMAIN_REGEXP )?
	) /ox;
	# }}}1EmailRegexps

	my $HOSTNAME_REGEXP = qr/ \S+ /x;


	# ######################################################################
	# MAIN PROGRAM

	use CDB_File;
	use File::Spec;
	use Getopt::Long;
	use IO::File;
	use NetAddr::IP qw(:lower);
	use Pod::Usage;

	sub ewarn; # errno
	sub gwarn; # generic
	sub lwarn; # lineno
	sub rebuild_cdb;

	my $exitval = 0;
	(my $progname = $0) =~ s!^.*/!!;
	my $require_perms = 1;

	my ($help, $man) = (0, 0);
	GetOptions(
	"flat-dir=s" => \$flat_dir,
	"cdb-dir=s" => \$cdb_dir,
	"cdb-group=s" => \$cdb_group,
	"help\|?" => \$help,
	"man" => \$man,
	) or pod2usage(2);
	pod2usage(1) if $help;
	pod2usage(-existstatus => 0, -verbose => 2) if $man;

	my $privfail = sub { die @_ };

	if ($> != 0) {
	warn "${progname}: Not invoked as root, success optional\n";
	# we don't bump exit val, so don't use gwarn
	$privfail = sub { warn @_ };
	}

	if ($cdb_group eq '') {
	foreach my $grpname (@cdb_group_candidates) {
	my $t = getgrnam($grpname);
	if (defined $t) {
	$cdb_group = $grpname;
	last;
	}
	}
	die "${progname}: no candidate CDB group name found on system, aborting!\n" unless $cdb_group ne '';
	}

	# For IPv6, allow enumeration to handle up to 2^20 results per block
	NetAddr::IP::netlimit 20;

	opendir(FLATD, $flat_dir) or die "opendir($flat_dir) failed: $!\n";
	my @flat_configs = grep {$_ !~ /^\./} readdir(FLATD);
	closedir(FLATD);
	if ($cdb_dir eq $flat_dir) {
	@flat_configs = grep {$_ !~ /\.cdb(?:\.\d+)?$/} @flat_configs;
	}

	unless (@flat_configs) {
	die "${progname}: no input files found\n";
	}

	if ($cdb_group =~ /\D+/) {
	my $tmpscalar = getgrnam($cdb_group);
	if (not defined $tmpscalar) {
	die "${progname}: no such group $cdb_group, aborting!\n";
	}
	$cdb_group = $tmpscalar;
	}

	my @cdbdir_stat = stat($cdb_dir);
	unless (@cdbdir_stat) {
	die "${progname}: missing directory $cdb_dir\n";
	}

	if ($cdbdir_stat[5] != $cdb_group) {
	my $count = chown -1, $cdb_group, $cdb_dir;
	$privfail->("chgrp($cdb_dir) failed: $!\n") unless $count == 1;
	}

	umask 0027;

	foreach my $fn (@flat_configs) {
	my $flat = File::Spec->catfile($flat_dir, $fn);
	my $cdb = File::Spec->catfile($cdb_dir, "$fn.cdb");
	my @st_flat = stat($flat);
	unless (@st_flat) {
	ewarn qq{Failed to stat "$flat"};
	next;
	}
	my @st_cdb = stat($cdb);

	if (@st_cdb == 0 or $st_flat[9] >= $st_cdb[9]) {
	print "Updating: $fn\n";
	rebuild_cdb($flat, $cdb);
	my @new_st_cdb = stat($cdb);
	if (@new_st_cdb) {
	if ($new_st_cdb[5] != $cdb_group) {
	my $count = chown -1, $cdb_group, $cdb;
	$privfail->("chgrp($cdb) failed: $!\n") unless $count == 1;
	}
	} else {
	ewarn qq{Failed to stat "$cdb"};
	}
	}
	}

	exit $exitval;

	# ######################################################################
	# SUBS

	sub lwarn
	{
	++$exitval if $exitval < 99;
	warn qq/${progname}: $_[1]\n\tline $. of $_[0]\n/;
	}

	sub ewarn
	{
	++$exitval if $exitval < 99;
	warn qq/${progname}: $_[0]: $!\n/;
	}

	sub gwarn
	{
	++$exitval if $exitval < 99;
	warn qq/${progname}: $_[0]\n/;
	}

	sub ldie
	{
	die qq/${progname}: $_[1]\n\tline $. of $_[0]\n/;
	}

	sub cdb_insert
	{
	my ($origin, $cdb, $key, $value, $options) = @_;
	return $cdb->insert($key, $value) unless $options->{netblock4_autosize} or $options->{netblock6_autosize};
	my $targ;
	my $block = NetAddr::IP->new($key);
	if ($block->version() == 6) {
	$targ = $options->{netblock6_autosize};
	return $cdb->insert($key, $value) if $targ == 0;
	ldie $origin, "IPv6 netblock6_autosize $targ out of range" unless $targ > 0 and $targ <= 128;
	} elsif ($block->version() == 4) {
	$targ = $options->{netblock4_autosize};
	return $cdb->insert($key, $value) if $targ == 0;
	ldie $origin, "IPv4 netblock4_autosize $targ out of range" unless $targ > 0 and $targ <= 32;
	} else {
	ldie $origin, "Unknown address family for [$key]";
	}
	ldie $origin, "Not an IP netblock key: $key" unless defined $block;
	my $c = 0;
	my $items;
	eval {
	$items = $block->splitref($targ);
	};
	# module change; now dies, used to return undef; handle both
	if ($@) {
	lwarn $origin, "Unable to split [$key] to netblocks sized $targ";
	return $cdb->insert($key, $value);
	}
	unless (defined $items) {
	# Eg, asked for /19, got given a /20, complain but process
	lwarn $origin, "Unable to split [$key] to netblocks sized $targ";
	return $cdb->insert($key, $value);
	}
	foreach my $n (@{$items}) {
	$cdb->insert("$n", $value);
	++$c;
	}
	return $c;
	}

	sub rebuild_cdb
	{
	my ($flat, $cdb, $def_key) = @_;
	$def_key = 'email' unless defined $def_key;

	my $def_is_lhs = 0;
	my $default_value = '';
	my %key_regexps = (
	email => $EMAIL_BRIEF_ALLOWED_REGEXP,
	ipv4_netblock => $IPV4_NETBLOCK_REGEXP,
	ipv6_netblock => $IPV6_NETBLOCK_REGEXP,
	ip_netblock => $IP_NETBLOCK_REGEXP,
	ipv4_address => $IPV4_REGEXP,
	ipv6_address => $IPV6_REGEXP,
	host => $HOSTNAME_REGEXP,
	);
	my %opts = (
	lowerkey => 0,
	key => lc $def_key,
	netblock4_autosize => 0,
	netblock6_autosize => 0,
	);
	my %opttype = (
	lowerkey => 'bool',
	key => 'keyregexp',
	netblock4_autosize => 'int',
	netblock6_autosize => 'int',
	);
	my %boolvals = (
	true => 1,
	yes => 1,
	on => 1,
	ja => 1,
	false => 0,
	'no' => 0,
	off => 0,
	nee => 0,
	);

	unless (exists $key_regexps{$opts{key}}) {
	gwarn "Key type \"$opts{key}\" not valid";
	return;
	}

	my $fh = new IO::File $flat, '<' or do {
	ewarn "read-open($flat) failed";
	return;
	};
	my $c = new CDB_File ($cdb, "$cdb.$$") or do {
	ewarn "CDB file creation ($cdb) failed";
	$fh->close();
	return;
	};

	while (<$fh>) {
	chomp;
	s/(?:^\|\s)#(?!%).*\z//;
	s/^\s+//; s/\s+\z//;
	next if /^$/;

	if (/^\#?% (\S+) (?:\s+ (.+))?\z/x) {
	my ($k, $v) = (lc($1), $2);
	unless (exists $opts{$k}) {
	lwarn $flat, "unknown option {$k}";
	next;
	}
	if ($opttype{$k} eq 'bool') {
	$v = 1 unless defined $v;
	my $i = lc $v;
	if ($v =~ /^\d+\z/) {
	# cool
	} elsif (exists $boolvals{$i}) {
	$v = $boolvals{$i};
	} else {
	lwarn $flat, "unparsed boolean value for $k: $v";
	next;
	}
	} elsif ($opttype{$k} eq 'int') {
	$v = 0 unless defined $v;
	$v = int $v; # user may see perl complaint; okay
	} elsif ($opttype{$k} eq 'string') {
	$v = '' unless defined $v;
	} elsif ($opttype{$k} eq 'keyregexp') {
	my $valid = join ' ', sort keys %key_regexps;
	unless (defined $v) {
	lwarn $flat, "need a key type for $k; valid: $valid";
	next;
	}
	my $i = lc $v;
	unless (exists $key_regexps{$i}) {
	lwarn $flat, "key type \"$v\" not valid; allowed: $valid";
	next;
	}
	$v = $i;
	} else {
	die qq/${progname}: internal error, unhandled option type '$opttype{$k}' for option $k, line $. of $flat\n/;
	}
	$opts{$k} = $v;
	next;
	}
	if (/^:LHS\z/i) {
	$def_is_lhs = 1;
	undef $default_value;
	next;
	}
	if (/^:(?:-\|NO\|NONE)\z/) {
	undef $default_value;
	$def_is_lhs = 0;
	next;
	}
	if (/^:(?:EMPTY\|BLANK)\z/) {
	$default_value = '';
	$def_is_lhs = 0;
	next;
	}

	if (/^ : \s* (.+) \z/x) {
	$default_value = $1;
	next;
	}

	my $key_matcher = $key_regexps{$opts{key}};

	if (/^($key_matcher)\z/) {
	my $lhs = $1;
	$lhs = lc($lhs) if $opts{lowerkey};
	if (defined $default_value) {
	cdb_insert($flat, $c, $lhs, $default_value, \%opts);
	} elsif ($def_is_lhs) {
	cdb_insert($flat, $c, $lhs, $lhs, \%opts);
	} else {
	lwarn $flat, 'no default value available';
	$fh->close();
	undef $c;
	return;
	}
	next;
	}

	if (/^ ($key_matcher)
	(?: \s+ (?! : ) \| (?:
	\s* : \s*
	)) (\S.*?) \z /x) {
	my ($k, $v) = ($1, $2);
	$k = lc($k) if $opts{lowerkey};
	cdb_insert($flat, $c, $k, $v, \%opts);
	next;
	}

	lwarn $flat, "General parse error {$_}\n" .
	"\tKey-type=[$opts{key}]\n";
	$fh->close();
	undef $c;
	return;
	}

	$fh->close() or do {
	ewarn "read-close($flat) failed";
	undef $c; # untested interaction
	return;
	};
	$c->finish() or do {
	ewarn qq{finalisation failed for "$cdb"};
	return;
	}
	}

	__END__
	=head1 NAME

	update_mail_cdbs - Update mail-system CDB files

	=head1 SYNOPSIS

	update_mail_cdbs [devel-options]

	=head1 OPTIONS

	=over

	=item B<--flat-dir>

	Override the directory where flat files live.

	=item B<--cdb-dir>

	Override the directory where CDB files should be put.

	=item B<--cdb-group>

	Override the default group ownership of generated CDB files.

	=back

	=head1 DESCRIPTION

	Take a bunch of flat files and generate CDB files from the contents,
	with rules optimised for use in a mail-system.

	This is appropriate for setting as C<bi_command> in Exim's configuration file.

	=head1 FILE FORMAT

	Files contain entries, one per line, mostly defining key-value pairs.

	Leading and trailing whitespace is ignored.
	Blank lines are ignored.

	Directives are lines starting either C<%> or C<#%>.

	Comments are lines starting with C<#> other than those starting C<#%>.

	Lines starting C<:> change the default value for lines which only have keys.
	Some special keywords are accepted, otherwise the line gives the literal value.

	Items are formed as C<key : value> with optional whitespace and optional colon.
	If the value needs to start with a colon, then use a colon as a separator too.

	Keys are validated using one of a set of named validators;
	the default is C<email>.

	Directives and defaults apply to subsequent lines;
	you can switch part-way through a file, though that's rarely a good idea.

	=head2 Directives

	Boolean values can be given in a few forms and if omitted is treated as true.
	Thus a boolean directive on its own just means "turn this on".

	=over

	=item C<%lowerkey> I<bool>

	Lower-case the key, for lookups.

	=item C<%key> I<validator>

	The key must pass the named validator;
	default is C<email>, list of available validators is below.

	=item C<%netblock4_autosize> I<int>

	IPv4 netblock keys for netblocks larger than this size will be
	split into multiple keys, each for a netblock of this size.

	=item C<%netblock6_autosize> I<int>

	IPv6 netblock keys for netblocks larger than this size will be
	split into multiple keys, each for a netblock of this size.

	=back

	=head2 Validators

	The C<%key> directive selects amongst these validators; the default is C<email>.

	=over

	=item C<email>

	=item C<ipv4_netblock>

	=item C<ipv6_netblock>

	=item C<ip_netblock>

	=item C<ipv4_address>

	=item C<ipv6_address>

	=item C<host>

	=back

	=head2 Default Values

	The default default value is the empty string; thus entries mean that the key
	will exist but the value will be empty.

	Lines starting with a colon define the default value.
	A few special keywords define behavior, else everything after the colon is the
	new default value.

	=over

	=item C<:LHS>

	The key will be used as the default value.

	=item C<:NO> or C<:NONE>

	There is no default value and lines lacking a default will become syntax errors.

	=item C<:EMPTY> or C<:BLANK>

	Use an empty string as the default value.

	=back

	=head1 EXAMPLES

	A simple alias file:

	%lowerkey

	fred : fred@example.org
	frederic : fred
	wilma: wilma@example.org

	A list of domain-mappings, for domain canonicalization:

	:LHS
	example.org
	example.com

	: example.net
	example.net
	foo.example.net
	bar.example.net

	:NONE
	baz.example.org: example.org

	A list of (bogon) netblocks which are to be stored in the CDB as C</24> entries;
	we use C<#%> so that the directives look like comments to most other parsers:

	#%key ipv4_netblock
	#%netblock4_autosize 24
	0.0.0.0/8
	10.0.0.0/8
	100.64.0.0/10
	127.0.0.0/8
	169.254.0.0/16
	172.16.0.0/12
	192.0.0.0/24
	192.0.2.0/24
	192.168.0.0/16
	198.18.0.0/15
	198.51.100.0/24
	203.0.113.0/24
	224.0.0.0/3

	=head1 CAVEATS

	There is currently no validation of the values, only of the keys.

	=head1 AUTHOR

	Phil Pennock. PGP KeyID 0x4D1E900E14C1CC04.

	=cut

	# vim: set foldmethod=marker :