eqhmcow/apache-preconnect-workaround-README

## apache-preconnect-workaround-README
script to kill apache 1.3 / 2.x prefork httpd processes serving preconnect connections, in an attempt to prevent chrome from causing a DoS against the httpd

This is version 2 of https://gist.github.com/eqhmcow/4774549

The major change is the use of two processes instead of one:

One process makes server-status requests, the other kills idle prefork processes.

This allows the kill script to continue killing idle processes even when Chrome has effectively DoS'd the apache server. When this happens, the check script can't get an updated status response back immediately, but the kill script can hopefully free up a slot by killing some processes. Having an uninterrupted request in the socket queue allows us to get an updated response after the Chrome preconnections are killed.

The two-process solution allows us to hold the uninterrupted server status request open while the kill loop continues running.

## check_httpd_server_status.pl
#!/bin/env perl

# check_httpd_server_status.pl - make continuous requests to apache's server status page
# $Id$

use strict;
use warnings;

use Storable;

# chrome likes to preconnect to servers, which causes issues since there are
# a limited number of httpd slots available, and chrome preconnections can
# exhaust them all. this script kills httpds serving preconnect connections,
# preventing chrome from causing a DoS against the httpd.
# see also https://code.google.com/p/chromium/issues/detail?id=85229

# This is version 2 of https://gist.github.com/eqhmcow/4774549

# This script should be run concurrently with kill_idle_httpd.pl
# This script writes a file to share data with the kill script
# To allow the shared data file to be written to shared memory instead of a
# physical disk, chdir to /dev/shm (or a subdir under there) before running.

my $status_url = 'http://localhost/server-status';
my $expected_total = 10; # total number of available httpd slots

# Most recent scoreboard lines scraped from the status page; print_status()
# reads this file-level array.
my @status;

# Poll forever: scrape the scoreboard, publish it for kill_idle_httpd.pl, and
# print a one-number summary of the available slots.
while (1) {
    # Dump the status page as text and keep only the per-slot rows: lines
    # that begin with "<digits>-<digits>" and also contain a mode letter.
    @status = qx!/usr/bin/elinks -dump 1 $status_url |
        /bin/grep -P '^\\s*\\d+-\\d+\\s' |
        /bin/grep -P '/\\d+\\s+[_SRWKDLG.] \\d'
    !;

    # Write to a temporary name and rename it into place so the reader never
    # picks up a half-written file. Report a failed publish: otherwise the
    # kill script would keep acting on a stale snapshot without any hint.
    if (store(\@status, 'status.tmp')) {
        rename('status.tmp', 'status')
            or warn scalar localtime(), ": couldn't rename status.tmp to status: $!\n";
    }
    else {
        warn scalar localtime(), ": couldn't write status.tmp\n";
    }

    print_status();

    sleep 1;
}

# print_status - print a one-number summary of the latest scoreboard snapshot.
#
# Reads the slot lines held in the file-level @status, classifies each slot by
# its mode letter, and prints the number of slots that can take a new request
# (waiting + open) followed by ".". Takes no arguments and returns nothing.
# Prints nothing when a slot reports pid 0; prints a timestamped notice
# instead of the number when no slot line was counted.
sub print_status {
    # mode letter => tally name; any other mode is counted under 'other'
    my %state_for = (
        '_' => 'waiting',
        'R' => 'reading',
        'W' => 'sending',
        '.' => 'open',
    );

    # parse and classify httpd process slots
    my %p;
    foreach my $line (@status) {
        # only the pid and the mode are needed here
        my (undef, $pid, $mode) = ($line =~
            m!^\s*
            (\d+)               # slot
            -\d+                # generation
            \s+(\d+|-)          # pid
            \s+\d+/\d+/\d+      # accesses per connection / child / slot
            \s+([_SRWKDLG.])    # mode
            \s+\d+\.\d+         # CPU usage
            \s+(\d+)            # SS - seconds since beginning of most recent request
            \s+\d+              # Milliseconds required to process most recent request
            \s+\d+\.\d+         # Kilobytes transferred this connection
            \s+\d+\.\d+         # Megabytes transferred this child
            \s+\d+\.\d+         # Total megabytes transferred this slot
            (.*)$               # host, vhost, request
        !x);

        # A line the pattern rejects leaves every capture undef. It is still
        # counted as a slot in an unknown state, but the comparisons below are
        # skipped so they cannot emit "uninitialized value" warnings.
        unless (defined $pid) {
            $p{'total'}++;
            $p{'other'}++;
            next;
        }

        # a pid of 0 means this snapshot is not usable yet; give up and let
        # the next poll try again
        return if $pid eq '0';
        $p{'total'}++;

        my $state = $state_for{$mode};
        if (defined $state) {
            $p{$state}{'count'}++;
        }
        else {
            $p{'other'}++;
        }
    }

    $p{'total'} += 0; # make numeric
    # if we didn't parse any lines, the server is probably down; try again
    unless ($p{'total'}) {
        print scalar localtime(), ": couldn't parse any lines, assuming httpd is restarting; trying again\n";
        return;
    }

    $p{'waiting'}{'count'} ||= 0;
    $p{'open'}{'count'} ||= 0;

    # update the open count based on the max slot config; when apache has just
    # started it may list fewer slots than are configured
    if ($p{'total'} < $expected_total) {
        $p{'open'}{'count'} += $expected_total - $p{'total'};
    }

    # print a quick status update
    my $available = $p{'waiting'}{'count'} + $p{'open'}{'count'};
    print $available, ".";

    return;
}

## kill_idle_httpd.pl
#!/bin/env perl

# kill_idle_httpd.pl - kill idle httpd connections
# $Id$

use strict;
use warnings;

use Time::HiRes 'time';
use Storable;

# chrome likes to preconnect to servers, which causes issues since there are
# a limited number of httpd slots available, and chrome preconnections can
# exhaust them all. this script kills httpds serving preconnect connections,
# preventing chrome from causing a DoS against the httpd.
# see also https://code.google.com/p/chromium/issues/detail?id=85229

# This is version 2 of https://gist.github.com/eqhmcow/4774549

# This script should be run concurrently with check_httpd_server_status.pl
# This script reads a file to share data with the check script
# To allow the shared data file to be read from shared memory instead of a
# physical disk, chdir to /dev/shm (or a subdir under there) before running.

my $expected_total = 10; # total number of available httpd slots
my $waiting_threshold = 4; # start killing when we have <= this many slots available to receive a request
my $let_idle = 4; # kill connections after they idle for this many seconds
my $kill_idle = 1; # kill up to this many idle connections before requesting new server status
my $rekill_after = 16; # kill stuck processes after this many seconds

# State shared by the subs below. It is declared ahead of the main loop so the
# declarations are actually executed; the loop itself never terminates.

# pids that were killed, as [ pid, time of kill ] pairs
my @killed;

# slots that we're tracking: slot number => { pid, mode, rest, time }
my %slots;

# last apache server status data
my @status;

# autoflush the selected output handle so progress output shows up immediately
$| = 1;

# main loop
while (1) {
    # $p holds the per-state tallies, $k is true when killing is warranted
    my ($p, $k) = check_connections();
    if ($k) {
        kill_connections($p);
    }

    # sometimes when we kill a process, it hangs instead of exiting. kill -9
    # any such processes
    rekill_stuck();

    sleep 1;
}

# check_connections - load the latest scoreboard snapshot and decide whether
# idle connections need to be killed.
#
# Reads the 'status' file, refreshes the file-level @status and %slots, and
# prints a progress dot plus the number of available (waiting + open) slots.
#
# Returns a two-element list ($p, $kill):
#   $p    - hashref of tallies: 'total', 'other', and one entry per known
#           state ('waiting', 'reading', 'sending', 'open') holding 'count'
#           and, for states that had slots, 'pids': a list of
#           [ slot, pid, rest, time the slot last changed ].
#           undef when the snapshot is missing or unusable.
#   $kill - 1 when at most $waiting_threshold slots are waiting and no slot is
#           open, otherwise 0.
#
# Dies when the status file does not hold an array or a slot line cannot be
# parsed.
sub check_connections {
    print ".";

    # The check script may not have published its first snapshot yet, and
    # retrieve() is fatal on a file it cannot open. Wait for the next pass.
    unless (-e 'status') {
        print "\n", scalar localtime(), ": status file not found, is check_httpd_server_status.pl running? trying again\n";
        return (undef, 0);
    }

    my $data = retrieve('status');
    die "Couldn't read status file" unless ref($data) eq 'ARRAY';
    @status = @$data;

    # key:
    #   "_" Waiting for Connection, "S" Starting up, "R" Reading Request,
    #   "W" Sending Reply, "K" Keepalive (read), "D" DNS Lookup,
    #   "L" Logging, "G" Gracefully finishing, "." Open slot with no current
    #   process

    my $time = time();

    # mode letter => tally name; any other mode is counted under 'other'
    my %state_for = (
        '_' => 'waiting',
        'R' => 'reading',
        'W' => 'sending',
        '.' => 'open',
    );

    # parse and classify httpd process slots
    my %p;
    # Couldn't parse 4-0 0     0/0/10634  S 0.00 4   0   0.0  0.00  26.44 127.0.0.1 (unavailable)     GET /server-status HTTP/1.1
    foreach my $line (@status) {
        my ($slot, $pid, $mode, undef, $rest) = ($line =~
            m!^\s*
            (\d+)               # slot
            -\d+                # generation
            \s+(\d+|-)          # pid
            \s+\d+/\d+/\d+      # accesses per connection / child / slot
            \s+([_SRWKDLG.])    # mode
            \s+\d+\.\d+         # CPU usage
            \s+(\d+)            # SS - seconds since beginning of most recent request (or not, see below)
            \s+\d+              # Milliseconds required to process most recent request
            \s+\d+\.\d+         # Kilobytes transferred this connection
            \s+\d+\.\d+         # Megabytes transferred this child
            \s+\d+\.\d+         # Total megabytes transferred this slot
            (.*)$               # host, vhost, request
        !x);
        # NOTE: SS value is not actually usable since it doesn't reset when
        # apache accepts a connection; so we have to keep track of it ourselves

        # a failed match leaves the captures undef; check that first so the
        # pid test below never compares an undefined value
        die "Couldn't parse $line" unless defined $pid;

        # try again if the pid is 0
        return (undef, 0) if $pid eq '0';
        $p{'total'}++;

        # normalize spaces
        $rest =~ s/\s+/ /g;

        # keep track of when a slot starts processing a new request by
        # tracking changes; a slot seen for the first time counts as changed
        my $prev = $slots{$slot};
        my $changed = !$prev
            || $pid  ne $prev->{'pid'}
            || $mode ne $prev->{'mode'}
            || $rest ne $prev->{'rest'};
        my $slot_time = $changed ? $time : $prev->{'time'};

        $slots{$slot} = {
            'pid'   => $pid,
            'mode'  => $mode,
            'rest'  => $rest,
            'time'  => $slot_time,
        };

        my $state = $state_for{$mode};
        if (defined $state) {
            $p{$state}{'count'}++;
            push @{ $p{$state}{'pids'} }, [ $slot, $pid, $rest, $slot_time ];
        }
        else {
            $p{'other'}++;
        }
#        print "pid [$pid] mode [$mode] rest [$rest]\n";
    }

    $p{'total'} += 0; # make numeric
    # if we didn't parse any lines, the server is probably down; try again
    unless ($p{'total'}) {
        print scalar localtime(), ": couldn't parse any lines, assuming httpd is restarting; trying again\n";
        return (undef, 0);
    }

#    print "Couldn't find all the slots, is apache still initializing?\n@status"
#        unless $p{'total'} == $expected_total;

    $p{'waiting'}{'count'} ||= 0;
    $p{'open'}{'count'} ||= 0;

    # update the open count based on the max slot config; when apache has just started this may differ
    if ($p{'total'} < $expected_total) {
        $p{'open'}{'count'} += $expected_total - $p{'total'};
    }

    # print a quick status update
    my $available = $p{'waiting'}{'count'} + $p{'open'}{'count'};
    print $available;

    # if there are <= threshold waiting slots, and no open slots, then kill
    # any idle reading slots
    my $should_kill =
        ($p{'waiting'}{'count'} <= $waiting_threshold and not $p{'open'}{'count'}) ? 1 : 0;

    return (\%p, $should_kill);
}

# kill_connections - kill httpd children that sit idle in the reading state.
#
# Takes the tallies hashref produced by check_connections(). Prints the
# current scoreboard lines and the established connections on ports 80/443,
# then sends a plain kill (through sudo) to at most $kill_idle processes whose
# slot shows no client or request yet and has not changed for more than
# $let_idle seconds. Every pid signalled is recorded in the file-level @killed
# together with the time, so it is not signalled here twice. Returns nothing.
sub kill_connections {
    my $p = shift;

    print "\n", scalar localtime(), ": looking to kill idle read connections\n@status\n";
    system(q!sudo netstat -np | grep 'ESTABLISHED' | egrep ':(80|443) '!);

    # check for idle reading connections
    # idle connections look like:
    # 0-0 30044 0/20/10453 R 1.00 7   0   0.0  1.27  25.39 ? ? ..reading..

    my $now = time();
    my $kills = 0;
    foreach my $entry (@{ $p->{'reading'}{'pids'} || [] }) {
        # each entry is [ slot, pid, rest, time the slot last changed ]
        my (undef, $pid, $rest, $slot_time) = @{$entry};

        # the scoreboard pattern also admits "-" as a pid; only a real
        # process id can be compared numerically and signalled
        next unless $pid =~ /\A\d+\z/;

        # find a slot that's accepting a request
        next unless $rest =~ m/\s+\?\s+\?\s+\Q..reading..\E/;

        # make sure it's been idle
        my $idle = $now - $slot_time;
        next unless $idle > $let_idle;

        # skip if we already killed it
        next if grep { $_->[0] == $pid } @killed;

        # kill; the list form of system() runs the command without a shell
        print scalar localtime(), ": killing pid: [$pid] idle for: [$idle] info: $rest\n";
        system('sudo', 'kill', $pid) == 0
            or print scalar localtime(), ": kill of pid [$pid] failed (status $?)\n";

        # record it even when the kill failed, so the pid is not signalled
        # here again on every pass
        push @killed, [ $pid, $now ];

        $kills++;
        last if $kills >= $kill_idle;
    }

    return;
}

# rekill_stuck - follow up on processes that were killed earlier.
#
# Walks the file-level @killed list of [ pid, time of kill ] pairs. Entries
# whose /proc/<pid> is no longer readable are dropped; the rest stay on the
# list, and any of them killed more than $rekill_after seconds ago is sent
# kill -9 through sudo. Takes no arguments and returns nothing.
sub rekill_stuck {
    my $now = time();

    # empty the list up front; entries still alive are appended again below
    my @pending = splice @killed;

    for my $entry (@pending) {
        my ($pid, $killed_at) = @{$entry};

        # use /proc to check pid
        if (-r "/proc/$pid") {
            # still around: keep tracking it
            push @killed, $entry;

            if ($now - $killed_at > $rekill_after) {
                print "\n", scalar localtime(), ": kill -9 pid: $pid\n";
                system("sudo kill -9 $pid");
            }
        }
    }

    return;
}

## preconnect-talk.pdf

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              preconnect-talk.pdf
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	script to kill apache 1.3 / 2.x prefork httpd processes serving preconnect connections, in an attempt to prevent chrome from causing a DoS against the httpd

	This is version 2 of https://gist.github.com/eqhmcow/4774549

	The major change is the use of two processes instead of one:

	One process makes server-status requests, the other kills idle prefork processes.

	This allows the kill script to continue killing idle processes even when Chrome has effectively DoS'd the apache server. When this happens, the check script can't get an updated status response back immediately, but the kill script can hopefully free up a slot by killing some processes. Having an uninterrupted request in the socket queue allows us to get an updated response after the Chrome preconnections are killed.

	The two-process solution allows us to hold the uninterrupted server status request open while the kill loop continues running.
	#!/bin/env perl

	# check_httpd_server_status.pl - make continuous requests to apache's server status page
	# $Id$

	use strict;
	use warnings;

	use Storable;

	# chrome likes to preconnect to servers, which causes issues since there are
	# a limited number of httpd slots available, and chrome preconnections can
	# exhaust them all. this script kills httpds serving preconnect connections,
	# preventing chrome from causing a DoS against the httpd.
	# see also https://code.google.com/p/chromium/issues/detail?id=85229

	# This is version 2 of https://gist.github.com/eqhmcow/4774549

	# This script should be run concurrently with kill_idle_httpd.pl
	# This script writes a file to share data with the kill script
	# To allow the shared data file to be written to shared memory instead of a
	# physical disk, chdir to /dev/shm (or a subdir under there) before running.

	my $status_url = 'http://localhost/server-status';
	my $expected_total = 10; # total number of available httpd slots

	my @status;
	while (1) {
	@status = qx!/usr/bin/elinks -dump 1 $status_url \|
	/bin/grep -P '^\\s*\\d+-\\d+\\s' \|
	/bin/grep -P '/\\d+\\s+[_SRWKDLG.] \\d'
	!;

	store(\@status, 'status.tmp');
	rename('status.tmp', 'status');

	print_status();

	sleep 1;
	}

	sub print_status {
	# parse and classify httpd process slots
	my %p;
	my @m = (
	{ m => '_', v => 'waiting' },
	{ m => 'R', v => 'reading' },
	{ m => 'W', v => 'sending' },
	{ m => '.', v => 'open' },
	);
	foreach my $line (@status) {
	my ($slot, $pid, $mode, $ss, $rest) = ($line =~
	m!^\s*
	(\d+) # slot
	-\d+ # generation
	\s+(\d+\|-) # pid
	\s+\d+/\d+/\d+ # accesses per connection / child / slot
	\s+([_SRWKDLG.]) # mode
	\s+\d+\.\d+ # CPU usage
	\s+(\d+) # SS - seconds since beginning of most recent request (or not, see below)
	\s+\d+ # Milliseconds required to process most recent request
	\s+\d+\.\d+ # Kilobytes transferred this connection
	\s+\d+\.\d+ # Megabytes transferred this child
	\s+\d+\.\d+ # Total megabytes transferred this slot
	(.*)$ # host, vhost, request
	!x);

	# try again if the pid is 0
	return if $pid eq '0';
	$p{'total'}++;

	my $found = 0;
	foreach my $m (@m) {
	if ($mode eq $m->{m}) {
	$p{$m->{v}}{'count'}++;
	$found = 1;
	last;
	}
	}
	$p{'other'}++ unless $found;
	}

	$p{'total'} += 0; # make numeric
	# if we didn't parse any lines, the server is probably down; try again
	unless ($p{'total'}) {
	print scalar localtime(), ": couldn't parse any lines, assuming httpd is restarting; trying again\n";
	return;
	}

	$p{'waiting'}{'count'} \|\|= 0;
	$p{'open'}{'count'} \|\|= 0;

	# update the open count based on the max slot config; when apache has just started this may differ
	if ($p{'total'} < $expected_total) {
	$p{'open'}{'count'} += $expected_total - $p{'total'};
	}

	# print a quick status update
	print $p{'waiting'}{'count'} + $p{'open'}{'count'}, ".";

	return;
	}
	#!/bin/env perl

	# kill_idle_httpd.pl - kill idle httpd connections
	# $Id$

	use strict;
	use warnings;

	use Time::HiRes 'time';
	use Storable;

	# chrome likes to preconnect to servers, which causes issues since there are
	# a limited number of httpd slots available, and chrome preconnections can
	# exhaust them all. this script kills httpds serving preconnect connections,
	# preventing chrome from causing a DoS against the httpd.
	# see also https://code.google.com/p/chromium/issues/detail?id=85229

	# This is version 2 of https://gist.github.com/eqhmcow/4774549

	# This script should be run concurrently with check_httpd_server_status.pl
	# This script reads a file to share data with the check script
	# To allow the shared data file to be read from shared memory instead of a
	# physical disk, chdir to /dev/shm (or a subdir under there) before running.

	my $expected_total = 10; # total number of available httpd slots
	my $waiting_threshold = 4; # start killing when we have <= this many slots available to receive a request
	my $let_idle = 4; # kill connections after they idle for this many seconds
	my $kill_idle = 1; # kill up to this many idle connections before requesting new server status
	my $rekill_after = 16; # kill stuck processes after this many seconds

	$\|++;

	# main loop
	while (1) {
	my ($p, $k) = check_connections();
	if ($k) {
	kill_connections($p);
	}

	# sometimes when we kill a process, it hangs instead of exiting. kill -9
	# any such processes
	rekill_stuck();

	sleep 1;
	}

	# pids that were killed
	my @killed;

	# slots that we're tracking
	my %slots;

	# last apache server status data
	my @status;

	sub check_connections {
	print ".";
	my $data = retrieve('status');
	die "Couldn't read status file" unless ref($data) eq 'ARRAY';
	@status = @$data;

	# key:
	# "_" Waiting for Connection, "S" Starting up, "R" Reading Request,
	# "W" Sending Reply, "K" Keepalive (read), "D" DNS Lookup,
	# "L" Logging, "G" Gracefully finishing, "." Open slot with no current
	# process

	my $time = time();

	# parse and classify httpd process slots
	my %p;
	my @m = (
	{ m => '_', v => 'waiting' },
	{ m => 'R', v => 'reading' },
	{ m => 'W', v => 'sending' },
	{ m => '.', v => 'open' },
	);
	# Couldn't parse 4-0 0 0/0/10634 S 0.00 4 0 0.0 0.00 26.44 127.0.0.1 (unavailable) GET /server-status HTTP/1.1
	foreach my $line (@status) {
	my ($slot, $pid, $mode, $ss, $rest) = ($line =~
	m!^\s*
	(\d+) # slot
	-\d+ # generation
	\s+(\d+\|-) # pid
	\s+\d+/\d+/\d+ # accesses per connection / child / slot
	\s+([_SRWKDLG.]) # mode
	\s+\d+\.\d+ # CPU usage
	\s+(\d+) # SS - seconds since beginning of most recent request (or not, see below)
	\s+\d+ # Milliseconds required to process most recent request
	\s+\d+\.\d+ # Kilobytes transferred this connection
	\s+\d+\.\d+ # Megabytes transferred this child
	\s+\d+\.\d+ # Total megabytes transferred this slot
	(.*)$ # host, vhost, request
	!x);
	# NOTE: SS value is not actually usable since it doesn't reset when
	# apache accepts a connection; so we have to keep track of it ourselves

	# try again if the pid is 0
	return (undef, 0) if $pid eq '0';
	die "Couldn't parse $line" unless $pid;
	$p{'total'}++;

	# normalize spaces
	$rest =~ s/\s+/ /g;

	# keep track of when a slot starts processing a new request by
	# tracking changes
	if ($pid ne $slots{$slot}{'pid'} or
	$mode ne $slots{$slot}{'mode'} or
	$rest ne $slots{$slot}{'rest'}
	) {
	$slots{$slot}{'time'} = $time;
	}

	my $slot_time = $slots{$slot}{'time'} \|\| $time;
	$slots{$slot} = {
	'pid' => $pid,
	'mode' => $mode,
	'rest' => $rest,
	'time' => $slot_time,
	};

	my $found = 0;
	foreach my $m (@m) {
	if ($mode eq $m->{m}) {
	$p{$m->{v}}{'count'}++;
	$p{$m->{v}}{'pids'} \|\|= [];
	push @{$p{$m->{v}}{'pids'}}, [ $slot, $pid, $rest, $slot_time ];
	$found = 1;
	last;
	}
	}
	$p{'other'}++ unless $found;
	# print "pid [$pid] mode [$mode] rest [$rest]\n";
	}

	$p{'total'} += 0; # make numeric
	# if we didn't parse any lines, the server is probably down; try again
	unless ($p{'total'}) {
	print scalar localtime(), ": couldn't parse any lines, assuming httpd is restarting; trying again\n";
	return (undef, 0);
	}

	# print "Couldn't find all the slots, is apache still initializing?\n@status"
	# unless $p{'total'} == $expected_total;

	$p{'waiting'}{'count'} \|\|= 0;
	$p{'open'}{'count'} \|\|= 0;

	# update the open count based on the max slot config; when apache has just started this may differ
	if ($p{'total'} < $expected_total) {
	$p{'open'}{'count'} += $expected_total - $p{'total'};
	}

	# print a quick status update
	print $p{'waiting'}{'count'} + $p{'open'}{'count'};

	# if there are <= threshold waiting slots, and no open slots, then kill
	# any idle reading slots
	if ($p{'waiting'}{'count'} <= $waiting_threshold and not $p{'open'}{'count'}) {
	return (\%p, 1);
	}

	return (\%p, 0);
	}

	sub kill_connections {
	my $p = shift;

	print "\n", scalar localtime(), ": looking to kill idle read connections\n@status\n";
	system(q!sudo netstat -np \| grep 'ESTABLISHED' \| egrep ':(80\|443) '!);

	# check for idle reading connections
	# idle connections look like:
	# 0-0 30044 0/20/10453 R 1.00 7 0 0.0 1.27 25.39 ? ? ..reading..

	$p->{'reading'}{'pids'} \|\|= [];
	my $i = 0 ;
	my $time = time();
	foreach my $slot (@{$p->{'reading'}{'pids'}}) {
	my $s = $slot->[0];
	my $pid = $slot->[1];
	my $rest = $slot->[2];
	my $slot_time = $slot->[3];

	# find a slot that's accepting a request
	next unless $rest =~ m/\s+\?\s+\?\s+\Q..reading..\E/;

	# make sure it's been idle
	my $idle = $time - $slot_time;
	next unless $idle > $let_idle;

	# skip if we already killed it
	next if grep $pid == $_->[0], @killed;

	# kill
	print scalar localtime(), ": killing pid: [$pid] idle for: [$idle] info: $rest\n";
	system("sudo kill $pid");
	push @killed, [ $pid, $time ];

	$i++;
	last if $i >= $kill_idle;
	}

	return;
	}

	sub rekill_stuck {
	my $time = time();
	foreach my $k (splice @killed) {
	my $pid = $k->[0];
	my $t = $k->[1];

	# use /proc to check pid
	next unless -r "/proc/$pid";

	# print "\n", scalar localtime(), ": killed process still active: $pid\n";
	push @killed, $k;

	next unless $time - $t > $rekill_after;

	print "\n", scalar localtime(), ": kill -9 pid: $pid\n";
	system("sudo kill -9 $pid");
	}

	return;
	}