Last active
January 27, 2018 08:23
-
-
Save eserte/7ed71d7e02e76e43eeb2249e0fd6becf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
# -*- perl -*- | |
# | |
# Author: Slaven Rezic | |
# | |
# Copyright (C) 2015-2018 Slaven Rezic. All rights reserved. | |
# This program is free software; you can redistribute it and/or | |
# modify it under the same terms as Perl itself. | |
# | |
# Mail: slaven@rezic.de | |
# WWW: http://www.rezic.de/eserte/ | |
# | |
use strict; | |
use Fcntl qw(O_CREAT O_EXCL); | |
use File::Basename qw(basename); | |
use Getopt::Long; | |
use POSIX qw(strftime); | |
my $get_process_table; | |
BEGIN { | |
if ($^O eq 'MSWin32') { | |
require P9Y::ProcessTable; | |
$get_process_table = sub { P9Y::ProcessTable->table }; | |
no warnings 'once'; | |
*P9Y::ProcessTable::Process::cmndline = sub { shift->cmdline(@_) }; | |
} else { | |
require Proc::ProcessTable; | |
$get_process_table = sub { @{ Proc::ProcessTable->new->table } }; | |
} | |
} | |
use Sys::Hostname qw(hostname); | |
sub search_prereqcheck_pid (); | |
sub search_perl_test_candidates (); | |
sub guess_distvname ($); | |
sub mywarn ($); | |
sub v_warn ($); | |
sub debug ($); | |
my @known_problematic_distvnames = | |
( | |
($^O eq 'freebsd' ? | |
( | |
'AnyEvent-PgRecvlogical-0.001', # seen on fbsd12 smoker | |
'App-Prove-Plugin-Cluster-0.02', | |
'BrowserStack-Local-0.1.0', | |
'Coro-PatchSet-0.13', | |
'CPAN-Testers-TailLog-0.001000', | |
'Daemon-Device-1.05', | |
'Daemon-Device-1.06', | |
'Daemon-Device-1.07', | |
'Debug-Client-0.29', | |
'Debug-Client-0.30', | |
'Debug-Client-0.31', | |
'File-FDpasser-0.09', # t/t07fork.t may hang | |
'Filesys-POSIX-0.9.19', # test after t/tar-missing.t may hang on 9.2 | |
'Gtk2-Ex-WidgetBits-48', | |
'IPC-Queue-Duplex-1.007', | |
'IPC-SRLock-0.28.1', | |
'Mail-Milter-Authentication-v1.1.5', | |
'Message-Passing-Output-Log-Any-Adapter-0.003', # https://rt.cpan.org/Ticket/Display.html?id=120419 (locking issue) | |
'Mojo-Phantom-0.07', | |
'Mojo-Phantom-0.09', | |
'Mojo-Phantom-0.10', | |
'Mojolicious-Command-bulkget-0.02', | |
'PasswordMonkey-0.09', | |
'Patro-0.12', | |
'Patro-0.13', | |
'Patro-0.14', | |
'Patro-0.15', | |
'Plack-Middleware-AccessLog-Structured-ZeroMQ-0.001000', | |
'POE-Component-Client-HTTP-0.949', | |
'UV-1.000000_01', # hangs after t/05-poll-close.t | |
'UV-1.000004', # hangs in t/03-timer-from-check.t | |
'XS-Logger-0.001', # tests may hang | |
'XS-Logger-0.002', # " | |
'XS-Logger-0.003', # " | |
'WAIT-1.800', | |
'Wight-0.02', # don't know about linux, requires phantomjs | |
) : ()), | |
($^O eq 'linux' ? | |
( | |
'AnyEvent-Digest-v0.0.5', | |
'AnyEvent-Gearman-0.10', | |
'AnyEvent-IRC-Server-0.03', # https://github.com/kan/p5-anyevent-irc-server/issues/5 | |
'App-KGB-1.33', | |
'App-MechaCPAN-0.20', | |
'Catalyst-Plugin-Upload-Image-Magick-0.04', | |
'DBD-Firebird-1.22', | |
'DBD-Firebird-1.24', | |
'Enbugger-2.016', | |
'Gearman-Client-Async-0.94', | |
'Gearman-JobScheduler-0.16', | |
'Hiredis-Raw-0.07', | |
'HTTP-Proxy-0.304', # may hang on centos6 systems | |
'HTTP-ProxySelector-Persistent-0.02', | |
'HTTP-Server-EV-0.69', | |
'IO-Async-XMLStream-SAXReader-0.001000', | |
'IO-Async-XMLStream-SAXReader-0.001001', | |
'IO-Async-XMLStream-SAXReader-0.001002', # https://github.com/kentnl/IO-Async-XMLStream-SAXReader/issues/2 | |
'IO-EventMux-2.02', | |
'IO-File-Log-1.01', | |
'Log-Dispatch-ZMQ-0.02', | |
'Log-Syslog-DangaSocket-1.06', | |
'LWPx-TimedHTTP-1.8', | |
'Mail-GnuPG-0.23', # hangs only on jessie | |
'Module-License-Report-0.02', | |
'Mojo-Redis2-0.26', | |
'Mojo-Redis2-0.27', | |
'Mojolicious-Plugin-AssetPack-1.28', | |
'Net-Dropbear-0.07', | |
'Net-FTPServer-1.125', | |
'Net-Link-0.01', | |
'Net-NfDump-1.24', | |
'Net-NfDump-1.25', | |
'Net-Server-SS-PreFork-0.05', | |
'Net-ZooKeeper-Lock-0.03', | |
'Paranoid-2.04', | |
'Perlbal-1.80', | |
'Plack-Middleware-AccessLog-Structured-ZeroMQ-0.001000', | |
'Plack-Middleware-AccessLog-Structured-ZeroMQ-0.001001', | |
'POE-Component-Client-FTP-0.24', | |
'POE-Component-Net-FTP-0.001', | |
'Prophet-0.751', | |
'Rstat-Client-2.2', # hanging test.pl XXX killing does not seem to work... | |
'Starlink-AST-1.05', | |
'Sys-SyslogMessages-0.02', | |
'Tapper-Reports-API-5.0.5', | |
'Test-RedisServer-0.20', | |
'Thread-Isolate-0.05', | |
'Thrust-0.200', | |
'Valence-0.200', | |
'ZeroMQ-PubSub-0.10', | |
) : ()), | |
($^O eq 'MSWin32' ? | |
( | |
'Logfile-Rotate-1.04', | |
'Net-Server-2.009', | |
'POE-1.367', | |
'Unix-Passwd-File-0.24', # tests never finish, consuming CPU | |
) : ()), | |
'AtteanX-Query-Cache-0.002', | |
'AnyEvent-FDpasser-0.3.0', # various tests may hang, seen on freebsd and linux | |
'AnyEvent-Gearman-WorkerPool-0.4', | |
'AnyEvent-Gearman-WorkerPool-1.0', | |
'AnyEvent-Net-MPD-0.001', # busy loop in test (would be killed anyway after 3600 CPU seconds) | |
'AnyEvent-SIP-0.002', | |
'App-perlrdf-0.006', | |
'App-prowess-0.05', | |
'Async-Simple-Pool-0.02', | |
'Async-Simple-Pool-0.03', | |
'Async-Simple-Pool-0.04', | |
'Async-Simple-Pool-0.05', | |
'BSON-Decode-0.02', | |
'BSON-Decode-0.03', | |
'Catalyst-View-Reproxy-0.05', # https://rt.cpan.org/Ticket/Display.html?id=82831 | |
'Chart-GRACE-0.95', | |
'code-UnifdefPlus-v0.4.0', # see also distropref file | |
'Concurrent-Object-1.07', | |
'Config-Model-TkUI-1.352', | |
'Config-Model-TkUI-1.353', | |
'Config-Model-TkUI-1.354', | |
'Config-Model-TkUI-1.360', | |
'Config-Model-TkUI-1.361', | |
'Daemon-DaemonizeLight-0.01', | |
'Daemon-DaemonizeLight-0.02', | |
'Daemon-DaemonizeLight-0.03', | |
'DBD-Firebird-1.29', # t/embed-80-event-ithreads.t may hang | |
'DBD-Firebird-1.31', | |
'DBIx-DataModel-2.47_06', | |
'DBIx-DataModel-2.47_07', | |
'DBIx-DataModel-2.47_08', | |
'Debug-DBGp-0.20', | |
'Devel-Debug-DBGp-0.20', | |
'Devel-hdb-0.23_09', | |
'Encode-ISO2022-0.04', # https://rt.cpan.org/Ticket/Display.html?id=117157 | |
'FCGI-Engine-0.22', # may hang | |
'Feersum-1.405', | |
'File-BSDGlob-0.94', # see distroprefs file | |
'File-Lock-Multi-1.02', # https://rt.cpan.org/Ticket/Display.html?id=104720 | |
'File-ReadBackwards-1.05', # may hang with -jX, see https://rt.cpan.org/Ticket/Display.html?id=92313 | |
'Flower-0.10', | |
'Forks-Queue-0.09', | |
'Forks-Super-0.88', | |
'Forks-Super-0.89', | |
'Gearman-Driver-0.02008', | |
'Gearman-Glutch-0.02', | |
'Gearman-SlotManager-0.3', | |
'IPC-AnyEvent-Gearman-0.8', | |
'IPC-Shareable-0.61', | |
'IPC-Transit-1.133280', # ticket exists | |
'IPC-Transit-1.161400', | |
'IPC-Transit-1.162230', | |
'IPC-Transit-1.171860', | |
'IO-Socket-Socks-0.71', # https://rt.cpan.org/Ticket/Display.html?id=118522 | |
'Mail-GPG-1.0.11', # may hang on freebsd & linux | |
'Mail-Milter-Authentication-Handler-SMIME-v1.1.5', | |
'Mail-Milter-Authentication-Handler-SMIME-v1.1.6', | |
'Mojo-IOLoop-ReadWriteFork-0.23', | |
'Mojo-IOLoop-ReadWriteProcess-0.09', | |
'Mojo-SMTP-Client-0.14', | |
'Mojolicious-Plugin-Webtail-0.06', | |
'MojoX-UserAgent-Throttler-v1.0.0', | |
'MojoX-UserAgent-Throttler-v1.0.1', | |
'MojoX-UserAgent-Throttler-v1.0.2', | |
'Monoceros-0.27', | |
'MooseX-Workers-0.24', | |
'Net-Netcat-0.05', | |
'Net-Ping-2.53', # https://rt.cpan.org/Public/Bug/Display.html?id=118451 | |
'Net-Ping-2.54', # https://rt.cpan.org/Public/Bug/Display.html?id=118451 | |
'Net-Proxy-0.13', # https://rt.cpan.org/Ticket/Display.html?id=100007 | |
'Net-SeedServe-v0.2.5', | |
'Net-SIP-0.700', | |
'Net-SIP-0.701', | |
'Net-Statsd-Server-0.20', | |
'NRD-Daemon-0.04', | |
'Patro-0.16', | |
'Perlwikipedia-1.5.2', | |
'Plack-Handler-Stomp-1.13', | |
'Plack-Handler-Stomp-1.14', | |
'PMLTQ-1.1.0', | |
'POE-Component-CPANPLUS-YACSmoke-1.64', | |
'POE-Component-Server-HTTP-KeepAlive-0.0307', # test t/25_poco_simplehttp.t may hang | |
'POE-Component-Server-SimpleHTTP-PreFork-2.10', # may hang | |
'POE-Component-SmokeBox-0.48', | |
'POE-Component-SmokeBox-0.50', | |
'POE-Component-SmokeBox-0.52', | |
'POE-Component-SSLify-1.012', | |
'POE-Component-WWW-XKCD-AsText-0.002', | |
'POE-Loop-IO_Async-0.004', | |
'Proc-Safetynet-0.04', | |
'Redis-1.981', | |
'Parallel-Prefork-0.18', | |
'PDF-Imposition-0.21', | |
'Server-Starter-0.32', | |
'Server-Starter-0.33', | |
'Starlet-0.29', | |
'Starlet-0.30', | |
'Starlet-0.31', | |
'Term-Query-2.0', | |
'Test-WWW-Simple-0.34', | |
'Test2-Harness-0.000001', | |
'Test2-Harness-0.000003', | |
'Test2-Harness-0.001021', | |
'Test2-Harness-0.001022', | |
'Test2-Harness-0.001026', | |
'Test2-Harness-0.001027', | |
'Test2-Harness-0.001028', | |
'Test2-Harness-0.001029', | |
'Vim-Debug-0.904', # https://github.com/kablamo/VimDebug/issues/36 (hangs with perl 5.25.x) | |
'WebService-ChangesXml-0.01', | |
'WebService-Freshservice-0.003', | |
'WWW-Stickam-API-0.02', | |
'X11-SendEvent-1.3', | |
# old, unconfirmed | |
'Alvis-Pipeline-0.11', # used to be in 01.DISABLED.yml - 1-read.t may hang | |
'Alvis-Convert-0.4', # used to be in 01.DISABLED.yml - 1-read.t may hang | |
'AnyEvent-Net-Curl-Queued-0.047', | |
'AnyEvent-ZeroMQ-0.01', | |
'Cvs-0.07', | |
'DJabberd-0.85', | |
'Data-Persist-0.12', | |
'Devel-Debug-Server-1.001', | |
'Directory-Transactional-0.09', | |
'IO-Socket-SSL-1.997', | |
'IO-Socket-Socks-Wrapper-0.11_1', | |
'Limper-0.002', | |
'Lock-Server-1.4', # but not really killable... | |
'Mojo-IRC-0.07', | |
'Net-EPP-Registry-Nominet-0.01_02', | |
'Net-RabbitMQ-Java-2.030102', | |
'PDL-Graphics-Gnuplot-2.000', | |
'POE-Component-IKC-0.2400', | |
'POE-Component-MessageQueue-0.3001', | |
'POE-Quickie-0.18', | |
'POEx-ZMQ-0.000_001', | |
'PPerl-0.25', | |
'RDF-LinkedData-0.66', | |
'Redis-1.982', # at least on some linux systems | |
'RT-Client-REST-0.46', | |
'Sub-Call-Tail-0.05', | |
'Tcl-1.06', | |
'Thrall-0.0303', | |
'Thread-Workers-0.07', | |
'Tie-Hash-DBD-0.11', | |
'WWW-Curl-4.17', | |
'ZMQ-1.06', | |
'ZMQ-FFI-0.13', | |
'ZMQ-FFI-1.11', | |
'ZMQ-LibZMQ3-1.15', | |
'ZMQ-LibZMQ4-0.01', | |
## maybe missing XXX | |
# Gtk2 | |
# Gtk3 | |
# Gapp | |
# Inline-Ruby | |
# POE-Loop-AnyEvent | |
# Net-Random | |
# Vim-Debug (may hang on FreeBSD) | |
# Net-Netcat | |
# Data::OFAC | |
# Glib @ FreeBSD | |
# MogileFS (under FreeBSD) | |
); | |
my @known_problematic_distnames = | |
( | |
($^O eq 'freebsd' ? | |
( | |
'Devel-Examine-Subs', # hangs sometimes, seen with 1.61, 1.62, 1.63 | |
'Log-Any-Adapter-Multiplexor', # 0.01, 0.02, 0.03 | |
'Schedule-LongSteps', # hangs (at least on 9.2 smoker), seen with 0.008, 0.009, 0.010 | |
'Wrap-Sub', # hangs seen with 0.05, 0.06 | |
'Zabbix-ServerScript', # hangs seen with 0.03, 0.04, 0.06 | |
) : | |
$^O eq 'linux' ? | |
( | |
'App-Netdisco', # hangs on: 2.035000, 2.035001, 2.035002, 2.035003 | |
) : | |
() | |
), | |
'Message-Passing-ZeroMQ', # hangs seen with 0.009 and 0.010 (freebsd) and 0.010 (jessie) | |
'Mojo-Redis2', # hangs seen with 0.28 .. 0.31 | |
'Firefox-Marionette', # may hang seen with '0.01'..'0.13' | |
'Gearman', # hangs seen with: 'Gearman-1.12.003', 'Gearman-1.12.004', 'Gearman-1.12.005', 'Gearman-1.12.006', 'Gearman-1.12.008' | |
'HPCI', # hangs with 'HPCI-0.39', 'HPCI-0.40', 'HPCI-0.41', 'HPCI-0.42', 0.46 | |
'POE-Component-CPAN-YACSmoke', # hangs with 1.36 and 1.38 | |
'WWW-WebKit', # may hang with 0.08..0.11 | |
); | |
my @known_problematic_verbose_loop_distvnames = | |
( | |
'WWW-TheEchoNest-1.1', | |
); | |
my %known_problematic_distvnames = map {($_,1)} @known_problematic_distvnames; | |
my %known_problematic_distnames = map {($_,1)} @known_problematic_distnames; | |
my %known_problematic_verbose_loop_distvnames = map {($_,1)} @known_problematic_verbose_loop_distvnames; | |
my $doit; | |
my $v; | |
my $debug; | |
my $sleep = 10; | |
my $max_runtime = 200; # for idle tty | |
my $max_total_runtime = 1800; # process runtime | |
my $max_prereq_check_runtime = 600; | |
my $do_strace; | |
$SIG{TERM} = sub { | |
warn "INFO: Exiting because of SIGTERM.\n"; | |
CORE::exit(0); | |
}; | |
GetOptions( | |
'doit' => \$doit, | |
'v+' => \$v, | |
'debug' => \$debug, | |
'maxruntime=f' => \$max_runtime, | |
'sleep=f' => \$sleep, | |
'strace!' => \$do_strace, | |
) | |
or die "usage: $0 [-doit] [-v] [-maxruntime seconds] [-sleep seconds] [-strace]\n"; | |
my %old_killed_pids; | |
my %reported_unkillable_pid; | |
while() { | |
my %pids_to_kill; | |
###################################################################### | |
# check for hanging PrereqCheck | |
{ | |
my $pid = search_prereqcheck_pid; | |
if (defined $pid) { | |
v_warn "Found pid $pid through 'hanging PrereqCheck'"; | |
$pids_to_kill{$pid} = 1; | |
} | |
} | |
###################################################################### | |
# check for hanging tests | |
{ | |
my @candidates = search_perl_test_candidates; | |
if (@candidates) { | |
my %searchpids = map { ($_->{pid},1) } @candidates; | |
my %children; # pid -> [childpid,...] | |
my %pid_to_rec; | |
for my $p ($get_process_table->()) { | |
my $pid = $p->pid; | |
my $ppid = $p->ppid; | |
if (defined $ppid) { | |
push @{ $children{$ppid} }, $p; | |
} | |
$pid_to_rec{$pid} = $p; | |
} | |
my $check_and_kill_process; | |
$check_and_kill_process = sub { | |
my($p, $kill_descendents) = @_; | |
my $pid = $p->pid; | |
my $child_runtime = time - $p->start; | |
if ($kill_descendents || $child_runtime > $max_runtime) { | |
for my $childp (@{ $children{ $pid } || [] }) { | |
$check_and_kill_process->($childp, 1); | |
} | |
my $p = $pid_to_rec{$pid}; | |
if (!$p) { | |
mywarn "Unexpected: no process entry for pid $pid found\n"; | |
} else { | |
if (($p->state||'') =~ m{^(defunct|zombie)$}) { | |
my $ppid = $p->ppid; | |
v_warn "Process $pid is a zombie. Can we kill the parent (pid=$ppid)?"; | |
if ($ppid <= 1) { | |
v_warn "Parent is the init process, don't do anything"; | |
# fall through | |
} else { | |
my $pp = $pid_to_rec{$ppid}; | |
if (!$pp) { | |
mywarn "Unexpected: no process entry for pid $ppid found\n"; | |
} else { | |
if ($pp->cmndline =~ m{perl}) { | |
v_warn "Will kill parent process " . $pp->cmndline; | |
$pids_to_kill{$ppid} = 1; | |
return; | |
} else { | |
v_warn "Parent does not look like a perl process '" . $pp->cmndline . "', won't kill it"; | |
# fall through | |
} | |
} | |
} | |
} | |
} | |
v_warn "Found pid $pid through 'hanging perl test' (candidates are: " . join(", ", map { $_->{distvname} } @candidates) . ")"; | |
$pids_to_kill{$pid} = 1; | |
} | |
}; | |
for my $searchpid (keys %searchpids) { | |
for my $childp (@{ $children{$searchpid} || [] }) { | |
$check_and_kill_process->($childp); | |
} | |
} | |
} | |
} | |
###################################################################### | |
# Kill processes (if there are any) | |
if (%pids_to_kill) { | |
for my $pid (keys %pids_to_kill) { | |
print STDERR "kill $pid..."; | |
if ($doit) { | |
kill 9 => $pid; | |
} else { | |
print STDERR " (dry run mode, not killing)"; | |
} | |
print STDERR "\n"; | |
} | |
} | |
###################################################################### | |
# Handle unkillable processes | |
if ($doit) { | |
my @unkillable_pids; | |
while(my($pid) = each %old_killed_pids) { | |
if ($pids_to_kill{$pid} && !$reported_unkillable_pid{$pid}) { | |
push @unkillable_pids, $pid; | |
$reported_unkillable_pid{$pid} = 1; | |
} | |
} | |
my $signal_file = "/tmp/wait-and-kill-cpan-smoker-unkillable-pids-" . (eval { ((getpwuid($<))[0]) } || 'unknown'); | |
if (@unkillable_pids) { | |
sysopen my $fh, $signal_file, O_CREAT|O_EXCL | |
or warn "Cannot touch $signal_file; maybe other user created this file? $!"; | |
my $msg = "Warning: unkillable pid(s): @unkillable_pids"; | |
warn "$msg\n"; | |
if ($^O ne 'MSWin32') { | |
if (fork == 0) { | |
my @args = ('-bg', 'red', '-fg', 'white', "wait-and-kill\@".hostname.": $msg"); | |
if (is_in_path('tkmessage')) { | |
system('tkmessage', @args); | |
} else { | |
system('xmessage', @args); | |
} | |
exit; | |
} | |
} | |
} else { | |
if (-e $signal_file) { | |
unlink $signal_file; | |
} | |
} | |
%old_killed_pids = %pids_to_kill; | |
} | |
###################################################################### | |
sleep $sleep; | |
} | |
sub search_prereqcheck_pid () { | |
for my $p ($get_process_table->()) { | |
my $cmndline = $p->cmndline; | |
my $duration = time-$p->start; | |
if ( | |
$cmndline =~ m{perl[\d\.]*[dt]? \S+/CPAN/Reporter/PrereqCheck.pm < \S*tmp\S*CPAN-Reporter-PI} && | |
$duration >= $max_prereq_check_runtime | |
) { | |
my $pid = $p->pid; | |
warn "Found hanging prereq check <$cmndline>, duration=${duration}s, pid=$pid\n"; | |
if ($do_strace) { | |
my $strace_log = "/tmp/strace_wakcp_$<.log"; | |
if (-s $strace_log) { | |
warn "WARN: non-empty $strace_log already exists, won't overwrite...\n"; | |
} else { | |
system("strace -p $pid -o $strace_log -tt -T -s512 &"); | |
warn "strace'ing hanging process, log is in $strace_log...\n"; | |
} | |
} | |
return $pid; | |
} | |
} | |
undef; | |
} | |
sub search_perl_test_candidates () { | |
my @candidates; | |
for my $p ($get_process_table->()) { | |
my $cmndline = $p->cmndline; | |
next if !defined $cmndline; | |
if ( | |
$cmndline =~ m{perl[\d\.]*[dt]? .*-M(Test::Harness|ExtUtils::Command::MM) .*test_harness} || | |
$cmndline =~ m{perl[\d\.]*[dt]? \./Build test} || | |
$cmndline =~ m{perl[\d\.]*[dt]? "-Iblib/lib" "-Iblib/arch" test.pl} || | |
$cmndline =~ m{perl[\d\.]*[dt]? -Iblib/lib -Iblib/arch test.pl} || | |
($^O eq 'MSWin32' && ( | |
# perl.exe + extra quotes around arguments | |
$cmndline =~ m{perl.exe"? .*-M(Test::Harness|ExtUtils::Command::MM)"? .*test_harness} | |
# XXX other forms are missing | |
) | |
) | |
) { | |
my $pid = $p->pid; | |
my $duration = time-$p->start; | |
debug "Found perl test script pid=$pid duration=$duration"; | |
if ($duration > $max_runtime) { | |
my $distvname = guess_distvname $pid; | |
my $push_candidate = sub { | |
push @candidates, { cmndline => $cmndline, distvname => $distvname, duration => $duration, pid => $pid }; | |
}; | |
if ($known_problematic_verbose_loop_distvnames{$distvname}) { | |
debug " distvname=$distvname verbose endless loop candidate"; | |
$push_candidate->(); | |
} else { | |
my $age; | |
my $timeout_type; | |
my $check; | |
my $ttydev = $p->ttydev; | |
if (defined $ttydev) { | |
my(@ttydev_stat) = stat($ttydev); | |
if (!@ttydev_stat) { | |
warn "Strange: cannot stat $ttydev..."; | |
# assume running forever | |
$ttydev_stat[9] = 0; | |
} | |
my $ttydev_age = time - $ttydev_stat[9]; | |
debug " distvname=$distvname tty=$ttydev age=$ttydev_age"; | |
$age = $ttydev_age; | |
$timeout_type = "no output"; | |
$check = $max_runtime; | |
} else { | |
$age = time - $p->start; | |
$timeout_type = 'still running'; | |
$check = $max_total_runtime; | |
} | |
if ($age > $check) { | |
(my $distname = $distvname) =~ s{(.*)-.*}{$1}; | |
if (exists $known_problematic_distvnames{$distvname} || | |
exists $known_problematic_distnames{$distname}) { | |
$push_candidate->(); | |
} else { | |
mywarn "$pid (distvname=$distvname) is long-running (${duration}s, with $timeout_type in the last ${age}s), but not in known problematic dists..."; | |
} | |
} | |
} | |
} | |
} | |
} | |
if (@candidates) { | |
@candidates = sort { $b->{duration} <=> $a->{duration} } @candidates; | |
} | |
@candidates; | |
} | |
sub guess_distvname ($) { | |
my $pid = shift; | |
my $cwd; | |
if ($^O eq 'freebsd') { | |
my @cmd = ('procstat', '-f', $pid); | |
if (open my $fh, '-|', @cmd) { | |
while(<$fh>) { | |
chomp; | |
s/^\s+//; | |
my @f = split /\s+/, $_; | |
if ($f[2] eq 'cwd') { | |
$cwd = $f[9]; | |
last; | |
} | |
} | |
close $fh; | |
} else { | |
my $err = $!; | |
if (!kill 0 => $pid) { | |
# pid vanished, ignore | |
} else { | |
die "Running command '@cmd' failed: $err"; | |
} | |
} | |
} elsif ($^O eq 'linux') { | |
$cwd = readlink "/proc/$pid/cwd"; | |
$cwd =~ s{\Q (deleted)\E$}{}; | |
$cwd =~ s{/$}{}; # seen on cvrsnica-jessie | |
} elsif ($^O eq 'darwin') { | |
open my $fh, '-|', 'lsof', '-p', $pid or die $!; | |
while(<$fh>) { | |
chomp; | |
my(@f) = split /\s+/, $_; | |
if ($f[4] eq 'DIR') { | |
$cwd = $f[8]; | |
last; | |
} | |
} | |
warn "Cannot detect CWD for process $pid using lsof...\n"; | |
} elsif ($^O eq 'MSWin32') { | |
## cwd returns undef, cannot use it... | |
# my $process = P9Y::ProcessTable->process($pid); | |
# if ($process) { | |
# $cwd = $process->cwd; # XXX returns undef XXX | |
# } else { | |
# warn "Cannot find process for pid=$pid...\n"; | |
# } | |
my $output = `handle -p $pid`; | |
my @candidates; | |
for my $l (split /\n/, $output) { | |
$l =~ s{\r}{}; | |
if ($l =~ /^\s*[0-9A-F]+:\s+File\s+\S+\s+(.*)/) { | |
my $candidate = $1; | |
if ($candidate =~ m{\\cpan\\build\\\d+\\[^\\]+$}) { | |
push @candidates, $candidate; | |
} | |
} | |
} | |
if (@candidates == 1) { | |
$cwd = $candidates[0]; | |
} elsif (@candidates == 0) { | |
warn "Did not found cwd candidate using command 'handle'...\n"; | |
} else { | |
warn "Too many cwd candidates found: @candidates\n"; | |
} | |
} | |
if (defined $cwd) { | |
my $distvname = basename $cwd; | |
if ($distvname =~ m{\d-(\d|1\d)$}) { | |
$distvname =~ s{-(\d|1\d)$}{}; # new-style CPAN temporary directory | |
} else { | |
$distvname =~ s{-......$}{}; # old-style | |
} | |
return $distvname; | |
} | |
} | |
sub mywarn ($) { | |
warn strftime('[%F %T] ', localtime), $_[0], "\n"; | |
} | |
sub v_warn ($) { | |
if ($v) { | |
mywarn($_[0]); | |
} | |
} | |
sub debug ($) { | |
if ($debug) { | |
warn strftime('[%F %T] ', localtime) . "DEBUG: $_[0]\n"; | |
} | |
} | |
# REPO BEGIN | |
# REPO NAME is_in_path /home/e/eserte/src/srezic-repository | |
# REPO MD5 e18e6687a056e4a3cbcea4496aaaa1db | |
sub is_in_path { | |
my($prog) = @_; | |
if (file_name_is_absolute($prog)) { | |
if ($^O eq 'MSWin32') { | |
return $prog if (-f $prog && -x $prog); | |
return "$prog.bat" if (-f "$prog.bat" && -x "$prog.bat"); | |
return "$prog.com" if (-f "$prog.com" && -x "$prog.com"); | |
return "$prog.exe" if (-f "$prog.exe" && -x "$prog.exe"); | |
return "$prog.cmd" if (-f "$prog.cmd" && -x "$prog.cmd"); | |
} else { | |
return $prog if -f $prog and -x $prog; | |
} | |
} | |
require Config; | |
%Config::Config = %Config::Config if 0; # cease -w | |
my $sep = $Config::Config{'path_sep'} || ':'; | |
foreach (split(/$sep/o, $ENV{PATH})) { | |
if ($^O eq 'MSWin32') { | |
# maybe use $ENV{PATHEXT} like maybe_command in ExtUtils/MM_Win32.pm? | |
return "$_\\$prog" if (-f "$_\\$prog" && -x "$_\\$prog"); | |
return "$_\\$prog.bat" if (-f "$_\\$prog.bat" && -x "$_\\$prog.bat"); | |
return "$_\\$prog.com" if (-f "$_\\$prog.com" && -x "$_\\$prog.com"); | |
return "$_\\$prog.exe" if (-f "$_\\$prog.exe" && -x "$_\\$prog.exe"); | |
return "$_\\$prog.cmd" if (-f "$_\\$prog.cmd" && -x "$_\\$prog.cmd"); | |
} else { | |
return "$_/$prog" if (-x "$_/$prog" && !-d "$_/$prog"); | |
} | |
} | |
undef; | |
} | |
# REPO END | |
# REPO BEGIN | |
# REPO NAME file_name_is_absolute /home/e/eserte/src/srezic-repository | |
# REPO MD5 89d0fdf16d11771f0f6e82c7d0ebf3a8 | |
BEGIN { | |
if (eval { require File::Spec; defined &File::Spec::file_name_is_absolute }) { | |
*file_name_is_absolute = \&File::Spec::file_name_is_absolute; | |
} else { | |
*file_name_is_absolute = sub { | |
my $file = shift; | |
my $r; | |
if ($^O eq 'MSWin32') { | |
$r = ($file =~ m;^([a-z]:(/|\\)|\\\\|//);i); | |
} else { | |
$r = ($file =~ m|^/|); | |
} | |
$r; | |
}; | |
} | |
} | |
# REPO END | |
__END__ | |
=head1 NAME | |
wait-and-kill-cpan-smoker --- kill hanging perl tests | |
=head1 USAGE | |
Just to show processes which are likely to hang: | |
wait-and-kill-cpan-smoker -v -debug | |
Actually kill processes which are mentioned in the list and not | |
creating any terminal output for more than 200s: | |
wait-and-kill-cpan-smoker -v -debug -doit | |
Verbosity may be increased by adding more C<-v> options. | |
=head1 PRIVATE USAGE | |
Distributed to smoker machines like this (would kill and restart a | |
running wait-and-kill-cpan-smoker process if started like outlined | |
below): | |
(cd ~/bin/sh && gmake rsync-wait-and-kill-cpan-smoker-and-restart) | |
Started on the smokers like this: | |
forever /tmp/wait-and-kill-cpan-smoker -doit -v -debug | |
=cut | |
# Local variables: | |
# compile-command: "perl -c wait-and-kill-cpan-smoker && ssh -A cvrsnica /home/e/eserte/devel/smoker-wait-and-kill-restart.pl" | |
# End: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment