Skip to content

Instantly share code, notes, and snippets.

@tjmehta
Created September 2, 2015 01:37
Show Gist options
  • Save tjmehta/b12bcec19f9872c505df to your computer and use it in GitHub Desktop.
Save tjmehta/b12bcec19f9872c505df to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
# Collect information about a crash and create a report in the directory
# specified by apport.fileutils.report_dir.
# See https://wiki.ubuntu.com/Apport for details.
#
# Copyright (c) 2006 - 2011 Canonical Ltd.
# Author: Martin Pitt <martin.pitt@ubuntu.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version. See http://www.gnu.org/copyleft/gpl.html for
# the full text of the license.
import sys, os, os.path, subprocess, time, traceback, pwd, io
import signal, inspect, grp, fcntl
import apport, apport.fileutils
#################################################################
#
# functions
#
#################################################################
def check_lock():
    '''Exit with status 1 unless this process can take the apport lock.

    An exclusive, non-blocking lock is taken on the file ".lock" inside
    apport.fileutils.report_dir. If the file cannot be created, or the lock
    is already held, a message is logged and the process exits. This keeps
    a series of crashes from starting many apport instances at once.'''

    lock_path = os.path.join(apport.fileutils.report_dir, '.lock')
    open_flags = os.O_WRONLY | os.O_CREAT | os.O_NOFOLLOW
    try:
        lock_fd = os.open(lock_path, open_flags)
    except OSError as err:
        error_log('cannot create lock file (uid %i): %s' % (os.getuid(), str(err)))
        sys.exit(1)

    # The descriptor is never closed here, so the lock stays in place after
    # this function returns.
    try:
        fcntl.lockf(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        error_log('another apport instance is already running, aborting')
        sys.exit(1)
def drop_privileges(pid, partial=False):
    '''Change user and group to match the given target process.

    The target's user and group are taken from the owner of /proc/<pid>.

    pid -- process ID of the target, as a string or an integer.
    partial -- if False (default), both the real and the effective user and
               group ID are set to the target's. If True, only the real IDs
               are set to the target's and the effective IDs keep their
               current values.

    Raises ValueError if /proc/<pid> cannot be examined, and AssertionError
    if the IDs do not have the requested values afterwards.'''

    try:
        target = os.stat('/proc/' + str(pid))
    except OSError as e:
        raise ValueError('Invalid process ID: ' + str(e))

    if partial:
        effective_gid = os.getegid()
        effective_uid = os.geteuid()
    else:
        effective_gid = target.st_gid
        effective_uid = target.st_uid

    # Keep the original ordering: group IDs are changed before user IDs.
    os.setregid(target.st_gid, effective_gid)
    os.setreuid(target.st_uid, effective_uid)

    # Verify the result with explicit checks rather than "assert" statements,
    # which are removed when Python runs with -O. AssertionError is kept as
    # the exception type so that callers see the same failure as before.
    expected = (effective_gid, target.st_gid, effective_uid, target.st_uid)
    actual = (os.getegid(), os.getgid(), os.geteuid(), os.getuid())
    if actual != expected:
        raise AssertionError(
            'privilege change failed: (egid, gid, euid, uid) is %s, expected %s'
            % (str(actual), str(expected)))
def init_error_log():
    '''Open a suitable error log if sys.stderr is not a tty.

    The log path is taken from the APPORT_LOG_FILE environment variable and
    defaults to /var/log/apport.log. File descriptors 1 and 2 are redirected
    to the log, and sys.stdout and sys.stderr are replaced by one stream on
    descriptor 2. If the log cannot be opened, or its ownership or mode
    cannot be adjusted, stdout and stderr are left untouched.'''

    if os.isatty(2):
        return

    log = os.environ.get('APPORT_LOG_FILE', '/var/log/apport.log')
    try:
        f = os.open(log, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
    except OSError:  # on a permission error, don't touch stderr
        return

    try:
        admgid = grp.getgrnam('adm')[2]
        os.chown(log, -1, admgid)
        os.chmod(log, 0o640)
    except KeyError:
        pass  # if group adm doesn't exist, just leave it as root
    except OSError:  # on a permission error, don't touch stderr
        # Close the descriptor so the early return does not leak it.
        os.close(f)
        return

    os.dup2(f, 1)
    os.dup2(f, 2)
    os.close(f)
    sys.stderr = os.fdopen(2, 'wb')
    if sys.version_info.major >= 3:
        # wrap the binary stream so that text can be written to it
        sys.stderr = io.TextIOWrapper(sys.stderr)
    sys.stdout = sys.stderr
def error_log(msg):
    '''Log msg through apport.error(), tagged with our pid and the time.'''

    own_pid = os.getpid()
    timestamp = time.asctime()
    apport.error('apport (pid %s) %s: %s', own_pid, timestamp, msg)
def _log_signal_handler(sgn, frame):
    '''Internal apport signal handler: log the signal and the stack, then exit.

    sgn -- number of the signal that was received.
    frame -- the interrupted frame as passed by the signal module (unused).'''

    # Ignore further deliveries of this signal so that we cannot loop.
    signal.signal(sgn, signal.SIG_IGN)

    # Logging is best effort: whatever goes wrong here, we still exit below.
    try:
        error_log('Got signal %i, aborting; frame:' % sgn)
        for frame_record in inspect.stack():
            error_log(str(frame_record))
    except BaseException:
        pass

    sys.exit(1)
def setup_signals():
    '''Route all crash-like signals to _log_signal_handler, so that apport
    is not called on itself when apport crashed.'''

    crash_like_signals = (
        signal.SIGILL,
        signal.SIGABRT,
        signal.SIGFPE,
        signal.SIGSEGV,
        signal.SIGPIPE,
        signal.SIGBUS,
    )
    for crash_signal in crash_like_signals:
        signal.signal(crash_signal, _log_signal_handler)
def _discard_core(core_file, core_path):
    '''Close and delete a core file that must not be kept.'''

    os.close(core_file)
    os.unlink(core_path)


def _write_fully(fd, data):
    '''Write all of data to fd; return False if no further bytes are accepted.'''

    remaining = memoryview(data)
    while len(remaining) > 0:
        count = os.write(fd, remaining)
        if count <= 0:
            return False
        remaining = remaining[count:]
    return True


def write_user_coredump(pid, cwd, limit, from_report=None):
    '''Write the core into the current directory if ulimit requests it.

    pid -- ID of the crashed process; appended to the file name when
           /proc/sys/kernel/core_uses_pid is not "0".
    cwd -- directory in which the file "core" (or "core.<pid>") is created.
    limit -- core size limit of the crashed process:
             0: do not write anything
             < 0: unlimited, write out everything
             > 0: maximum core size in bytes
    from_report -- path of a written report to take the 'CoreDump' field
                   from; if not given, the core is read from stdin.

    Return the path of the written core file. Return None if nothing was
    written: limit is 0, the file could not be created (it must not exist
    yet), the core exceeds the limit, or writing failed. In the last two
    cases the partially written file is removed again.'''

    if limit == 0:
        return

    core_path = os.path.join(cwd, 'core')
    try:
        with open('/proc/sys/kernel/core_uses_pid') as f:
            if f.read().strip() != '0':
                core_path += '.' + str(pid)
        # O_EXCL: never write into a file that already exists
        core_file = os.open(core_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o640)
    except (OSError, IOError):
        return

    error_log('writing core dump to %s (limit: %s)' % (core_path, str(limit)))

    if from_report:
        r = apport.Report()
        with open(from_report, 'rb') as f:
            r.load(f)
        core = r['CoreDump']
        core_size = len(core)
        if limit > 0 and core_size > limit:
            error_log('aborting core dump writing, size %i exceeds current limit' % core_size)
            _discard_core(core_file, core_path)
            return
        error_log('writing core dump %s of size %i' % (core_path, core_size))
        # os.write() may accept fewer bytes than requested, in particular
        # for very large buffers, so keep writing until everything is out.
        if not _write_fully(core_file, core):
            error_log('aborting core dump writing, could not write')
            _discard_core(core_file, core_path)
            return
    else:
        # read from stdin
        written = 0
        while True:
            block = os.read(0, 1048576)
            size = len(block)
            if size == 0:
                break
            written += size
            if limit > 0 and written > limit:
                error_log('aborting core dump writing, size exceeds current limit %i' % limit)
                _discard_core(core_file, core_path)
                return
            if os.write(core_file, block) != size:
                error_log('aborting core dump writing, could not write')
                _discard_core(core_file, core_path)
                return

    os.close(core_file)
    return core_path
def usable_ram():
    '''Return how many bytes of RAM is currently available that can be
    allocated without causing major thrashing.

    Computed from /proc/meminfo as (MemFree + Cached - Writeback) * 1024.'''

    # apport.Report's loader is reused here to parse /proc/meminfo
    meminfo = apport.Report()
    with open('/proc/meminfo', 'rb') as meminfo_file:
        meminfo.load(meminfo_file)

    def amount(field):
        # the number is the first whitespace-separated token of the value
        return int(meminfo[field].split()[0])

    total = amount('MemFree') + amount('Cached') - amount('Writeback')
    return total * 1024
def is_closing_session(pid, uid):
    '''Check if pid is in a closing user session.

    During that, crashes are common as the session D-BUS and X.org are going
    away, etc. These crash reports are mostly noise, so should be ignored.

    pid -- ID of the crashed process; its environment is read from
           /proc/<pid>/environ to find DBUS_SESSION_BUS_ADDRESS.
    uid -- user ID to switch to (real and effective) while calling gdbus;
           the previous effective user ID is restored afterwards.

    Return True if org.gnome.SessionManager.IsSessionRunning answers
    "(false,". Return False otherwise, including when the process has no
    DBUS_SESSION_BUS_ADDRESS or gdbus cannot be started.
    '''
    # Read the environment as bytes: it can hold arbitrary byte strings, and
    # decoding the whole block as text would fail on invalid sequences.
    with open('/proc/%s/environ' % pid, 'rb') as env_file:
        env = env_file.read().split(b'\0')

    prefix = b'DBUS_SESSION_BUS_ADDRESS='
    for entry in env:
        if entry.startswith(prefix):
            dbus_addr = entry[len(prefix):]
            break
    else:
        error_log('is_closing_session(): no DBUS_SESSION_BUS_ADDRESS in environment')
        return False

    orig_uid = os.geteuid()
    os.setresuid(uid, uid, -1)
    try:
        # gdbus gets an environment that contains nothing but the bus address
        gdbus = subprocess.Popen(['/usr/bin/gdbus', 'call', '-e', '-d',
                                  'org.gnome.SessionManager', '-o', '/org/gnome/SessionManager', '-m',
                                  'org.gnome.SessionManager.IsSessionRunning'], stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE, env={'DBUS_SESSION_BUS_ADDRESS': dbus_addr})
        (out, err) = gdbus.communicate()
        if err:
            # 'replace' keeps undecodable output from raising while logging
            error_log('gdbus call error: ' + err.decode('UTF-8', 'replace'))
    except OSError as e:
        error_log('gdbus call failed, cannot determine running session: ' + str(e))
        return False
    finally:
        os.setresuid(orig_uid, orig_uid, -1)

    error_log('debug: session gdbus call: ' + out.decode('UTF-8', 'replace'))
    if out.startswith(b'(false,'):
        return True

    return False
#################################################################
#
# main
#
#################################################################
# Exactly three arguments are required; otherwise show the usage and exit.
if len(sys.argv) != 4:
    usage_lines = (
        'Usage: %s <pid> <signal number> <core file ulimit>' % sys.argv[0],
        'The core dump is read from stdin.',
    )
    try:
        for usage_line in usage_lines:
            print(usage_line)
    except IOError:
        # sys.stderr might not actually exist, especially not when being
        # called from the kernel
        pass
    sys.exit(1)

# Set up logging first, then make sure we are the only running instance.
init_error_log()
check_lock()
# Main crash handling. Everything runs inside one try block so that any
# unexpected exception is written to the error log, together with our
# process credentials and environment.
try:
    setup_signals()

    (pid, signum, core_ulimit) = sys.argv[1:]

    # drop our process priority level to not disturb userspace so much
    try:
        os.nice(10)
    except OSError:
        pass  # *shrug*, we tried

    # Partially drop privs to gain proper os.access() checks
    drop_privileges(pid, True)

    # try to find the core dump file; if path is relative, prepend cwd of
    # crashed process
    cwd = os.readlink('/proc/' + pid + '/cwd')

    error_log('called for pid %s, signal %s, core limit %s' % (pid, signum, core_ulimit))

    try:
        core_ulimit = int(core_ulimit)
    except ValueError:
        error_log('core limit is invalid, disabling core files')
        core_ulimit = 0
    # clamp core_ulimit to a sensible size, for -1 the kernel reports something
    # absurdly big
    if core_ulimit > 9223372036854775807:
        error_log('ignoring implausibly big core limit, treating as unlimited')
        core_ulimit = -1
    # ulimit specifies blocks, which are kB
    if core_ulimit > 0:
        core_ulimit *= 1024

    # ignore SIGQUIT (it's usually deliberately generated by users)
    if signum == str(signal.SIGQUIT):
        drop_privileges(pid)
        write_user_coredump(pid, cwd, core_ulimit)
        sys.exit(0)

    try:
        pidstat = os.stat('/proc/' + pid)
    except OSError:
        error_log('Invalid PID')
        sys.exit(1)

    # check if the executable was modified after the process started (e. g.
    # package got upgraded in between)
    exe_mtime = os.stat('/proc/%s/exe' % pid).st_mtime
    process_start = os.lstat('/proc/%s/cmdline' % pid).st_mtime
    if not os.path.exists(os.readlink('/proc/%s/exe' % pid)) or exe_mtime > process_start:
        error_log('executable was modified after program start, ignoring')
        sys.exit(1)

    info = apport.Report('Crash')
    info['Signal'] = signum
    if sys.version_info.major < 3:
        info['CoreDump'] = (sys.stdin, True, usable_ram() * 3 / 4, True)
    else:
        # read binary data from stdio
        info['CoreDump'] = (sys.stdin.detach(), True, usable_ram() * 3 / 4, True)

    # We already need this here to figure out the ExecutableName (for scripts,
    # etc).
    info.add_proc_info(pid)

    if 'ExecutablePath' not in info:
        error_log('could not determine ExecutablePath, aborting')
        sys.exit(1)

    # A "<executable>.<uid>.<pid>.hanging" marker in the report directory
    # that is newer than /proc/uptime's ctime turns this report into a
    # 'Hang'; the marker is removed in either case.
    subject = info['ExecutablePath'].replace('/', '_')
    base = '%s.%s.%s.hanging' % (subject, str(pidstat.st_uid), pid)
    hanging = os.path.join(apport.fileutils.report_dir, base)

    if os.path.exists(hanging):
        if (os.stat('/proc/uptime').st_ctime < os.stat(hanging).st_mtime):
            info['ProblemType'] = 'Hang'
        os.unlink(hanging)

    if 'InterpreterPath' in info:
        error_log('script: %s, interpreted by %s (command line "%s")' %
                  (info['ExecutablePath'], info['InterpreterPath'],
                   info['ProcCmdline']))
    else:
        error_log('executable: %s (command line "%s")' %
                  (info['ExecutablePath'], info['ProcCmdline']))

    # ignore non-package binaries (unless configured otherwise)
    if not apport.fileutils.likely_packaged(info['ExecutablePath']):
        if not apport.fileutils.get_config('main', 'unpackaged', False, bool=True):
            error_log('executable does not belong to a package, ignoring')
            # check if the user wants a core dump
            drop_privileges(pid)
            write_user_coredump(pid, cwd, core_ulimit)
            sys.exit(1)

    # ignore SIGXCPU and SIGXFSZ since this indicates some external
    # influence changing soft RLIMIT values when running programs.
    if signum in [str(signal.SIGXCPU), str(signal.SIGXFSZ)]:
        error_log('Ignoring signal %s (caused by exceeding soft RLIMIT)' % signum)
        drop_privileges(pid)
        write_user_coredump(pid, cwd, core_ulimit)
        sys.exit(0)

    # ignore blacklisted binaries
    if info.check_ignored():
        error_log('executable version is blacklisted, ignoring')
        sys.exit(1)

    if is_closing_session(pid, pidstat.st_uid):
        error_log('happens for shutting down session, ignoring')
        sys.exit(1)

    crash_counter = 0

    # Create crash report file descriptor for writing the report into
    # report_dir
    try:
        report = '%s/%s.%i.crash' % (apport.fileutils.report_dir, info['ExecutablePath'].replace('/', '_'), pidstat.st_uid)
        if os.path.exists(report):
            if apport.fileutils.seen_report(report):
                # do not flood the logs and the user with repeated crashes
                with open(report, 'rb') as f:
                    crash_counter = apport.fileutils.get_recent_crashes(f)
                crash_counter += 1
                if crash_counter > 1:
                    drop_privileges(pid)
                    write_user_coredump(pid, cwd, core_ulimit)
                    error_log('this executable already crashed %i times, ignoring' % crash_counter)
                    sys.exit(1)
                # remove the old file, so that we can create the new one with
                # os.O_CREAT|os.O_EXCL
                os.unlink(report)
            else:
                error_log('apport: report %s already exists and unseen, doing nothing to avoid disk usage DoS' % report)
                drop_privileges(pid)
                write_user_coredump(pid, cwd, core_ulimit)
                sys.exit(1)
        # The file is created with mode 0; it only becomes 0o640 once the
        # report has been written completely (see the chmod further down).
        reportfile = os.fdopen(os.open(report, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0), 'wb')
        assert reportfile.fileno() > sys.stderr.fileno()

        # Make sure the crash reporting daemon can read this report
        try:
            gid = pwd.getpwnam('whoopsie').pw_gid
            os.chown(report, pidstat.st_uid, gid)
        except (OSError, KeyError):
            os.chown(report, pidstat.st_uid, pidstat.st_gid)
    except (OSError, IOError) as e:
        error_log('Could not create report file: %s' % str(e))
        sys.exit(1)

    # Totally drop privs before writing out the reportfile.
    drop_privileges(pid)

    info.add_user_info()
    info.add_os_info()

    if crash_counter > 0:
        info['CrashCounter'] = '%i' % crash_counter

    try:
        info.write(reportfile)
        if reportfile != sys.stderr:
            # Ensure that the file gets written to disk in the event of an
            # Upstart crash.
            if info.get('ExecutablePath', '') == '/sbin/init':
                reportfile.flush()
                os.fsync(reportfile.fileno())
                parent_directory = os.path.dirname(report)
                # Open before entering the try block: if os.open() fails
                # there is no descriptor to close, and the cleanup must not
                # hide that error behind a NameError.
                fd = os.open(parent_directory, os.O_RDONLY)
                try:
                    os.fsync(fd)
                finally:
                    os.close(fd)
            reportfile.close()
    except IOError:
        if reportfile != sys.stderr:
            os.unlink(report)
        raise
    if report:
        os.chmod(report, 0o640)
    if reportfile != sys.stderr:
        error_log('wrote report %s' % report)

    # Check if the user wants a core file. We need to create that from the
    # written report, as we can only read stdin once and write_user_coredump()
    # might abort reading from stdin and remove the written core file when
    # core_ulimit is > 0 and smaller than the core size.
    write_user_coredump(pid, cwd, core_ulimit, from_report=report)

except (SystemExit, KeyboardInterrupt):
    # regular exits must not be reported as unhandled exceptions
    raise
except Exception:
    error_log('Unhandled exception:')
    traceback.print_exc()
    error_log('pid: %i, uid: %i, gid: %i, euid: %i, egid: %i' % (
        os.getpid(), os.getuid(), os.getgid(), os.geteuid(), os.getegid()))
    error_log('environment: %s' % str(os.environ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment