Created
September 2, 2015 01:37
Revisions
-
tjmehta created this gist
Sep 2, 2015 .There are no files selected for viewing
#!/usr/bin/python3

# Collect information about a crash and create a report in the directory
# specified by apport.fileutils.report_dir.
# See https://wiki.ubuntu.com/Apport for details.
#
# Copyright (c) 2006 - 2011 Canonical Ltd.
# Author: Martin Pitt <martin.pitt@ubuntu.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.  See http://www.gnu.org/copyleft/gpl.html for
# the full text of the license.

import sys, os, os.path, subprocess, time, traceback, pwd, io
import signal, inspect, grp, fcntl

import apport, apport.fileutils


#################################################################
#
# functions
#
#################################################################

def check_lock():
    '''Abort if another instance of apport is already running.

    This avoids bringing down the system to its knees if there is a series of
    crashes.

    A non-blocking exclusive lock is taken on the file ".lock" in
    apport.fileutils.report_dir. If the lock file cannot be created or the
    lock cannot be acquired, the problem is logged and the process exits with
    status 1.
    '''
    # create a lock file
    lockfile = os.path.join(apport.fileutils.report_dir, '.lock')
    try:
        fd = os.open(lockfile, os.O_WRONLY | os.O_CREAT | os.O_NOFOLLOW)
    except OSError as e:
        error_log('cannot create lock file (uid %i): %s' % (os.getuid(), str(e)))
        sys.exit(1)

    # fd is intentionally never closed here: closing it would release the
    # lock again
    try:
        fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        error_log('another apport instance is already running, aborting')
        sys.exit(1)


def drop_privileges(pid, partial=False):
    '''Change user and group to match the given target process.

    pid is the process ID as a string; the owner and group of /proc/<pid> are
    used as the target IDs.

    With partial=True only the real UID/GID are changed and the current
    effective UID/GID are kept; otherwise both real and effective IDs are set
    to the target's.

    Raise ValueError if /proc/<pid> cannot be stat'ed.
    '''
    stat = None
    try:
        stat = os.stat('/proc/' + pid)
    except OSError as e:
        raise ValueError('Invalid process ID: ' + str(e))

    if partial:
        effective_gid = os.getegid()
        effective_uid = os.geteuid()
    else:
        effective_gid = stat.st_gid
        effective_uid = stat.st_uid

    # change the group first; after changing the user we might not be allowed
    # to change the group any more
    os.setregid(stat.st_gid, effective_gid)
    os.setreuid(stat.st_uid, effective_uid)

    # verify that the IDs really are what we asked for
    assert os.getegid() == effective_gid
    assert os.getgid() == stat.st_gid
    assert os.geteuid() == effective_uid
    assert os.getuid() == stat.st_uid


def init_error_log():
    '''Open a suitable error log if sys.stderr is not a tty.

    The log file is taken from the environment variable APPORT_LOG_FILE,
    defaulting to /var/log/apport.log. File descriptors 1 and 2 as well as
    sys.stdout and sys.stderr are redirected to it. If the log file cannot be
    opened, stdout/stderr are left untouched.
    '''
    if not os.isatty(2):
        log = os.environ.get('APPORT_LOG_FILE', '/var/log/apport.log')
        try:
            f = os.open(log, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600)
            try:
                # make the log readable for group "adm"
                admgid = grp.getgrnam('adm')[2]
                os.chown(log, -1, admgid)
                os.chmod(log, 0o640)
            except KeyError:
                pass  # if group adm doesn't exist, just leave it as root
        except OSError:  # on a permission error, don't touch stderr
            return
        os.dup2(f, 1)
        os.dup2(f, 2)
        os.close(f)
        sys.stderr = os.fdopen(2, 'wb')
        if sys.version_info.major >= 3:
            sys.stderr = io.TextIOWrapper(sys.stderr)
        sys.stdout = sys.stderr


def error_log(msg):
    '''Output something to the error log.

    The message is prefixed with our PID and the current time and passed on
    to apport.error().
    '''
    apport.error('apport (pid %s) %s: %s', os.getpid(), time.asctime(), msg)


def _log_signal_handler(sgn, frame):
    '''Internal apport signal handler.

    Just log the signal and the current stack, and exit with status 1.
    '''
    # reset handler so that we do not get stuck in loops
    signal.signal(sgn, signal.SIG_IGN)

    # logging is best effort only; we exit in any case
    try:
        error_log('Got signal %i, aborting; frame:' % sgn)
        for s in inspect.stack():
            error_log(str(s))
    except Exception:
        pass

    sys.exit(1)


def setup_signals():
    '''Install a signal handler for all crash-like signals, so that apport is
    not called on itself when apport crashed.'''

    signal.signal(signal.SIGILL, _log_signal_handler)
    signal.signal(signal.SIGABRT, _log_signal_handler)
    signal.signal(signal.SIGFPE, _log_signal_handler)
    signal.signal(signal.SIGSEGV, _log_signal_handler)
    signal.signal(signal.SIGPIPE, _log_signal_handler)
    signal.signal(signal.SIGBUS, _log_signal_handler)


def write_user_coredump(pid, cwd, limit, from_report=None):
    '''Write the core into the current directory if ulimit requests it.

    The core is written to <cwd>/core, or <cwd>/core.<pid> if
    /proc/sys/kernel/core_uses_pid is not "0". The file is created
    exclusively, so an already existing core file is never overwritten.

    limit is the core size limit in bytes (see the three cases below).

    If from_report is given, it is the path of a report file whose "CoreDump"
    field is written out; otherwise the core dump is read from stdin.

    Return the path of the written core file, or None if no core file was
    written.
    '''
    # three cases:
    # limit == 0: do not write anything
    # limit < 0: unlimited, write out everything
    # limit nonzero: crashed process' core size ulimit in bytes

    if limit == 0:
        return

    core_path = os.path.join(cwd, 'core')
    try:
        with open('/proc/sys/kernel/core_uses_pid') as f:
            if f.read().strip() != '0':
                core_path += '.' + str(pid)
        core_file = os.open(core_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o640)
    except (OSError, IOError):
        return

    error_log('writing core dump to %s (limit: %s)' % (core_path, str(limit)))

    written = 0

    # Priming read
    if from_report:
        r = apport.Report()
        with open(from_report, 'rb') as f:
            r.load(f)
        core_size = len(r['CoreDump'])
        if limit > 0 and core_size > limit:
            error_log('aborting core dump writing, size %i exceeds current limit' % core_size)
            os.close(core_file)
            os.unlink(core_path)
            return
        error_log('writing core dump %s of size %i' % (core_path, core_size))
        os.write(core_file, r['CoreDump'])
    else:
        # read from stdin
        block = os.read(0, 1048576)

        while True:
            size = len(block)
            if size == 0:
                break
            written += size
            # do not leave a truncated core file behind
            if limit > 0 and written > limit:
                error_log('aborting core dump writing, size exceeds current limit %i' % limit)
                os.close(core_file)
                os.unlink(core_path)
                return
            if os.write(core_file, block) != size:
                error_log('aborting core dump writing, could not write')
                os.close(core_file)
                os.unlink(core_path)
                return
            block = os.read(0, 1048576)

    os.close(core_file)
    return core_path


def usable_ram():
    '''Return how many bytes of RAM is currently available that can be
    allocated without causing major thrashing.

    This is computed as MemFree + Cached - Writeback from /proc/meminfo.
    '''
    # abuse our excellent RFC822 parser to parse /proc/meminfo
    r = apport.Report()
    with open('/proc/meminfo', 'rb') as f:
        r.load(f)

    # values look like "1234 kB"; only the number is of interest
    memfree = int(r['MemFree'].split()[0])
    cached = int(r['Cached'].split()[0])
    writeback = int(r['Writeback'].split()[0])

    return (memfree + cached - writeback) * 1024


def is_closing_session(pid, uid):
    '''Check if pid is in a closing user session.

    During that, crashes are common as the session D-BUS and X.org are going
    away, etc. These crash reports are mostly noise, so should be ignored.

    The session bus address is taken from the environment of process pid, and
    org.gnome.SessionManager.IsSessionRunning is called through gdbus with
    the real and effective UID temporarily set to uid.

    Return True if the session manager reports that the session is not
    running, False otherwise (including when this cannot be determined).
    '''
    # The environment belongs to the crashed process and can contain
    # arbitrary bytes, so read it in binary mode; decoding the whole file as
    # text could fail with UnicodeDecodeError.
    with open('/proc/%s/environ' % pid, 'rb') as environ_file:
        env = environ_file.read().split(b'\0')
    for entry in env:
        if entry.startswith(b'DBUS_SESSION_BUS_ADDRESS='):
            dbus_addr = entry.split(b'=', 1)[1]
            break
    else:
        error_log('is_closing_session(): no DBUS_SESSION_BUS_ADDRESS in environment')
        return False

    # On Python 3 convert the address to str, like the other strings we pass
    # to subprocess; on Python 2 bytes already is str.
    if not isinstance(dbus_addr, str):
        dbus_addr = dbus_addr.decode('UTF-8', 'replace')

    orig_uid = os.geteuid()
    os.setresuid(uid, uid, -1)
    try:
        gdbus = subprocess.Popen(['/usr/bin/gdbus', 'call', '-e', '-d',
                                  'org.gnome.SessionManager', '-o',
                                  '/org/gnome/SessionManager', '-m',
                                  'org.gnome.SessionManager.IsSessionRunning'],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env={'DBUS_SESSION_BUS_ADDRESS': dbus_addr})
        (out, err) = gdbus.communicate()
        if err:
            error_log('gdbus call error: ' + err.decode('UTF-8'))
    except OSError as e:
        error_log('gdbus call failed, cannot determine running session: ' + str(e))
        return False
    finally:
        # always switch back to the original UID
        os.setresuid(orig_uid, orig_uid, -1)
    error_log('debug: session gdbus call: ' + out.decode('UTF-8'))
    if out.startswith(b'(false,'):
        return True

    return False


#################################################################
#
# main
#
#################################################################

if len(sys.argv) != 4:
    try:
        print('Usage: %s <pid> <signal number> <core file ulimit>' % sys.argv[0])
        print('The core dump is read from stdin.')
    except IOError:
        # sys.stderr might not actually exist, especially not when being called
        # from the kernel
        pass
    sys.exit(1)

init_error_log()

check_lock()

try:
    setup_signals()

    (pid, signum, core_ulimit) = sys.argv[1:]

    # drop our process priority level to not disturb userspace so much
    try:
        os.nice(10)
    except OSError:
        pass  # *shrug*, we tried

    # Partially drop privs to gain proper os.access() checks
    drop_privileges(pid, True)

    # try to find the core dump file; if path is relative, prepend cwd of
    # crashed process
    cwd = os.readlink('/proc/' + pid + '/cwd')

    error_log('called for pid %s, signal %s, core limit %s' % (pid, signum, core_ulimit))

    try:
        core_ulimit = int(core_ulimit)
    except ValueError:
        error_log('core limit is invalid, disabling core files')
        core_ulimit = 0
    # clamp core_ulimit to a sensible size, for -1 the kernel reports something
    # absurdly big
    if core_ulimit > 9223372036854775807:
        error_log('ignoring implausibly big core limit, treating as unlimited')
        core_ulimit = -1
    # ulimit specifies blocks, which are kB
    if core_ulimit > 0:
        core_ulimit *= 1024

    # ignore SIGQUIT (it's usually deliberately generated by users)
    if signum == str(signal.SIGQUIT):
        drop_privileges(pid)
        write_user_coredump(pid, cwd, core_ulimit)
        sys.exit(0)

    try:
        pidstat = os.stat('/proc/' + pid)
    except OSError:
        error_log('Invalid PID')
        sys.exit(1)

    # check if the executable was modified after the process started (e. g.
    # package got upgraded in between)
    exe_mtime = os.stat('/proc/%s/exe' % pid).st_mtime
    process_start = os.lstat('/proc/%s/cmdline' % pid).st_mtime
    if not os.path.exists(os.readlink('/proc/%s/exe' % pid)) or exe_mtime > process_start:
        error_log('executable was modified after program start, ignoring')
        sys.exit(1)

    info = apport.Report('Crash')
    info['Signal'] = signum
    # NOTE(review): the tuple presumably makes the report read the core dump
    # lazily from stdin, limited to 3/4 of the usable RAM -- confirm against
    # the apport.Report documentation
    if sys.version_info.major < 3:
        info['CoreDump'] = (sys.stdin, True, usable_ram() * 3 / 4, True)
    else:
        # read binary data from stdio
        info['CoreDump'] = (sys.stdin.detach(), True, usable_ram() * 3 / 4, True)

    # We already need this here to figure out the ExecutableName (for scripts,
    # etc).
    info.add_proc_info(pid)

    if 'ExecutablePath' not in info:
        error_log('could not determine ExecutablePath, aborting')
        sys.exit(1)

    subject = info['ExecutablePath'].replace('/', '_')
    base = '%s.%s.%s.hanging' % (subject, str(pidstat.st_uid), pid)
    hanging = os.path.join(apport.fileutils.report_dir, base)

    # a ".hanging" marker file which is newer than /proc/uptime turns this
    # report into a hang report; the marker is removed in any case
    if os.path.exists(hanging):
        if (os.stat('/proc/uptime').st_ctime < os.stat(hanging).st_mtime):
            info['ProblemType'] = 'Hang'
        os.unlink(hanging)

    if 'InterpreterPath' in info:
        error_log('script: %s, interpreted by %s (command line "%s")' %
                  (info['ExecutablePath'], info['InterpreterPath'],
                   info['ProcCmdline']))
    else:
        error_log('executable: %s (command line "%s")' %
                  (info['ExecutablePath'], info['ProcCmdline']))

    # ignore non-package binaries (unless configured otherwise)
    if not apport.fileutils.likely_packaged(info['ExecutablePath']):
        if not apport.fileutils.get_config('main', 'unpackaged', False, bool=True):
            error_log('executable does not belong to a package, ignoring')
            # check if the user wants a core dump
            drop_privileges(pid)
            write_user_coredump(pid, cwd, core_ulimit)
            sys.exit(1)

    # ignore SIGXCPU and SIGXFSZ since this indicates some external
    # influence changing soft RLIMIT values when running programs.
    if signum in [str(signal.SIGXCPU), str(signal.SIGXFSZ)]:
        error_log('Ignoring signal %s (caused by exceeding soft RLIMIT)' % signum)
        drop_privileges(pid)
        write_user_coredump(pid, cwd, core_ulimit)
        sys.exit(0)

    # ignore blacklisted binaries
    if info.check_ignored():
        error_log('executable version is blacklisted, ignoring')
        sys.exit(1)

    if is_closing_session(pid, pidstat.st_uid):
        error_log('happens for shutting down session, ignoring')
        sys.exit(1)

    crash_counter = 0

    # Create crash report file descriptor for writing the report into
    # report_dir
    try:
        report = '%s/%s.%i.crash' % (apport.fileutils.report_dir, info['ExecutablePath'].replace('/', '_'), pidstat.st_uid)
        if os.path.exists(report):
            if apport.fileutils.seen_report(report):
                # do not flood the logs and the user with repeated crashes
                with open(report, 'rb') as f:
                    crash_counter = apport.fileutils.get_recent_crashes(f)
                crash_counter += 1
                if crash_counter > 1:
                    drop_privileges(pid)
                    write_user_coredump(pid, cwd, core_ulimit)
                    error_log('this executable already crashed %i times, ignoring' % crash_counter)
                    sys.exit(1)
                # remove the old file, so that we can create the new one with
                # os.O_CREAT|os.O_EXCL
                os.unlink(report)
            else:
                error_log('apport: report %s already exists and unseen, doing nothing to avoid disk usage DoS' % report)
                drop_privileges(pid)
                write_user_coredump(pid, cwd, core_ulimit)
                sys.exit(1)
        # create the report with mode 0, so that nobody can read it while it
        # is incomplete; it is chmod'ed once it has been written
        reportfile = os.fdopen(os.open(report, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0), 'wb')
        assert reportfile.fileno() > sys.stderr.fileno()

        # Make sure the crash reporting daemon can read this report
        try:
            gid = pwd.getpwnam('whoopsie').pw_gid
            os.chown(report, pidstat.st_uid, gid)
        except (OSError, KeyError):
            os.chown(report, pidstat.st_uid, pidstat.st_gid)
    except (OSError, IOError) as e:
        error_log('Could not create report file: %s' % str(e))
        sys.exit(1)

    # Totally drop privs before writing out the reportfile.
    drop_privileges(pid)

    info.add_user_info()
    info.add_os_info()

    if crash_counter > 0:
        info['CrashCounter'] = '%i' % crash_counter

    try:
        info.write(reportfile)
        if reportfile != sys.stderr:
            # Ensure that the file gets written to disk in the event of an
            # Upstart crash.
            if info.get('ExecutablePath', '') == '/sbin/init':
                reportfile.flush()
                os.fsync(reportfile.fileno())
                parent_directory = os.path.dirname(report)
                # open the directory outside of the try block: if open()
                # fails there is no fd to close, and the finally clause must
                # not mask the original error with a NameError
                fd = os.open(parent_directory, os.O_RDONLY)
                try:
                    os.fsync(fd)
                finally:
                    os.close(fd)
            reportfile.close()
    except IOError:
        # do not leave a partially written report behind
        if reportfile != sys.stderr:
            os.unlink(report)
        raise
    if report:
        os.chmod(report, 0o640)
    if reportfile != sys.stderr:
        error_log('wrote report %s' % report)

    # Check if the user wants a core file. We need to create that from the
    # written report, as we can only read stdin once and write_user_coredump()
    # might abort reading from stdin and remove the written core file when
    # core_ulimit is > 0 and smaller than the core size.
    write_user_coredump(pid, cwd, core_ulimit, from_report=report)

except (SystemExit, KeyboardInterrupt):
    raise
except Exception:
    # log as much context as possible, as there is nobody to see a traceback
    error_log('Unhandled exception:')
    traceback.print_exc()
    error_log('pid: %i, uid: %i, gid: %i, euid: %i, egid: %i' % (
        os.getpid(), os.getuid(), os.getgid(), os.geteuid(), os.getegid()))
    error_log('environment: %s' % str(os.environ))