ping-based top crashers
# coding: utf-8
---
title: Top Crashers list from client-side stacks
authors:
- Adam Gashlin
- David Durst
tags:
- crash
- client-side
- signature
created_at: 2017-05-10
updated_at: 2017-07-06
tldr: This queries crash pings from a specified day, symbolicates them to assign signatures, and then makes a top-crasher list for that day.
---
# # Top crash ping signatures, by day | |
# In[1]: | |
# This Source Code Form is subject to the terms of the Mozilla Public | |
# License, v. 2.0. If a copy of the MPL was not distributed with this | |
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
from moztelemetry import get_pings_properties | |
from moztelemetry import Dataset | |
import json | |
import requests | |
import re | |
from itertools import islice | |
# ## Ping processing | |
# In[2]: | |
SYMBOLICATION_API_URL = 'http://symbolapi.mozilla.org/' | |
# SYMBOLICATION_API_URL = 'https://symbols.dev.mozaws.net/' | |
COLLAPSE_ARGUMENTS = True | |
ESCAPE_SINGLE_QUOTE = True | |
MAXIMUM_FRAMES_TO_CONSIDER = 40 | |
SIGNATURE_MAX_LEN = 255 | |
TWO_WEEKS = 14 | |
SIX_WEEKS = 42 | |
# ## Symbolication | |
# In[3]: | |
# extract function name from "fn (in module)" | |
EXTRACT_FUNCTION_NAME = re.compile(r'\A(.+) (\(in .+\))\Z') | |
HEX_ADDR = re.compile(r'\A0x[0-9a-fA-F]+\Z') | |
def symbolicate_ping(ping): | |
"""Take a crash ping and return symbolicated stack traces. | |
Uses the symbolication API to look up function names. | |
""" | |
# if there is no payload or stackTraces, return nothing | |
stack_traces = None | |
if not ping.get('payload', None): | |
# return None | |
raise ValueError('No payload') | |
else: | |
payload = ping['payload'] | |
if not payload.get('stackTraces', None): | |
# return None | |
raise ValueError('No stackTraces') | |
else: | |
stack_traces = payload['stackTraces'] | |
# make sure we have threads, modules, and crashing_thread | |
missing = '' | |
if 'threads' not in stack_traces: | |
missing = 'threads' | |
elif 'modules' not in stack_traces: | |
missing = 'modules' | |
elif not stack_traces.get('crash_info', None): | |
missing = 'crash_info' | |
else: | |
threads = stack_traces['threads'] | |
modules = stack_traces['modules'] | |
if 'crashing_thread' not in stack_traces['crash_info']: | |
missing = 'crashing_thread' | |
else: | |
crashing_thread = stack_traces['crash_info']['crashing_thread'] | |
if missing: | |
msg = "missing " + missing | |
if stack_traces: | |
msg += "; " + stack_traces.get('status', 'STATUS MISSING') | |
raise ValueError(msg) | |
    if not (0 <= crashing_thread < len(threads)):
        msg = "crashing_thread %s out of range" % crashing_thread
        raise ValueError(msg)
symbolicated_threads = [] | |
modules_to_symbolicate = [] | |
threads_to_symbolicate = [] | |
for thread_idx, src_thread in enumerate(threads): | |
out_thread_frames = [] | |
symbolicated_threads.append(out_thread_frames) | |
frames_to_symbolicate = [] | |
# only the crashing thread and thread 0 are used for the | |
# signature, skip symbol lookup for others | |
if thread_idx != 0 and thread_idx != crashing_thread: | |
continue | |
if 'frames' not in src_thread: | |
continue | |
for frame_idx, src_frame in enumerate(islice( | |
src_thread['frames'], MAXIMUM_FRAMES_TO_CONSIDER)): | |
out_frame = {} | |
out_thread_frames.append(out_frame) | |
            if 'ip' not in src_frame:
                msg = "missing ip for thread %s frame %s" % (thread_idx, frame_idx)
                raise ValueError(msg)
ip_int = int(src_frame['ip'], 16) | |
out_frame['offset'] = src_frame['ip'] | |
if 'module_index' not in src_frame: | |
continue | |
module_index = src_frame['module_index'] | |
            if not (0 <= module_index < len(modules)):
                msg = "module_index %s out of range for thread %s frame %s" % (
                    module_index, thread_idx, frame_idx)
                raise ValueError(msg)
module = modules[module_index] | |
if 'base_addr' not in module: | |
msg = "missing base_addr for module " + module_index | |
raise ValueError(msg) | |
try: | |
module_offset_int = ip_int - int(module['base_addr'], 16) | |
except ValueError: | |
msg = "bad base_addr " + module['base_addr'] | |
msg += " for module " + module_index | |
raise ValueError(msg) | |
if 'filename' in module: | |
out_frame['module'] = module['filename'] | |
out_frame['module_offset'] = '0x%x' % module_offset_int | |
# prepare this frame for symbol lookup | |
if 'debug_file' in module and 'debug_id' in module: | |
mp = (module['debug_file'], module['debug_id']) | |
if mp not in modules_to_symbolicate: | |
modules_to_symbolicate.append(mp) | |
frames_to_symbolicate.append( | |
{'lookup': [modules_to_symbolicate.index(mp), | |
module_offset_int], | |
'output': out_frame}) | |
if len(frames_to_symbolicate) > 0: | |
threads_to_symbolicate.append(frames_to_symbolicate) | |
if len(threads_to_symbolicate) == 0: | |
return symbolicated_threads | |
sym_request = { | |
'stacks': [[f['lookup'] for f in t] for t in threads_to_symbolicate], | |
'memoryMap': | |
[[debug_file, debug_id] for | |
(debug_file, debug_id) in modules_to_symbolicate], | |
'version': 4} | |
response = requests.post(SYMBOLICATION_API_URL, | |
json=sym_request) | |
response.raise_for_status() | |
sym_result = response.json() | |
stacks = sym_result['symbolicatedStacks'] | |
for thread, thread_result in zip(threads_to_symbolicate, stacks): | |
for f, symbol in zip(thread, thread_result): | |
module_idx = f['lookup'][0] | |
if sym_result['knownModules'][module_idx]: | |
                match = EXTRACT_FUNCTION_NAME.match(symbol)
                function_name = match.group(1) if match else None
                if function_name and not HEX_ADDR.match(function_name):
                    f['output']['function'] = function_name
return symbolicated_threads | |
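# For reference, a minimal sketch of the version-4 symbolication request and
# response shapes that symbolicate_ping builds and consumes. The module name,
# debug id, offsets, and function names below are hypothetical and shown for
# illustration only (nothing here is sent to the service):
_example_sym_request = {
    'stacks': [[[0, 0x3e8], [0, 0x1f40]]],  # [module index, module offset] pairs
    'memoryMap': [['xul.pdb', '44E4EC8C2F41492B9369D6B9A059577C2']],
    'version': 4}
_example_sym_response = {
    'symbolicatedStacks': [['NS_InvokeByIndex (in xul.pdb)',
                            'XPC_WN_CallMethod (in xul.pdb)']],
    'knownModules': [True]}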
# ## Signature generation | |
# from https://github.com/mozilla/socorro/blob/master/socorro/processor/signature_utilities.py | |
# In[4]: | |
def generate_signature(ping, symbolicated_threads): | |
"""Using a crash ping and associated symbolicated stacks from each thread, | |
generate a signature identifying the crash. This is intended to behave | |
similarly to Socorro, given the data available in the pings. | |
""" | |
# SignatureGenerationRule | |
try: | |
lowercase = 'Windows_NT' in ping['environment']['system']['os']['name'] | |
except KeyError: | |
lowercase = False | |
stack_traces = ping['payload']['stackTraces'] | |
crashing_thread_idx = stack_traces['crash_info']['crashing_thread'] | |
crashing_thread = symbolicated_threads[crashing_thread_idx] | |
signature_list = create_frame_list(crashing_thread, lowercase) | |
signature = signature_from_list(signature_list) | |
# StackwalkerErrorSignatureRule | |
if signature.startswith('EMPTY'): | |
try: | |
signature = "%s; %s" % (signature, stack_traces['status']) | |
except KeyError: | |
pass | |
# OOMSignature | |
signature = oom_signature(ping, signature) | |
# AbortSignature | |
signature = abort_signature(ping, signature) | |
# TODO: something about MozCrashReason? | |
# TODO: something more useful for BaseThreadInitThunk | |
# SignatureShutdownTimeout | |
signature = shutdown_timeout_signature(ping, signature) | |
# SignatureRunWatchDog | |
# TODO: detect more types of hang watchdog timeout | |
if 'RunWatchdog' in signature: | |
# Always use thread 0 in this case, because that's the thread that | |
# was hanging when the software was artificially crashed. | |
crashing_thread = symbolicated_threads[0] | |
signature_list = create_frame_list(crashing_thread, lowercase) | |
signature = "shutdownhang | %s" % signature_from_list(signature_list) | |
# SigTrim | |
signature = signature.strip() | |
# SigTrunc | |
if len(signature) > SIGNATURE_MAX_LEN: | |
signature = "%s..." % signature[:SIGNATURE_MAX_LEN - 3] | |
return signature | |
# from SignatureGenerationRule._create_frame_list | |
def create_frame_list(thread, make_modules_lower_case): | |
frame_signatures_list = [] | |
for a_frame in thread: | |
if make_modules_lower_case and 'module' in a_frame: | |
a_frame['module'] = a_frame['module'].lower() | |
normalized_signature = normalize_signature(**a_frame) | |
if 'normalized' not in a_frame: | |
a_frame['normalized'] = normalized_signature | |
frame_signatures_list.append(normalized_signature) | |
return frame_signatures_list | |
# from CSignatureToolBase._do_generate | |
def signature_from_list(source_list): | |
""" | |
each element of signatureList names a frame in the crash stack; and is: | |
- a prefix of a relevant frame: Append this element to the signature | |
- a relevant frame: Append this element and stop looking | |
- irrelevant: Append this element only after seeing a prefix frame | |
The signature is a ' | ' separated string of frame names. | |
""" | |
# shorten source_list to the first signatureSentinel | |
sentinel_locations = [] | |
for a_sentinel in signature_sentinels: | |
if type(a_sentinel) == tuple: | |
a_sentinel, condition_fn = a_sentinel | |
if not condition_fn(source_list): | |
continue | |
try: | |
sentinel_locations.append(source_list.index(a_sentinel)) | |
except ValueError: | |
pass | |
if sentinel_locations: | |
source_list = source_list[min(sentinel_locations):] | |
# Get all the relevant frame signatures. | |
new_signature_list = [] | |
for a_signature in source_list: | |
# If the signature matches the irrelevant signatures regex, | |
# skip to the next frame. | |
if irrelevant_signature_re.match(a_signature): | |
continue | |
# If the signature matches the trim dll signatures regex, | |
# rewrite it to remove all but the module name. | |
if trim_dll_signature_re.match(a_signature): | |
a_signature = a_signature.split('@')[0] | |
# If this trimmed DLL signature is the same as the previous | |
# frame's, we do not want to add it. | |
if ( | |
new_signature_list and | |
a_signature == new_signature_list[-1] | |
): | |
continue | |
new_signature_list.append(a_signature) | |
# If the signature does not match the prefix signatures regex, | |
# then it is the last one we add to the list. | |
if not prefix_signature_re.match(a_signature): | |
break | |
# Add a special marker for hang crash reports. | |
#if hang_type: | |
# new_signature_list.insert(0, self.hang_prefixes[hang_type]) | |
signature = ' | '.join(new_signature_list) | |
# Handle empty signatures to explain why we failed generating them. | |
if signature == '' or signature is None: | |
try: | |
signature = source_list[0] | |
except IndexError: | |
signature = "EMPTY: no frame data available" | |
if ESCAPE_SINGLE_QUOTE: | |
signature = signature.replace("'", "''") | |
if len(signature) > SIGNATURE_MAX_LEN: | |
signature = "%s..." % signature[:SIGNATURE_MAX_LEN - 3] | |
return signature | |
FIXUP_SPACE = re.compile(r' (?=[\*&,])') | |
FIXUP_COMMA = re.compile(r',(?! )') | |
# from CSignatureToolBase.normalize_signature and friends | |
def normalize_signature( | |
module=None, | |
function=None, | |
file=None, | |
line=None, | |
module_offset=None, | |
offset=None, | |
function_offset=None, | |
normalized=None, | |
**kwargs # eat any extra kwargs passed in | |
): | |
""" returns a structured conglomeration of the input parameters to | |
serve as a signature. the parameter names of this function reflect the | |
exact names of the fields from the jsonmdsw frame output. this allows | |
this function to be invoked by passing a frame as **a_frame. sometimes, | |
a frame may already have a normalized version cached. if that exists, | |
return it instead. | |
""" | |
if normalized is not None: | |
return normalized | |
def collapse( | |
function_signature_str, | |
open_string, | |
replacement_open_string, | |
close_string, | |
replacement_close_string, | |
exception_substring_list=(), # list of exceptions that shouldn't collapse | |
): | |
"""takes a string representing a C/C++ function signature | |
        and replaces anything between two possibly nested delimiters"""
target_counter = 0 | |
collapsed_list = [] | |
exception_mode = False | |
def append_if_not_in_collapse_mode(a_character): | |
if not target_counter: | |
collapsed_list.append(a_character) | |
def is_exception( | |
exception_list, | |
remaining_original_line, | |
line_up_to_current_position | |
): | |
for an_exception in exception_list: | |
if remaining_original_line.startswith(an_exception): | |
return True | |
if line_up_to_current_position.endswith(an_exception): | |
return True | |
return False | |
for index, a_character in enumerate(function_signature_str): | |
if a_character == open_string: | |
if is_exception( | |
exception_substring_list, | |
function_signature_str[index + 1:], | |
function_signature_str[:index] | |
): | |
exception_mode = True | |
append_if_not_in_collapse_mode(a_character) | |
continue | |
append_if_not_in_collapse_mode(replacement_open_string) | |
target_counter += 1 | |
elif a_character == close_string: | |
if exception_mode: | |
append_if_not_in_collapse_mode(a_character) | |
exception_mode = False | |
else: | |
target_counter -= 1 | |
append_if_not_in_collapse_mode(replacement_close_string) | |
else: | |
append_if_not_in_collapse_mode(a_character) | |
edited_function = ''.join(collapsed_list) | |
return edited_function | |
if function: | |
function = collapse( | |
function, | |
'<', | |
'<', | |
'>', | |
'T>', | |
('name omitted', 'IPC::ParamTraits') | |
) | |
if COLLAPSE_ARGUMENTS: | |
function = collapse( | |
function, | |
'(', | |
'', | |
')', | |
'', | |
('anonymous namespace', 'operator') | |
) | |
# Remove spaces before all stars, ampersands, and commas | |
function = FIXUP_SPACE.sub('', function) | |
# Ensure a space after commas | |
function = FIXUP_COMMA.sub(', ', function) | |
return function | |
if not module and not module_offset and offset: | |
return "@%s" % offset | |
if not module: | |
module = '' # might have been None | |
return '%s@%s' % (module, module_offset) | |
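# A quick illustration of normalize_signature with hypothetical frame values:
# template arguments collapse to <T>, argument lists are dropped, and a frame
# with no function name falls back to module@module_offset.
print normalize_signature(function='mozilla::Foo<int>::Bar(int, char *)')
# -> mozilla::Foo<T>::Bar
print normalize_signature(module='xul.dll', module_offset='0x1a2b3c')
# -> xul.dll@0x1a2b3c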
def oom_signature(ping, signature): | |
match = False | |
if 'metadata' in ping['payload']: | |
if 'OOMAllocationSize' in ping['payload']['metadata']: | |
match = True | |
for a_signature_fragment in ( 'NS_ABORT_OOM', | |
'mozalloc_handle_oom', | |
'CrashAtUnhandlableOOM', | |
'AutoEnterOOMUnsafeRegion'): | |
if a_signature_fragment in signature: | |
match = True | |
if not match: | |
return signature | |
try: | |
oom_size = int(ping['payload']['metadata']['OOMAllocationSize']) | |
if oom_size <= 262144: # 256K | |
signature = "OOM | small" | |
else: | |
signature = ( | |
"OOM | large | " + signature | |
) | |
except (TypeError, AttributeError, KeyError): | |
signature = ( | |
"OOM | unknown | " + signature | |
) | |
return signature | |
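# Illustration only (hypothetical ping fragment): a crash carrying a small
# OOMAllocationSize collapses to "OOM | small"; larger allocations keep the
# original signature behind an "OOM | large | " prefix.
_example_oom_ping = {'payload': {'metadata': {'OOMAllocationSize': '4096'}}}
print oom_signature(_example_oom_ping, 'mozalloc_handle_oom | js::Foo')
# -> OOM | small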
def abort_signature(ping, signature): | |
if 'metadata' not in ping['payload']: | |
return signature | |
if 'AbortMessage' not in ping['payload']['metadata']: | |
return signature | |
if not ping['payload']['metadata']['AbortMessage']: | |
return signature | |
abort_message = ping['payload']['metadata']['AbortMessage'] | |
if '###!!! ABORT: file ' in abort_message: | |
# This is an abort message that contains no interesting | |
# information. We just want to put the "Abort" marker in the | |
# signature. | |
return 'Abort | {}'.format(signature) | |
if '###!!! ABORT:' in abort_message: | |
# Recent crash reports added some irrelevant information at the | |
# beginning of the abort message. We want to remove that and keep | |
# just the actual abort message. | |
abort_message = abort_message.split('###!!! ABORT:', 1)[1].strip() | |
if ': file ' in abort_message: | |
# Abort messages contain a file name and a line number. Since | |
# those are very likely to change between builds, we want to | |
# remove those parts from the signature. | |
abort_message = abort_message.split(': file ', 1)[0].strip() | |
if len(abort_message) > 80: | |
abort_message = abort_message[:77] + '...' | |
return 'Abort | {} | {}'.format(abort_message, signature) | |
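# Illustration only (hypothetical abort message): the "###!!! ABORT:" prefix
# and the trailing file/line are stripped, and the remainder is folded into
# the signature behind an "Abort" marker.
_example_abort_ping = {'payload': {'metadata': {
    'AbortMessage': '###!!! ABORT: unable to find a usable font: file gfxFont.cpp, line 123'}}}
print abort_signature(_example_abort_ping, 'NS_DebugBreak | gfxFontGroup::Foo')
# -> Abort | unable to find a usable font | NS_DebugBreak | gfxFontGroup::Foo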
def shutdown_timeout_signature(ping, signature): | |
if 'metadata' not in ping['payload']: | |
return signature | |
timeout_json = ping['payload']['metadata'].get('AsyncShutdownTimeout','') | |
if not timeout_json: | |
return signature | |
parts = ['AsyncShutdownTimeout'] | |
try: | |
shutdown_data = json.loads(timeout_json) | |
parts.append(shutdown_data['phase']) | |
conditions = [c['name'] for c in shutdown_data['conditions']] | |
if conditions: | |
conditions.sort() | |
parts.append(','.join(conditions)) | |
else: | |
parts.append("(none)") | |
    except (ValueError, KeyError):
parts.append("UNKNOWN") | |
new_sig = ' | '.join(parts) | |
return new_sig | |
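# Illustration only (hypothetical shutdown data): the AsyncShutdownTimeout
# annotation replaces the incoming signature entirely with the shutdown phase
# and the sorted condition names.
_example_shutdown_ping = {'payload': {'metadata': {
    'AsyncShutdownTimeout': json.dumps({
        'phase': 'profile-before-change',
        'conditions': [{'name': 'PlacesUtils wrapped promise'}]})}}}
print shutdown_timeout_signature(_example_shutdown_ping, 'mozalloc_abort')
# -> AsyncShutdownTimeout | profile-before-change | PlacesUtils wrapped promise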
# ## Lists of function signatures used in signature generation | |
# from https://github.com/mozilla/socorro/tree/master/socorro/siglists | |
# In[5]: | |
irrelevant_signature_re = re.compile( | |
'|'.join("""@0x[0-9a-fA-F]{2,} | |
@0x[1-9a-fA-F] | |
__aeabi_fcmpgt.* | |
ashmem | |
app_process@0x.* | |
core\.odex@0x.* | |
core::panicking::.* | |
CrashStatsLogForwarder::CrashAction | |
_CxxThrowException | |
dalvik-heap | |
dalvik-jit-code-cache | |
dalvik-LinearAlloc | |
dalvik-mark-stack | |
data@app@org\.mozilla\.f.*-\d\.apk@classes\.dex@0x.* | |
framework\.odex@0x.* | |
google_breakpad::ExceptionHandler::HandleInvalidParameter.* | |
KiFastSystemCallRet | |
libandroid_runtime\.so@0x.* | |
libbinder\.so@0x.* | |
libc\.so@.* | |
libc-2\.5\.so@.* | |
libEGL\.so@.* | |
libdvm\.so\s*@\s*0x.* | |
libgui\.so@0x.* | |
libicudata.so@.* | |
libMali\.so@0x.* | |
libutils\.so@0x.* | |
libz\.so@0x.* | |
linux-gate\.so@0x.* | |
mnt@asec@org\.mozilla\.f.*-\d@pkg\.apk@classes\.dex@0x.* | |
MOZ_Assert | |
MOZ_Crash | |
mozcrt19.dll@0x.* | |
mozilla::gfx::Log<.* | |
mozilla::ipc::RPCChannel::Call | |
_NSRaiseError | |
(Nt|Zw)?WaitForSingleObject(Ex)? | |
(Nt|Zw)?WaitForMultipleObjects(Ex)? | |
nvmap@0x.* | |
org\.mozilla\.f.*-\d\.apk@0x.* | |
PR_WaitCondVar | |
RaiseException | |
RtlpAdjustHeapLookasideDepth | |
std::_Atomic_fetch_add_4 | |
std::panicking::.* | |
system@framework@.*\.jar@classes\.dex@0x.* | |
___TERMINATING_DUE_TO_UNCAUGHT_EXCEPTION___ | |
WaitForSingleObjectExImplementation | |
WaitForMultipleObjectsExImplementation | |
RealMsgWaitFor.* | |
_ZdlPv | |
zero""".split('\n'))) | |
prefix_signature_re = re.compile( | |
'|'.join("""@0x0 | |
.*CrashAtUnhandlableOOM | |
Abort | |
.*abort | |
.*alloc_impl | |
_alloca_probe.* | |
__android_log_assert | |
arena_.* | |
BaseGetNamedObjectDirectory | |
.*calloc | |
cert_.* | |
CERT_.* | |
CFRelease | |
_chkstk | |
CleanupPerAppKey | |
CrashInJS | |
__delayLoadHelper2 | |
dlmalloc | |
dlmalloc_trim | |
dvm.* | |
EtwEventEnabled | |
extent_.* | |
fastcopy_I | |
fastzero_I | |
_files_getaddrinfo | |
.*free | |
free_impl | |
GCGraphBuilder::NoteXPCOMChild | |
getanswer | |
HandleInvalidParameter | |
HeapFree | |
huge_dalloc | |
huge_palloc | |
ialloc | |
imalloc | |
init_library | |
InvalidArrayIndex_CRASH | |
invalid_parameter_noinfo | |
_invalid_parameter_noinfo | |
isalloc | |
jemalloc_crash | |
je_.* | |
JNI_CreateJavaVM | |
_JNIEnv.* | |
JNI_GetCreatedJavaVM.* | |
js::AutoCompartment::AutoCompartment.* | |
js::AutoEnterOOMUnsafeRegion::crash | |
js::detail::HashTable<.*>::.* | |
js::HashSet<.*>::.* | |
js::HashMap<.*>::.* | |
js::LifoAlloc::getOrCreateChunk | |
JSAutoCompartment::JSAutoCompartment.* | |
JS_DHashTableEnumerate | |
JS_DHashTableOperate | |
JS_NewStringCopyZ.* | |
kill | |
__libc_android_abort | |
libobjc.A.dylib@0x1568. | |
(libxul\.so|xul\.dll|XUL)@0x.* | |
LL_.* | |
malloc | |
_MD_.* | |
memcmp | |
__memcmp16 | |
memcpy | |
memmove | |
memset | |
mozalloc_abort.* | |
mozalloc_handle_oom | |
moz_free | |
mozilla::AndroidBridge::AutoLocalJNIFrame::~AutoLocalJNIFrame | |
mozilla::CondVar::.* | |
mozilla::ipc::LogicError | |
mozilla::ipc::MessageChannel::AssertWorkerThread | |
mozilla::ipc::MessageChannel::Call | |
mozilla::ipc::MessageChannel::CxxStackFrame::CxxStackFrame | |
mozilla::ipc::MessageChannel::Send | |
mozilla::ipc::RPCChannel::Call | |
mozilla::ipc::RPCChannel::CxxStackFrame::CxxStackFrame | |
mozilla::ipc::RPCChannel::EnteredCxxStack | |
mozilla::ipc::RPCChannel::Send | |
mozilla::layers::CompositorD3D11::Failed | |
mozilla::layers::CompositorD3D11::HandleError | |
mozilla.*FatalError | |
moz_xmalloc | |
moz_xrealloc | |
MOZ_CrashOOL | |
MOZ_CrashPrintf | |
msvcr120\.dll@0x.* | |
\<name omitted\> | |
NP_Shutdown | |
(NS_)?(Lossy)?(Copy|Append|Convert).*UTF.* | |
nsACString_internal::Assign.* | |
nsAString_internal::Assign.* | |
nsACString_internal::BeginWriting | |
nsAString_internal::BeginWriting | |
nsACString_internal::SetCapacity | |
NS_strcmp | |
nsBaseHashtable<.*>::.* | |
nsClassHashtable<.*>::.* | |
nsCOMPtr.* | |
NS_ABORT_OOM.* | |
nsDataHashtable<.*>::.* | |
NS_DebugBreak.* | |
nsDebugImpl::Abort | |
nsDependentString::nsDependentString | |
nsEventQueue::GetEvent | |
nsThread::GetEvent | |
nsThread::nsChainedEventQueue::GetEvent | |
[-+]\[NSException raise(:format:(arguments:)?)?\] | |
nsInterfaceHashtable<.*>::.* | |
nsJSThingHashtable<.*>::.* | |
nsObjCExceptionLogAbort | |
nsRefPtr.* | |
NSS.* | |
nss.* | |
nsTArray<.* | |
nsTArray_base<.* | |
nsTArray_Impl<.* | |
nsTHashtable<.*>::.* | |
nsThread::Shutdown | |
NtUser.* | |
objc_exception_throw | |
objc_msgSend | |
operator new | |
<.*>::operator() | |
PLDHashTable::.* | |
PL_.* | |
port_.* | |
PORT_.* | |
_PR_.* | |
PR_.* | |
.*ProcessNextEvent.* | |
__psynch_cvwait | |
_pthread_cond_wait | |
pthread_mutex_lock | |
_purecall | |
raise | |
realloc | |
recv | |
.*ReentrantMonitor::Wait.* | |
RefPtr.* | |
_RTC_Terminate | |
Rtl.* | |
_Rtl.* | |
__Rtl.* | |
__rust_start_panic | |
SEC_.*Item | |
seckey_.* | |
SECKEY_.* | |
__security_check_cookie | |
send | |
setjmp | |
sigblock | |
sigprocmask | |
SocketAccept | |
SocketAcceptRead | |
SocketAvailable | |
SocketAvailable64 | |
SocketBind | |
SocketClose | |
SocketConnect | |
SocketGetName | |
SocketGetPeerName | |
SocketListen | |
SocketPoll | |
SocketRead | |
SocketRecv | |
SocketSend | |
SocketShutdown | |
SocketSync | |
SocketTransmitFile | |
SocketWrite | |
SocketWritev | |
ssl_.* | |
SSL_.* | |
std::_Allocate.* | |
std::list<.*>::.* | |
strcat | |
strncmp | |
ssl3_.* | |
strchr | |
strcmp | |
strcpy | |
.*strdup | |
strlen | |
strncpy | |
strzcmp16 | |
strstr | |
__swrite | |
TlsGetValue | |
TouchBadMemory | |
vcruntime140\.dll@0x.* | |
_VEC_memcpy | |
_VEC_memzero | |
.*WaitFor.* | |
wcslen | |
__wrap_realloc | |
WSARecv.* | |
WSASend.* | |
_ZdaPvRKSt9nothrow_t
zzz_AsmCodeRange_.* | |
.*DebugAbort.* | |
mozilla::ipc::MessageChannel::~MessageChannel.* | |
mozilla::MakeUnique<.*> | |
aticfx32\.dll | |
aticfx64\.dll | |
atidxx32\.dll | |
atidxx64\.dll | |
atiu9pag\.dll | |
atiu9p64\.dll | |
atiumd6a\.dll | |
atiumdag\.dll | |
atiumdva\.dll | |
atiuxpag\.dll | |
igd10iumd32\.dll | |
igd10iumd64\.dll | |
igd10umd32\.dll | |
igd10umd64\.dll | |
igdumd32\.dll | |
igdumd64\.dll | |
igdumdim32\.dll | |
igdumdim64\.dll | |
igd11dxva32\.dll | |
igd11dxva64\.dll | |
igdusc32\.dll | |
igdusc64\.dll | |
nvd3dum\.dll | |
nvd3dumx\.dll | |
nvoglnt\.dll | |
nvumdshim\.dll | |
nvumdshimx\.dll | |
nvwgf2um\.dll | |
nvwgf2umx\.dll | |
nvapi\.dll | |
nvapi64\.dll | |
nvscpapi\.dll | |
nvoglv32\.dll | |
nvoglv64\.dll""".split('\n'))) | |
trim_dll_signature_re = re.compile( | |
'|'.join("""aticfx32\.dll.* | |
aticfx64\.dll.* | |
atidxx32\.dll.* | |
atidxx64\.dll.* | |
atiu9pag\.dll.* | |
atiu9p64\.dll.* | |
atiumd6a\.dll.* | |
atiumdag\.dll.* | |
atiumdva\.dll.* | |
atiuxpag\.dll.* | |
igd10iumd32\.dll.* | |
igd10iumd64\.dll.* | |
igd10umd32\.dll.* | |
igd10umd64\.dll.* | |
igdumd32\.dll.* | |
igdumd64\.dll.* | |
igdumdim32\.dll.* | |
igdumdim64\.dll.* | |
igd11dxva32\.dll.* | |
igd11dxva64\.dll.* | |
igdusc32\.dll.* | |
igdusc64\.dll.* | |
nvd3dum\.dll.* | |
nvd3dumx\.dll.* | |
nvoglnt\.dll.* | |
nvumdshim\.dll.* | |
nvumdshimx\.dll.* | |
nvwgf2um\.dll.* | |
nvwgf2umx\.dll.* | |
nvapi\.dll.* | |
nvapi64\.dll.* | |
nvscpapi\.dll.* | |
nvoglv32\.dll.* | |
nvoglv64\.dll.*""".split('\n'))) | |
signature_sentinels = """_purecall | |
Java_org_mozilla_gecko_GeckoAppShell_reportJavaCrash | |
google_breakpad::ExceptionHandler::HandleInvalidParameter""".split('\n') | |
signature_sentinels.append( | |
( | |
'mozilla::ipc::RPCChannel::Call(IPC::Message*, IPC::Message*)', | |
lambda x: ( | |
'CrashReporter::CreatePairedMinidumps(void*, ' | |
'unsigned long, nsAString_internal*, nsILocalFile**, ' | |
'nsILocalFile**)' | |
) in x | |
) | |
) | |
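# A quick worked example of signature_from_list using the lists above, with
# hypothetical frame names: the Rtl* frame is irrelevant and skipped, malloc
# is a prefix frame and kept, and the first non-prefix frame ends the walk.
print signature_from_list(['RtlpAdjustHeapLookasideDepth', 'malloc', 'nsFoo::Bar', 'main'])
# -> malloc | nsFoo::Bar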
# # Query | |
# Gather one day of crashes with stackTraces | |
# In[6]: | |
import datetime | |
import traceback | |
from time import sleep | |
# ignoring "main" and null since null should imply "main" (also, I think the RTD are wrong on this point) | |
PROCESS_TYPES = ['content', 'gpu'] | |
def log(key): | |
print "[{}] {}".format(datetime.datetime.now(),key) | |
def top_crashers(qds, qde, qw, ssc, ptype): | |
if qw: | |
print "Top 50 {} process crashers from {}/{}/{} to {}/{}/{}\nwithin the first {} weeks since profile creation\n---".format(ptype, qds[:4], qds[4:6], qds[6:], qde[:4], qde[4:6], qde[6:], int(qw/7)) | |
else: | |
print "Top 50 {} process crashers from {}/{}/{} to {}/{}/{}\nwith no consideration of profile creation date\n---".format(ptype, qds[:4], qds[4:6], qds[6:], qde[:4], qde[4:6], qde[6:]) | |
log("pre-top_crashers for loop") | |
    for entry in ssc[:50]:
        print "{}\t{}\t{}".format(entry[0], entry[1], ptype)
log("post-top_crashers for loop") | |
def stats(errs, res): | |
rate = 0 | |
if res > 0: | |
rate = float(len(errs))/res | |
print "Error rate: {:.4%} ({} total, {} errors)".format(rate, res, len(errs)) | |
# If it's not a long list of errors, show them | |
if len(errs) > 0: | |
if len(errs) > 50: | |
print "Errors (more than 50, only 50 shown):" | |
print '\n'.join(errs[:50]) | |
else: | |
print "Errors:" | |
print '\n'.join(errs) | |
def get_dataset(date_start, date_end, channel):
    """Return crash pings for that channel over that submission date range."""
    if date_start != date_end:
        pings = (Dataset.from_source("telemetry")
                 .where(docType='crash', appUpdateChannel=channel,
                        submissionDate=lambda x: date_start <= x <= date_end)
                 .records(sc, sample=1.0))
    else:  # start and end are equal, just use the one date
        pings = (Dataset.from_source("telemetry")
                 .where(docType='crash', appUpdateChannel=channel,
                        submissionDate=date_start)
                 .records(sc, sample=1.0))
    return pings
def pingsig(ping): | |
try: | |
symbolicated_threads = symbolicate_ping(ping) | |
if symbolicated_threads: | |
signature = generate_signature(ping, symbolicated_threads) | |
else: | |
signature = '' | |
return (ping['id'], signature, None) | |
except ValueError as e: | |
        return (ping['id'], None, str(e))
except Exception as e: | |
return (ping['id'], None, traceback.format_exc()) | |
def qualify_ping(ping, time_delta, epoch, process_type): | |
"""Return True for pings that meet the specified criteria.""" | |
retval = False | |
step1, step2, step3 = False, False, False | |
submission_date = ping['meta'].get('submissionDate', None) | |
if time_delta and submission_date: | |
querydate = datetime.datetime.strptime(submission_date, "%Y%m%d").date() | |
submission_date = (querydate - epoch).days | |
process_filter = 'main' # or blank, which is also main | |
if process_type in PROCESS_TYPES: | |
process_filter = process_type | |
try: | |
step1 = ping['payload'].get('processType', None) | |
except: | |
print "processType error" | |
else: | |
if (step1 is None) or (step1 == ''): | |
step1 = 'main' | |
if step1 == process_filter: | |
if time_delta and submission_date: | |
try: | |
step2 = int(ping['environment']['profile'].get('creationDate', 0)) + time_delta >= submission_date and submission_date >= int(ping['environment']['profile'].get('creationDate', 0)) | |
except: | |
print "date check error" | |
else: | |
if step2: | |
try: | |
step3 = ping['payload'].get('stackTraces', None) != None | |
except: | |
print "stackTraces error" | |
else: | |
if step3: | |
retval = True | |
else: # no time_delta (or submission_date, so time_delta would be moot) | |
try: | |
step2 = ping['payload'].get('stackTraces', None) != None | |
except: | |
print "stackTraces error (no date)" | |
else: | |
if step2: | |
retval = True | |
return retval | |
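# A minimal sketch of the fields qualify_ping inspects, with hypothetical
# values: a null processType counts as 'main', and the ping qualifies because
# it was submitted within TWO_WEEKS of the (days-since-epoch) creationDate.
_epoch = datetime.datetime.utcfromtimestamp(0).date()
_example_ping = {
    'meta': {'submissionDate': '20170525'},
    'environment': {'profile': {'creationDate': 17300}},
    'payload': {'processType': None,
                'stackTraces': {'threads': [], 'modules': []}}}
print qualify_ping(_example_ping, TWO_WEEKS, _epoch, 'main')
# -> True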
def query_pings(query_date_start, query_date_end, pings, process_type): | |
"""Given pings and process type, map signatures and output top crashers.""" | |
total_errors = 0 | |
log("pre-pings.count()") | |
total_results = pings.count() | |
print "\nTotal {} process results: {}".format(process_type, total_results) | |
if total_results > 0: | |
log("pre-pings.map(pingsig)") | |
results = pings.map(pingsig) | |
results.cache() | |
log("pre-signatures flatMap") | |
        signatures = results.flatMap(lambda r: [r[1]] if r[1] is not None else [])
        log("pre-errors flatMap")
        errors = results.flatMap(lambda r: [r[0] + " " + r[2]] if r[2] is not None else [])
log("pre-errors.collect") | |
total_errors = errors.collect() | |
log("pre-stack_counts") | |
stack_counts = signatures.map(lambda sig: (sig, 1)).countByKey() | |
print "Stack counts: {}".format(len(stack_counts)) | |
log("pre-sorted_stack_counts") | |
sorted_stack_counts = sorted(stack_counts.items(), key=lambda x: x[1], reverse=True) | |
log("pre-top_crashers") | |
        # NOTE: query_window is read from the notebook-level global set before each run
        top_crashers(query_date_start, query_date_end, query_window, sorted_stack_counts, process_type)
results.unpersist() | |
return total_errors, total_results | |
def query(pings, profile_window, submission_date_start, submission_date_end, channel, process_type=None): | |
"""Given a submission date, channel, possibly-null consideration | |
window, and optional process type, get and query all matching pings. | |
If no process type is specified, all process types will be output; if | |
consideration window is None, all pings with a matching submission date | |
(regardless of profile creation date) will be output.""" | |
time_delta = profile_window | |
epoch = datetime.datetime.utcfromtimestamp(0).date() | |
def query_stats(time_delta, process_type, errors_count, results_count): | |
"""Output somewhat meaningful statistics for that query.""" | |
if time_delta: | |
print "\nStats and errors for {} days ({} process)".format(time_delta, process_type) | |
else: | |
print "\nStats and errors ({} process)".format(process_type) | |
stats(errors_count, results_count) | |
log("query if/else") | |
if process_type: | |
log("pre-qualify_ping (if)") | |
subset = pings.filter(lambda p: qualify_ping(p, time_delta, epoch, process_type)) | |
log("post-qualify_ping") | |
errors_count, results_count = query_pings(submission_date_start, submission_date_end, subset, process_type) | |
log("post-query_pings") | |
if results_count > 0: | |
query_stats(time_delta, process_type, errors_count, results_count) | |
else: | |
log("pre-qualify_ping (else)") | |
all_processes = ['main', 'content', 'gpu'] | |
for each_process in all_processes: | |
subset = pings.filter(lambda p: qualify_ping(p, time_delta, epoch, each_process)) | |
log("post-qualify_ping") | |
errors_count, results_count = query_pings(submission_date_start, submission_date_end, subset, each_process) | |
log("post-query_pings") | |
if results_count > 0: | |
query_stats(time_delta, each_process, errors_count, results_count) | |
log("post-query_stats") | |
# In[7]: | |
start_date = '20170525' | |
end_date = '20170525' | |
query_channel = 'release' | |
pings = get_dataset(start_date, end_date, query_channel) | |
pings.cache() | |
# ### Top 50 crashers (by signature) within the first two weeks of use | |
# In[8]: | |
query_window = TWO_WEEKS | |
# process_type = None | |
# pings = query(query_window, query_date, query_channel, process_type) | |
# NOTE: if process_type is not used, all process types will be output | |
query(pings, query_window, start_date, end_date, query_channel) | |
# ### Top 50 crashers (by signature) within the first six weeks of use | |
# In[9]: | |
query_window = SIX_WEEKS | |
# process_type = None | |
# pings = query(query_window, query_date, query_channel, process_type) | |
# NOTE: if process_type is not used, all process types will be output | |
query(pings, query_window, start_date, end_date, query_channel) | |
# ### Heavy users | |
# In[10]: | |
date_dataframe = spark.read.parquet("s3://telemetry-parquet/main_summary/v4/submission_date_s3=20170525/sample_id=42/") | |
date_dataframe.registerTempTable("heavyUsers") | |
heavy_user_clientidsRDD = spark.sql("SELECT client_id FROM heavyUsers GROUP BY client_id HAVING SUM(scalar_parent_browser_engagement_total_uri_count) > 1400") | |
heavy_user_clientids = heavy_user_clientidsRDD.select("client_id").rdd.flatMap(lambda x: x).collect() | |
# list of heavy user client IDs, use as a filter for the same (above) queries | |
def query_heavy(pings, submission_date_start, submission_date_end, channel, process_type=None): | |
"""Mostly re-use of previous query, but nulling some filters and | |
adding one for heavy client IDs.""" | |
time_delta = None | |
epoch = None | |
print "Top crashers for heavy users" | |
# restrict to client IDs that qualify as heavy users (above) | |
# Joining this is a) above my pay grade and b) probably not necessary for a relatively small clientId list | |
# subset = pings.join(heavy_user_clientids, pings.clientId == heavy_user_clientids.client_id) | |
log("pre-pings.filter") | |
subset = pings.filter(lambda x: x.get('clientId') in heavy_user_clientids) | |
def query_stats(process_type, errors_count, results_count): | |
"""Output somewhat meaningful statistics for that query.""" | |
print "\nStats and errors ({} process)".format(process_type) | |
stats(errors_count, results_count) | |
log("query if/else") | |
if process_type: | |
log("pre-qualify_ping (if)") | |
subset = subset.filter(lambda p: qualify_ping(p, time_delta, epoch, process_type)) | |
log("post-qualify_ping") | |
errors_count, results_count = query_pings(submission_date_start, submission_date_end, subset, process_type) | |
log("post-query_pings") | |
if results_count > 0: | |
            query_stats(process_type, errors_count, results_count)
else: | |
log("pre-qualify_ping (else)") | |
subset.cache() | |
log("post-subset cache") | |
all_processes = ['main', 'content', 'gpu'] | |
for each_process in all_processes: | |
subset = subset.filter(lambda p: qualify_ping(p, time_delta, epoch, each_process)) | |
log("post-qualify_ping") | |
errors_count, results_count = query_pings(submission_date_start, submission_date_end, subset, each_process) | |
log("post-query_pings") | |
if results_count > 0: | |
query_stats(each_process, errors_count, results_count) | |
subset.unpersist() | |
log("post-query_stats") | |
# In[11]: | |
query_window = None | |
query_heavy(pings, start_date, end_date, query_channel) | |
# In[12]: | |
pings.unpersist() | |
# In[ ]: | |