Last active
May 5, 2020 18:29
-
-
Save ReubenBond/6420bb828d3f37fc1324c374d16a1742 to your computer and use it in GitHub Desktop.
StallDumper - dump Windows process memory when it stops emitting ETW events for a given provider for too long
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Microsoft.Diagnostics.Tracing; | |
using Microsoft.Diagnostics.Tracing.Session; | |
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.Globalization; | |
using System.IO; | |
using System.Runtime.InteropServices; | |
using System.Threading; | |
using System.Threading.Tasks; | |
namespace StallDumper | |
{ | |
/// <summary>
/// Console entry point. Parses the ETW provider GUID from the command line and
/// runs a <see cref="ProcessMonitorManager"/> that writes a memory dump of any
/// process that stops emitting events for that provider.
/// </summary>
class Program
{
    /// <summary>
    /// Validates the arguments, wires up the dump-on-stall handler and blocks
    /// until the monitor manager finishes.
    /// </summary>
    /// <param name="args">Command-line arguments; the first one must be the provider GUID.</param>
    static void Main(string[] args)
    {
        Guid providerGuid = default;
        var haveProvider = args.Length > 0 && Guid.TryParse(args[0], out providerGuid);
        if (!haveProvider)
        {
            Console.WriteLine($"Usage: {Process.GetCurrentProcess().ProcessName} <Provider GUID>");
            return;
        }

        // Invoked by the manager when a monitored process is considered stalled.
        void WriteDump(Process process)
        {
            var dumpFile = DumpCapture.CreateMiniDump(process);
            Console.WriteLine($"Wrote memory dump to {dumpFile.FullName}");
        }

        var stallThreshold = TimeSpan.FromSeconds(5);
        var sessionName = $"StallMonitor_{providerGuid}";
        var monitor = new ProcessMonitorManager(providerGuid, stallThreshold, sessionName, WriteDump);

        // Run the monitor on the thread pool and block the main thread until it completes.
        Task.Run(monitor.Run).GetAwaiter().GetResult();
    }
}
public class ProcessMonitorManager | |
{ | |
// GUID of the ETW provider whose events are treated as a liveness signal.
private readonly Guid _providerGuid;

// How long a process may go without emitting an event before it is considered stalled.
private readonly TimeSpan _threshold;

// Signalled when monitoring should stop; polled by every per-process monitor loop.
private readonly CancellationTokenSource _cancellation = new CancellationTokenSource();

// Name passed to the trace session created in Run.
private readonly string _sessionName;

// Callback invoked (on a thread-pool thread) when a process stall is detected.
private readonly Action<Process> _onStall;

/// <summary>
/// Creates a manager that watches every process emitting events for <paramref name="providerGuid"/>.
/// </summary>
/// <param name="providerGuid">The ETW provider to enable on the session.</param>
/// <param name="threshold">Maximum time without events before a process is reported as stalled.</param>
/// <param name="sessionName">Name of the trace session to create.</param>
/// <param name="onStall">Callback invoked with the stalled process; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="onStall"/> is null.</exception>
public ProcessMonitorManager(Guid providerGuid, TimeSpan threshold, string sessionName, Action<Process> onStall)
{
    // Fail fast here: a null callback would otherwise only surface later as a
    // NullReferenceException on a thread-pool thread when the first stall is detected.
    _onStall = onStall ?? throw new ArgumentNullException(nameof(onStall));
    _providerGuid = providerGuid;
    _threshold = threshold;
    _sessionName = sessionName;
}
/// <summary>
/// Creates the trace session, enables the configured provider and processes
/// events until the session is stopped. A <c>ProcessMonitor</c> is created for
/// each process the first time an event from it is seen; once event processing
/// ends, all monitors are cancelled and awaited.
/// </summary>
/// <returns>A task that completes when every started monitor has exited.</returns>
public async Task Run()
{
    var monitors = new Dictionary<int, ProcessMonitor>();
    var session = new TraceEventSession(_sessionName, TraceEventSessionOptions.Create);
    session.EnableProvider(_providerGuid, TraceEventLevel.Always);
    session.Source.Dynamic.All += delegate (TraceEvent data)
    {
        if (!monitors.TryGetValue(data.ProcessID, out var monitor))
        {
            // Manifest events are not treated as activity from a new process.
            if (string.Equals("ManifestData", data.EventName, StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            try
            {
                var process = Process.GetProcessById(data.ProcessID);
                monitor = monitors[data.ProcessID] = new ProcessMonitor(process, this);
                Console.WriteLine($"Monitoring process {process.ProcessName} ({process.Id})");
                monitor.Start();
            }
            catch (Exception exception)
            {
                Console.WriteLine($"Failed to create monitor for process {data.ProcessName} ({data.ProcessID}): {exception}");
            }
        }

        // monitor is still null when creation failed above (for example the process
        // already exited); dereferencing it would throw inside the event callback.
        monitor?.OnEvent(data);
    };

    Console.CancelKeyPress += (sender, e) =>
    {
        Console.WriteLine("Received SIGINT, stopping");

        // Without this the runtime terminates the process as soon as the handler
        // returns, racing the orderly wait for monitors below.
        e.Cancel = true;
        Stop();
    };

    Console.WriteLine("Monitoring activity");
    session.Source.Process();

    // Event processing is over, so liveness stamps will no longer advance. Make sure
    // the monitors stop instead of reporting (and dumping) false stalls. This is a
    // no-op when Stop() already cancelled.
    _cancellation.Cancel();

    // Wait for exits
    foreach (var pending in monitors.Values)
    {
        if (pending.Stopped.IsCompleted)
        {
            continue;
        }

        Console.WriteLine($"Waiting for monitor for {pending.Name} to exit");
        await pending.Stopped.ConfigureAwait(false);
    }

    // Signals the monitor loops to stop and tears down the trace session.
    void Stop()
    {
        _cancellation.Cancel();
        session.Dispose();
    }
}
/// <summary>
/// Queues the stall callback for <paramref name="process"/> onto the thread pool
/// so the caller's monitoring loop is not blocked while the callback runs.
/// </summary>
/// <param name="process">The process that was detected as stalled.</param>
public void OnStall(Process process)
{
    ThreadPool.UnsafeQueueUserWorkItem(obj =>
    {
        var values = (Tuple<ProcessMonitorManager, Process>)obj;
        try
        {
            values.Item1._onStall(values.Item2);
        }
        catch (Exception exception)
        {
            // An exception escaping a thread-pool work item is unhandled and takes the
            // whole tool down; log it instead so monitoring of other processes continues.
            Console.WriteLine($"Stall handler failed: {exception}");
        }
    },
    Tuple.Create(this, process));
}
/// <summary>
/// Tracks the liveness of one process. <see cref="OnEvent"/> records a timestamp
/// each time it is called, and a background loop compares that timestamp with the
/// manager's threshold to detect and report stalls.
/// </summary>
private class ProcessMonitor
{
    private readonly ProcessMonitorManager _manager;
    private readonly Process _process;
    private readonly int _pid;
    private readonly TimeSpan _threshold;

    // Stopwatch timestamp of the most recent event; written by OnEvent, read by the loop.
    private long _livenessStamp;
    private Task _runTask;

    public ProcessMonitor(Process process, ProcessMonitorManager manager)
    {
        _manager = manager;
        _process = process;
        _pid = process.Id;
        _threshold = _manager._threshold;
    }

    /// <summary>Display name in the form "processName (pid)".</summary>
    public string Name
    {
        get { return $"{_process.ProcessName} ({_pid})"; }
    }

    /// <summary>Completes when the monitoring loop has exited; already complete if never started.</summary>
    public Task Stopped
    {
        get
        {
            var task = _runTask;
            return task != null ? task : Task.CompletedTask;
        }
    }

    /// <summary>Starts the monitoring loop on the thread pool.</summary>
    public void Start() => _runTask = Task.Run(Run);

    /// <summary>Records that the monitored process has just emitted an event.</summary>
    internal void OnEvent(TraceEvent data)
    {
        Volatile.Write(ref _livenessStamp, Stopwatch.GetTimestamp());
    }

    // Polls roughly every 50 ms until cancellation is requested or the process exits.
    private async Task Run()
    {
        try
        {
            _livenessStamp = Stopwatch.GetTimestamp();
            var previousTick = Stopwatch.GetTimestamp();
            var lastReportStamp = previousTick;
            var stallInProgress = false;
            var stallBeganAt = _livenessStamp;

            while (true)
            {
                if (_manager._cancellation.IsCancellationRequested)
                {
                    break;
                }

                var lastEventStamp = Volatile.Read(ref _livenessStamp);
                var now = Stopwatch.GetTimestamp();
                var monitorGap = Elapsed(previousTick, now);
                var targetGap = Elapsed(lastEventStamp, now);
                previousTick = now;

                if (monitorGap > _threshold)
                {
                    // This loop itself was delayed past the threshold, so the target's
                    // gap is not evaluated on this iteration.
                    Console.WriteLine($"{Name} Monitoring process stalled for {monitorGap:c}");
                }
                else if (_process.HasExited)
                {
                    Console.WriteLine($"{Name} exited");
                    return;
                }
                else if (targetGap <= _threshold)
                {
                    // Healthy: close out a stall episode if one was in progress.
                    if (stallInProgress)
                    {
                        var totalStall = Elapsed(stallBeganAt, now);
                        Console.WriteLine($"{Name} stall ended. Approximate stall time: {totalStall:c}");
                        stallInProgress = false;
                    }
                }
                else
                {
                    // Stalled: repeat the progress message at most once per second.
                    if (Elapsed(lastReportStamp, now) > TimeSpan.FromSeconds(1))
                    {
                        Console.WriteLine($"{Name} has an ongoing stall, for approximately {targetGap:c}");
                        lastReportStamp = now;
                    }

                    // Notify the manager only once per stall episode.
                    if (!stallInProgress)
                    {
                        stallBeganAt = lastEventStamp;
                        _manager.OnStall(_process);
                        stallInProgress = true;
                    }
                }

                await Task.Delay(50);
            }
        }
        catch (Exception exception)
        {
            Console.WriteLine($"Exception in process monitor {Name}: {exception}");
        }
        finally
        {
            Console.WriteLine($"Exiting monitor {Name}");
        }
    }

    // Converts the difference between two Stopwatch timestamps into a TimeSpan.
    private static TimeSpan Elapsed(long from, long to)
    {
        var ticks = to - from;
        return TimeSpan.FromSeconds((double)ticks / Stopwatch.Frequency);
    }
}
} | |
/// <summary>
/// Writes memory dumps of running processes using the native
/// <c>MiniDumpWriteDump</c> function from Dbghelp.dll.
/// </summary>
internal static class DumpCapture
{
    /// <summary>
    /// Writes a dump of <paramref name="process"/> to a new file in the current
    /// directory, named after the process name and the current UTC time.
    /// </summary>
    /// <param name="process">The process to dump.</param>
    /// <param name="dumpType">Flags selecting what the dump contains; defaults to full memory.</param>
    /// <returns>A <see cref="FileInfo"/> for the dump file that was written.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="process"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The native dump call reported failure.</exception>
    internal static FileInfo CreateMiniDump(Process process, MiniDumpType dumpType = MiniDumpType.MiniDumpWithFullMemory)
    {
        if (process == null)
        {
            throw new ArgumentNullException(nameof(process));
        }

        var timestamp = DateTime.UtcNow.ToString("yyyy-MM-dd-HH-mm-ss-fffZ", CultureInfo.InvariantCulture);
        var dumpFileName = $"{process.ProcessName}-MiniDump-{timestamp}.dmp";
        using (var stream = File.Create(dumpFileName))
        {
            // Passing the SafeHandle (rather than DangerousGetHandle()) lets the marshaller
            // keep the file handle alive for the duration of the native call.
            var succeeded = NativeMethods.MiniDumpWriteDump(
                process.Handle,
                process.Id,
                stream.SafeFileHandle,
                dumpType,
                IntPtr.Zero,
                IntPtr.Zero,
                IntPtr.Zero);

            if (!succeeded)
            {
                // Must be read immediately after the P/Invoke. For this API the last-error
                // value is documented to be an HRESULT.
                var error = Marshal.GetLastWin32Error();
                throw new InvalidOperationException(
                    $"MiniDumpWriteDump failed for process {process.Id} (error 0x{error:X8}).",
                    Marshal.GetExceptionForHR(error));
            }
        }

        return new FileInfo(dumpFileName);
    }

    private static class NativeMethods
    {
        [DllImport("Dbghelp.dll", SetLastError = true)]
        [return: MarshalAs(UnmanagedType.Bool)]
        public static extern bool MiniDumpWriteDump(
            IntPtr hProcess,
            int processId,
            SafeHandle hFile,
            MiniDumpType dumpType,
            IntPtr exceptionParam,
            IntPtr userStreamParam,
            IntPtr callbackParam);
    }

    /// <summary>
    /// Bit flags passed as the dump type to <c>MiniDumpWriteDump</c>; values may be combined.
    /// </summary>
    [Flags]
    internal enum MiniDumpType
    {
        MiniDumpNormal = 0x00000000,
        MiniDumpWithDataSegs = 0x00000001,
        MiniDumpWithFullMemory = 0x00000002,
        MiniDumpWithHandleData = 0x00000004,
        MiniDumpFilterMemory = 0x00000008,
        MiniDumpScanMemory = 0x00000010,
        MiniDumpWithUnloadedModules = 0x00000020,
        MiniDumpWithIndirectlyReferencedMemory = 0x00000040,
        MiniDumpFilterModulePaths = 0x00000080,
        MiniDumpWithProcessThreadData = 0x00000100,
        MiniDumpWithPrivateReadWriteMemory = 0x00000200,
        MiniDumpWithoutOptionalData = 0x00000400,
        MiniDumpWithFullMemoryInfo = 0x00000800,
        MiniDumpWithThreadInfo = 0x00001000,
        MiniDumpWithCodeSegs = 0x00002000,
        MiniDumpWithoutManagedState = 0x00004000,
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net472</TargetFramework>
    <LangVersion>latest</LangVersion>
  </PropertyGroup>
  <ItemGroup>
    <PackageReference Include="Microsoft.Diagnostics.Tracing.TraceEvent" Version="2.0.55" />
  </ItemGroup>
</Project>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment