Last active
March 22, 2023 19:53
-
-
Save mknyszek/58a30e291ee1016e6833e617ce511666 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
tile38 $ benchstat tip+fpcallers.results tip+fpcallers+trace.results
name old time/op new time/op delta
Tile38WithinCircle100kmRequest 490µs ± 1% 494µs ± 1% +0.79% (p=0.019 n=10+10)
Tile38IntersectsCircle100kmRequest 566µs ± 1% 571µs ± 2% +0.96% (p=0.011 n=10+10)
Tile38KNearestLimit100Request 391µs ± 2% 394µs ± 1% +0.91% (p=0.004 n=9+10)
name old average-RSS-bytes new average-RSS-bytes delta
Tile38WithinCircle100kmRequest 5.38GB ± 1% 5.37GB ± 1% ~ (p=0.529 n=10+10)
Tile38IntersectsCircle100kmRequest 5.71GB ± 1% 5.70GB ± 1% ~ (p=0.631 n=10+10)
Tile38KNearestLimit100Request 7.26GB ± 0% 7.26GB ± 0% ~ (p=0.529 n=10+10)
name old peak-RSS-bytes new peak-RSS-bytes delta
Tile38WithinCircle100kmRequest 5.71GB ± 1% 5.70GB ± 1% ~ (p=0.579 n=10+10)
Tile38IntersectsCircle100kmRequest 5.94GB ± 1% 5.94GB ± 1% ~ (p=0.684 n=10+10)
Tile38KNearestLimit100Request 7.49GB ± 0% 7.50GB ± 0% ~ (p=0.720 n=9+10)
name old peak-VM-bytes new peak-VM-bytes delta
Tile38WithinCircle100kmRequest 6.42GB ± 0% 6.40GB ± 1% ~ (p=0.711 n=6+10)
Tile38IntersectsCircle100kmRequest 6.65GB ± 1% 6.64GB ± 1% ~ (p=0.541 n=10+10)
Tile38KNearestLimit100Request 8.18GB ± 0% 8.18GB ± 0% ~ (p=0.245 n=9+8)
name old p50-latency-ns new p50-latency-ns delta
Tile38WithinCircle100kmRequest 119k ± 2% 122k ± 1% +3.20% (p=0.000 n=10+10)
Tile38IntersectsCircle100kmRequest 173k ± 1% 176k ± 1% +1.43% (p=0.004 n=10+9)
Tile38KNearestLimit100Request 285k ± 2% 289k ± 1% +1.51% (p=0.000 n=10+10)
name old p90-latency-ns new p90-latency-ns delta
Tile38WithinCircle100kmRequest 722k ± 1% 725k ± 1% ~ (p=0.315 n=10+9)
Tile38IntersectsCircle100kmRequest 836k ± 2% 838k ± 1% ~ (p=0.631 n=10+10)
Tile38KNearestLimit100Request 741k ± 2% 745k ± 1% ~ (p=0.190 n=10+10)
name old p99-latency-ns new p99-latency-ns delta
Tile38WithinCircle100kmRequest 5.75M ± 2% 5.75M ± 2% ~ (p=0.971 n=10+10)
Tile38IntersectsCircle100kmRequest 5.83M ± 2% 5.91M ± 3% ~ (p=0.089 n=10+10)
Tile38KNearestLimit100Request 1.44M ± 3% 1.45M ± 2% ~ (p=0.684 n=10+10)
name old ops/s new ops/s delta
Tile38WithinCircle100kmRequest 12.2k ± 1% 12.1k ± 1% -0.79% (p=0.019 n=10+10)
Tile38IntersectsCircle100kmRequest 10.6k ± 1% 10.5k ± 2% -0.80% (p=0.022 n=9+10)
Tile38KNearestLimit100Request 15.4k ± 2% 15.2k ± 1% -0.91% (p=0.004 n=9+10)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go | |
index 2e98f895a3..b69da1be9c 100644 | |
--- a/src/runtime/runtime2.go | |
+++ b/src/runtime/runtime2.go | |
@@ -472,6 +472,7 @@ type g struct { | |
raceignore int8 // ignore race detection events | |
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine | |
+ traceLastEv byte // Event type of the last event traced by this goroutine | |
tracking bool // whether we're tracking this G for sched latency statistics | |
trackingSeq uint8 // used to decide whether to track this G | |
trackingStamp int64 // timestamp of when the G last started being tracked | |
@@ -495,6 +496,7 @@ type g struct { | |
labels unsafe.Pointer // profiler labels | |
timer *timer // cached timer for time.Sleep | |
selectDone atomic.Uint32 // are we participating in a select and did someone win the race? | |
+ traceLastStack uint32 // stack ID for the last traced event on this G | |
// goroutineProfiled indicates the status of this goroutine's stack for the | |
// current in-progress goroutine profile | |
diff --git a/src/runtime/trace.go b/src/runtime/trace.go | |
index 7484c45f71..122b0aca2f 100644 | |
--- a/src/runtime/trace.go | |
+++ b/src/runtime/trace.go | |
@@ -239,7 +239,7 @@ func StartTrace() error { | |
// Obtain current stack ID to use in all traceEvGoCreate events below. | |
stkBuf := make([]uintptr, traceStackSize) | |
- stackID := traceStackID(mp, stkBuf, 2) | |
+ stackID := uint64(traceStackID(mp, stkBuf, 2)) | |
profBuf := newProfBuf(2, profBufWordCount, profBufTagCount) // after the timestamp, header is [pp.id, gp.goid] | |
trace.cpuLogRead = profBuf | |
@@ -753,7 +753,34 @@ func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev by | |
} else if skip == 0 { | |
buf.varint(0) | |
} else if skip > 0 { | |
- buf.varint(traceStackID(mp, buf.stk[:], skip)) | |
+ if mp != nil && mp.curg != nil { | |
+ lastEv := mp.curg.traceLastEv | |
+ switch ev { | |
+ case traceEvGoStart, traceEvGoStartLocal: | |
+ switch lastEv { | |
+ case traceEvGoStop, | |
+ traceEvGoSched, | |
+ traceEvGoPreempt, | |
+ traceEvGoSleep, | |
+ traceEvGoBlock, | |
+ traceEvGoBlockSend, | |
+ traceEvGoBlockRecv, | |
+ traceEvGoBlockSelect, | |
+ traceEvGoBlockSync, | |
+ traceEvGoBlockCond, | |
+ traceEvGoBlockNet, | |
+ traceEvGoSysCall: | |
+ stackID = mp.curg.traceLastStack | |
+ default: | |
+ stackID = traceStackID(mp, buf.stk[:], skip) | |
+ } | |
+ default: | |
+ stackID = traceStackID(mp, buf.stk[:], skip) | |
+ } | |
+ } else { | |
+ stackID = traceStackID(mp, buf.stk[:], skip) | |
+ } | |
+ buf.varint(uint64(stackID)) | |
} | |
evSize := buf.pos - startPos | |
if evSize > maxSize { | |
@@ -763,6 +790,10 @@ func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev by | |
// Fill in actual length. | |
*lenp = byte(evSize - 2) | |
} | |
+ if mp != nil && mp.curg != nil { | |
+ mp.curg.traceLastEv = ev | |
+ mp.curg.traceLastStack = stackID | |
+ } | |
} | |
// traceCPUSample writes a CPU profile sample stack to the execution tracer's | |
@@ -877,7 +908,7 @@ const logicalStackSentinel = ^uintptr(0) | |
// stack table, and returns its unique ID. pcBuf should have a length equal to | |
// traceStackSize. skip controls the number of leaf frames to omit in order to | |
// hide tracer internals from stack traces, see CL 5523. | |
-func traceStackID(mp *m, pcBuf []uintptr, skip int) uint64 { | |
+func traceStackID(mp *m, pcBuf []uintptr, skip int) uint32 { | |
gp := getg() | |
curgp := mp.curg | |
nstk := 1 | |
@@ -916,7 +947,7 @@ func traceStackID(mp *m, pcBuf []uintptr, skip int) uint64 { | |
nstk-- // skip runtime.main | |
} | |
id := trace.stackTab.put(pcBuf[:nstk]) | |
- return uint64(id) | |
+ return id | |
} | |
// tracefpunwindoff returns false if frame pointer unwinding for the tracer is |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment