Skip to content

Instantly share code, notes, and snippets.

@mknyszek
Last active March 22, 2023 19:53
Show Gist options
  • Save mknyszek/58a30e291ee1016e6833e617ce511666 to your computer and use it in GitHub Desktop.
Save mknyszek/58a30e291ee1016e6833e617ce511666 to your computer and use it in GitHub Desktop.
tile38 $ benchstat tip+fpcallers.results tip+fpcallers+trace.results
name old time/op new time/op delta
Tile38WithinCircle100kmRequest 490µs ± 1% 494µs ± 1% +0.79% (p=0.019 n=10+10)
Tile38IntersectsCircle100kmRequest 566µs ± 1% 571µs ± 2% +0.96% (p=0.011 n=10+10)
Tile38KNearestLimit100Request 391µs ± 2% 394µs ± 1% +0.91% (p=0.004 n=9+10)
name old average-RSS-bytes new average-RSS-bytes delta
Tile38WithinCircle100kmRequest 5.38GB ± 1% 5.37GB ± 1% ~ (p=0.529 n=10+10)
Tile38IntersectsCircle100kmRequest 5.71GB ± 1% 5.70GB ± 1% ~ (p=0.631 n=10+10)
Tile38KNearestLimit100Request 7.26GB ± 0% 7.26GB ± 0% ~ (p=0.529 n=10+10)
name old peak-RSS-bytes new peak-RSS-bytes delta
Tile38WithinCircle100kmRequest 5.71GB ± 1% 5.70GB ± 1% ~ (p=0.579 n=10+10)
Tile38IntersectsCircle100kmRequest 5.94GB ± 1% 5.94GB ± 1% ~ (p=0.684 n=10+10)
Tile38KNearestLimit100Request 7.49GB ± 0% 7.50GB ± 0% ~ (p=0.720 n=9+10)
name old peak-VM-bytes new peak-VM-bytes delta
Tile38WithinCircle100kmRequest 6.42GB ± 0% 6.40GB ± 1% ~ (p=0.711 n=6+10)
Tile38IntersectsCircle100kmRequest 6.65GB ± 1% 6.64GB ± 1% ~ (p=0.541 n=10+10)
Tile38KNearestLimit100Request 8.18GB ± 0% 8.18GB ± 0% ~ (p=0.245 n=9+8)
name old p50-latency-ns new p50-latency-ns delta
Tile38WithinCircle100kmRequest 119k ± 2% 122k ± 1% +3.20% (p=0.000 n=10+10)
Tile38IntersectsCircle100kmRequest 173k ± 1% 176k ± 1% +1.43% (p=0.004 n=10+9)
Tile38KNearestLimit100Request 285k ± 2% 289k ± 1% +1.51% (p=0.000 n=10+10)
name old p90-latency-ns new p90-latency-ns delta
Tile38WithinCircle100kmRequest 722k ± 1% 725k ± 1% ~ (p=0.315 n=10+9)
Tile38IntersectsCircle100kmRequest 836k ± 2% 838k ± 1% ~ (p=0.631 n=10+10)
Tile38KNearestLimit100Request 741k ± 2% 745k ± 1% ~ (p=0.190 n=10+10)
name old p99-latency-ns new p99-latency-ns delta
Tile38WithinCircle100kmRequest 5.75M ± 2% 5.75M ± 2% ~ (p=0.971 n=10+10)
Tile38IntersectsCircle100kmRequest 5.83M ± 2% 5.91M ± 3% ~ (p=0.089 n=10+10)
Tile38KNearestLimit100Request 1.44M ± 3% 1.45M ± 2% ~ (p=0.684 n=10+10)
name old ops/s new ops/s delta
Tile38WithinCircle100kmRequest 12.2k ± 1% 12.1k ± 1% -0.79% (p=0.019 n=10+10)
Tile38IntersectsCircle100kmRequest 10.6k ± 1% 10.5k ± 2% -0.80% (p=0.022 n=9+10)
Tile38KNearestLimit100Request 15.4k ± 2% 15.2k ± 1% -0.91% (p=0.004 n=9+10)
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 2e98f895a3..b69da1be9c 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -472,6 +472,7 @@ type g struct {
raceignore int8 // ignore race detection events
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
+ traceLastEv byte // type of the last event traced while this G was the tracing M's curg; consulted when tracing GoStart/GoStartLocal
tracking bool // whether we're tracking this G for sched latency statistics
trackingSeq uint8 // used to decide whether to track this G
trackingStamp int64 // timestamp of when the G last started being tracked
@@ -495,6 +496,7 @@ type g struct {
labels unsafe.Pointer // profiler labels
timer *timer // cached timer for time.Sleep
selectDone atomic.Uint32 // are we participating in a select and did someone win the race?
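+ traceLastStack uint32 // stack ID recorded with the last traced event on this G; reused for a GoStart/GoStartLocal that follows a stop, block, or syscall event instead of unwinding again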
// goroutineProfiled indicates the status of this goroutine's stack for the
// current in-progress goroutine profile
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 7484c45f71..122b0aca2f 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -239,7 +239,7 @@ func StartTrace() error {
// Obtain current stack ID to use in all traceEvGoCreate events below.
stkBuf := make([]uintptr, traceStackSize)
- stackID := traceStackID(mp, stkBuf, 2)
+ stackID := uint64(traceStackID(mp, stkBuf, 2))
profBuf := newProfBuf(2, profBufWordCount, profBufTagCount) // after the timestamp, header is [pp.id, gp.goid]
trace.cpuLogRead = profBuf
@@ -753,7 +753,34 @@ func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev by
} else if skip == 0 {
buf.varint(0)
} else if skip > 0 {
- buf.varint(traceStackID(mp, buf.stk[:], skip))
+ if mp != nil && mp.curg != nil {
+ lastEv := mp.curg.traceLastEv
+ switch ev {
+ case traceEvGoStart, traceEvGoStartLocal:
+ switch lastEv {
+ case traceEvGoStop,
+ traceEvGoSched,
+ traceEvGoPreempt,
+ traceEvGoSleep,
+ traceEvGoBlock,
+ traceEvGoBlockSend,
+ traceEvGoBlockRecv,
+ traceEvGoBlockSelect,
+ traceEvGoBlockSync,
+ traceEvGoBlockCond,
+ traceEvGoBlockNet,
+ traceEvGoSysCall:
+ stackID = mp.curg.traceLastStack
+ default:
+ stackID = traceStackID(mp, buf.stk[:], skip)
+ }
+ default:
+ stackID = traceStackID(mp, buf.stk[:], skip)
+ }
+ } else {
+ stackID = traceStackID(mp, buf.stk[:], skip)
+ }
+ buf.varint(uint64(stackID))
}
evSize := buf.pos - startPos
if evSize > maxSize {
@@ -763,6 +790,10 @@ func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev by
// Fill in actual length.
*lenp = byte(evSize - 2)
}
+ if mp != nil && mp.curg != nil {
+ mp.curg.traceLastEv = ev
+ mp.curg.traceLastStack = stackID
+ }
}
// traceCPUSample writes a CPU profile sample stack to the execution tracer's
@@ -877,7 +908,7 @@ const logicalStackSentinel = ^uintptr(0)
// stack table, and returns its unique ID. pcBuf should have a length equal to
// traceStackSize. skip controls the number of leaf frames to omit in order to
// hide tracer internals from stack traces, see CL 5523.
-func traceStackID(mp *m, pcBuf []uintptr, skip int) uint64 {
+func traceStackID(mp *m, pcBuf []uintptr, skip int) uint32 {
gp := getg()
curgp := mp.curg
nstk := 1
@@ -916,7 +947,7 @@ func traceStackID(mp *m, pcBuf []uintptr, skip int) uint64 {
nstk-- // skip runtime.main
}
id := trace.stackTab.put(pcBuf[:nstk])
- return uint64(id)
+ return id
}
// tracefpunwindoff returns false if frame pointer unwinding for the tracer is
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment