Skip to content

Instantly share code, notes, and snippets.

@kingluo
Last active August 18, 2023 11:32
Show Gist options
  • Save kingluo/50b8589069f5f11a78d2136c1155d613 to your computer and use it in GitHub Desktop.
Save kingluo/50b8589069f5f11a78d2136c1155d613 to your computer and use it in GitHub Desktop.
How to trace nginx blocking issue via systemtap?

Test

# run openresty
/usr/local/openresty/nginx/sbin/nginx -p $PWD -c nginx.conf -g "daemon off;"

# run systemtap script
./samples/blocking-bt.sxx -D STP_NO_OVERLOAD --arg time=60 --skip-badvars -x 144081

# trigger the workload
time curl localhost:10000/run
32000
status=200
{"access-control-allow-credentials":"true","connection":"close","content-type":"application\/json","server":"gunicorn\/19.9.0","access-control-allow-origin":"*","date":"Thu, 29 Sep 2022 10:30:42 GMT","content-length":"287"}

real    0m3.090s
user    0m0.006s
sys     0m0.006s

# then you could see the systemtap output
--- lua_resume block 2424 ms
>>> Hot CPU backtraces
__memcpy_avx_unaligned_erms
@./workload.lua:8
@./workload.lua:18
@./workload.lua:26
=content_by_lua(nginx.conf:17):2
        367
ngx_md5_body
@./workload.lua:16
@./workload.lua:26
=content_by_lua(nginx.conf:17):2
        169
propagatemark
@./workload.lua:8
@./workload.lua:18
@./workload.lua:26
=content_by_lua(nginx.conf:17):2
        38
>>> Time-consuming syscall backtraces
0x0
C:meth_connect
@/usr/local/share/lua/5.1/socket/http.lua:130
@/usr/local/share/lua/5.1/socket/http.lua:354
C:protected_
@./workload.lua:28
=content_by_lua(nginx.conf:17):2
        235173 us
0x0
C:meth_receive
@/usr/local/share/lua/5.1/socket/http.lua:160
@/usr/local/share/lua/5.1/socket/http.lua:362
C:protected_
@./workload.lua:28
=content_by_lua(nginx.conf:17):2
        1472811 us
#!/usr/bin/env stap++
@use nginx.lua
@use luajit
global tracing = 0
global start_time, syscall_start_time
global bts, syscall_bts
function lua_bt()
{
if (@defined(@var("globalL", "$^exec_path"))) {
mL = @var("globalL", "$^exec_path")
} else {
mL = ngx_lua_get_main_lua_vm()
}
if (mL == 0) {
return ""
}
g = luajit_G(mL)
if (g == 0) {
return ""
}
L = luajit_cur_thread(g)
if (L == 0) {
return ""
}
bt = luajit_backtrace(L, g, 0)
if (bt != "") {
f = probefunc()
if (f != "") {
bt = f . "\n" . bt
}
return bt
}
return ""
}
probe $^arg_probe :default(timer.profile)
{
if (tid() == target() && tracing) {
bts[lua_bt()] <<< 1
}
}
%( "$^arg_time :default()" != "" %?
probe timer.s($^arg_time)
{
warn("Time's up. Quitting now...\n")
exit()
}
%)
probe begin
{
warn(sprintf("Start tracing %d ($^exec_path)\n", target()))
%( "$^arg_time" != "" %?
warn(sprintf("Please wait for $^arg_time seconds...\n"))
%:
warn("Hit Ctrl-C to end.\n")
%)
}
probe process("$^libluajit_path").function("lua_resume")
{
if (tid() == target()) {
tracing = 1
start_time = gettimeofday_ms()
}
}
probe process("$^libluajit_path").function("lua_resume").return
{
if (tid() == target()) {
tracing = 0
elapsed = gettimeofday_ms() - start_time
if (elapsed >= $^arg_min_elapsed :default(200)) {
printf("--- lua_resume block %d ms\n", elapsed)
banner = 0
foreach (bt in bts- limit $^arg_limit :default(3)) {
cnt = @count(bts[bt])
if (cnt <= $^arg_min_bt :default(2)) {
break
}
if (banner == 0) {
printf(">>> Hot CPU backtraces\n")
banner = 1
}
printf("%s\t%d\n", bt, cnt)
}
banner = 0
foreach (lbt in syscall_bts- limit $^arg_syscall_limit :default(2)) {
total_us = @sum(syscall_bts[lbt])
if (total_us >= 1000) {
if (banner == 0) {
printf(">>> Time-consuming syscall backtraces\n")
banner = 1
}
printf("%s\t%d us\n", lbt, total_us)
}
}
printf("\n")
}
delete bts
delete syscall_bts
}
}
probe kernel.trace("sys_enter") {
if (tid() == target() && tracing) {
syscall_start_time = gettimeofday_us()
}
}
probe kernel.trace("sys_exit") {
if (tid() == target() && tracing) {
elapsed = gettimeofday_us() - syscall_start_time
if (elapsed >= $^arg_syscall_min_elapsed_us :default(400)) {
syscall_bts[lua_bt()] <<< elapsed
}
}
}
error_log /dev/stderr info;
worker_processes 1;
events {}
http {
init_by_lua_block {
-- jit.off()
}
server {
listen 10000;
location /run {
content_by_lua_block {
require("workload").run()
}
}
}
}
local http = require("socket.http")
local cjson = require("cjson")
math.randomseed(os.time())
local _M = {}
local function random_str(length)
local res = ""
for i = 1, length do
res = res .. string.char(math.random(97, 122))
end
return res
end
local function run_md5()
local cnt = 0
local buf = random_str(81920)
for _= 1,1000 do
cnt = cnt + #ngx.md5(buf)
end
return cnt
end
function _M.run()
local cnt = run_md5()
ngx.say(cnt)
local r, c, h = http.request {
method = "GET",
url = "http://httpbin.org/delay/1"
}
assert(r == 1)
ngx.say("status=", c)
ngx.say(cjson.encode(h))
end
return _M
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment