Skip to content

Instantly share code, notes, and snippets.

@karolba
Last active August 24, 2023 20:29
Show Gist options
  • Save karolba/fb4559fa34786a630fed3a1392af2a12 to your computer and use it in GitHub Desktop.
Save karolba/fb4559fa34786a630fed3a1392af2a12 to your computer and use it in GitHub Desktop.
A very quick-and-dirty strace-to-JSON process tree parser
#!/bin/zsh
# Reads the per-process strace.<pid> files in the current directory and
# prints the traced process tree (execve calls, working directories and
# environment changes).
#
# Generate strace.* files like this:
# strace -o strace -D -qqq --no-abbrev -y -Y -s 16384 -ff -ttt --seccomp-bpf -e "trace=fork,vfork,clone,fchdir,chdir,/exec*" -e signal=none [...]

# Run the rest of the script with zsh's sh emulation. Later code uses
# 0-based array indices and unquoted expansions that are expected to split
# into words -- presumably the reason for this line.
emulate sh
# Speed up grep and other similar tools
export LC_ALL=C
# Suffix of the strace.* file that sorts first in version order; it is used
# as the root of the printed tree.
# NOTE(review): this assumes the lowest-numbered trace file belongs to the
# root process -- confirm for traces where pids may have wrapped around.
first=$(echo strace.* | tr ' ' '\n' | sort --version-sort | head -n1 | sed 's/^strace\.//')
# child pid -> parent pid
declare -A proc_parent=()
# parent pid -> space-separated list of child pids
declare -A proc_children=()
# pid -> sorted environment of the most recent execve (copied to children on fork)
declare -A proc_last_known_envp=()
# pid -> current working directory; the root process is taken to start in $PWD
declare -A proc_directory=(
["$first"]="$PWD"
)
# pid -> newline-separated JSON objects, one per successful execve
declare -A proc_execve_calls_json_lines=()
# pid -> number of entries stored in proc_execve_calls_json_lines for that pid
declare -A proc_execve_calls_json_line_count=()
# count [ARG...]
# Print how many arguments were passed (used to count words after splitting).
count() {
    printf '%s\n' "$#"
}
# When busybox is installed, route every later grep and comm call through
# its applets (the author's note says the busybox tools were, for some
# reason, faster for this script than the GNU ones).
if command -v busybox >/dev/null; then
    grep() {
        busybox grep "$@"
    }
    comm() {
        busybox comm "$@"
    }
fi
# sort_envp LIST
# Extract every double-quoted string from LIST (strace's rendering of an
# envp array) and print them one per line, quotes included, in sorted order.
# Arguments: $1 - text containing "..." strings with backslash escapes
# Outputs:   the sorted strings on stdout
sort_envp() {
    # bash:
    #printf '%s\n' "$1" | grep -Eo '"(\\.|[^"\\])*"' | sort -S 10%
    # faster version for zsh: split grep's output on newlines (f) and sort
    # it inside the shell (o).
    # The order must be plain lexical, not the numeric-aware (n) order: the
    # result is fed to comm, which compares lines as strings, so with (n)
    # entries such as "B9=.." / "B10=.." would be reported as both new and
    # deleted. LC_ALL=C is exported at the top of the script, which keeps
    # the shell's ordering byte-wise like comm's.
    printf '%s\n' "${(o@f)$(printf '%s\n' "$1" | grep -Eo '"(\\.|[^"\\])*"')}"
}
# json_lines_to_json_array LINES
# Join newline-separated JSON values with commas so they can be placed
# between [ and ]. Nothing is appended after the last value.
json_lines_to_json_array() {
    local lines="$1"
    printf '%s' "${lines//$'\n'/,}"
}
# Replay every traced event in timestamp order and fill the per-process
# tables (proc_parent, proc_children, proc_directory, proc_last_known_envp,
# proc_execve_calls_json_lines, proc_execve_calls_json_line_count).
#
# The process substitutions after "done" normalise the raw strace lines to
#   <timestamp> <type> <pid> <data>
# where <type> is fork, chdir or execve:
#   fork   - <pid> is the parent, <data> the last field of the line
#            (the new child's pid on success)
#   chdir  - <data> is the new directory
#   execve - <data> is the complete execve(...) line as printed by strace
# grep -H forces the "strace.<pid>:" prefix even when only a single trace
# file exists; the awk/sed stages depend on that prefix to find the pid.
while read -r _time type process data; do
    case "$type" in
        fork)
            # The child starts with its parent's directory and environment.
            parent=$process
            child=$data
            proc_parent[$child]="$parent"
            proc_directory[$child]="${proc_directory[$parent]}"
            proc_last_known_envp[$child]="${proc_last_known_envp[$parent]}"
            proc_children[$parent]="${proc_children[$parent]} $child"
            ;;
        chdir)
            new_dir=$data
            proc_directory[$process]="$new_dir"
            ;;
        execve)
            execve_call=$data
            # for bash:
            #prog=$(printf '%s\n' "$execve_call" | grep -Eo '"(\\.|[^"\\])*"' | head -n1)
            #mapfile -t < <(printf '%s\n' "$execve_call" | grep -Eo '[[](,| |"(\\.|[^"\\])*")*[]]')
            #argv=${MAPFILE[0]}
            #envp=$(sort_envp "${MAPFILE[1]}")
            # for zsh:
            # grep emits, in order: the program path, the [argv] list and
            # the [envp] list; indices are 0-based under "emulate sh".
            prog_argv_and_envp=("${(@f)$(printf '%s\n' "$execve_call" | grep -Eo '[[](,| |"(\\.|[^"\\])*")*[]]|"(\\.|[^"\\])*"')}")
            prog=${prog_argv_and_envp[0]}
            # Not named "argv": in zsh that parameter is tied to the
            # positional parameters, so assigning to it would clobber them.
            argv_json=${prog_argv_and_envp[1]}
            envp=$(sort_envp "${prog_argv_and_envp[2]}")

            # Report the environment only as a difference against what this
            # process had before (inherited at fork or set by an earlier
            # execve).
            parent_envp=${proc_last_known_envp[$process]}
            proc_last_known_envp[$process]="$envp"
            if [[ "$envp" == "$parent_envp" ]]; then
                envp_new=
                envp_deleted=
            else
                envp_new=$(comm -13 <(printf '%s\n' "$parent_envp") <(printf '%s\n' "$envp"))
                envp_deleted=$(comm -23 <(printf '%s\n' "$parent_envp") <(printf '%s\n' "$envp"))
            fi

            directory=${proc_directory[$process]}
            # Each optional part expands to nothing when its list is empty.
            envp_new_part=${envp_new:+"$(printf ', "envp-new": [%s]' "$(json_lines_to_json_array "$envp_new")")"}
            envp_deleted_part=${envp_deleted:+"$(printf ', "envp-deleted": [%s]' "$(json_lines_to_json_array "$envp_deleted")")"}
            json=$(printf '{"prog": %s, "argv": %s, "dir": "%s" %s %s}\n' \
                "$prog" "$argv_json" "$directory" \
                "$envp_new_part" "$envp_deleted_part")

            # Append to this pid's newline-separated list; the ${...+...}
            # form avoids a leading empty line for the first entry.
            proc_execve_calls_json_lines[$process]=$(printf '%s\n' ${proc_execve_calls_json_lines[$process]+"${proc_execve_calls_json_lines[$process]}"} "$json")
            (( proc_execve_calls_json_line_count[$process]++ ))
            ;;
        *)
            # Diagnostics go to stderr so they are not piped into python3.
            echo "Unknown type in $_time $type $process $data" >&2
            exit 1
            ;;
    esac
done < <(sort -m -k 1 \
    <(grep -H -E '^[0-9.]* (clone|fork|vfork)' strace.* |
        awk '$2~/^(clone|fork|vfork)/' |
        sed -r 's/([0-9])(<[^>]*>)/\1/' |
        awk -F '[: ]' '{sub("^strace.","");} {print $2, "fork", $1, $NF}' |
        sort -k 1) \
    <(grep -H -E '^[0-9.]* f?chdir' strace.* |
        awk '{sub("^strace\\.","");sub(":", " ")} $3~/^f?chdir/ && $NF == 0' |
        sed -e 's/ fchdir([0-9]*<\(.*\)>) = 0$/ chdir \1/' -e t -e 's/("/ /' -e 's/") = 0$//' |
        awk 'function set123(a,b,c){$1=a;$2=b;$3=c}{set123($2, $3, $1); print}' |
        sort -k 1) \
    <(grep -H -E '^[0-9.]* exec' strace.* |
        awk '$2~/^exec/ && / = 0$/{sub("^strace\\.",""); print}' |
        sed 's/:/ /' |
        awk '{$1=($2 " execve " $1); $2=""; print}' |
        sort -k 1)
)
# Exit status visit() uses to tell its caller that a subtree printed nothing
# and can be left out of the tree.
declare -r unneccessary_node_return=5

# visit PID
# Print the subtree rooted at PID as a dict literal followed by a comma.
# The text is later evaluated by python3, which is why "**{...}" unpacking
# and trailing commas are acceptable. Processes without any execve call are
# skipped: a single child takes their place, and subtrees in which nothing
# was ever exec'ed are dropped entirely.
# Globals:   proc_children, proc_execve_calls_json_lines,
#            proc_execve_calls_json_line_count (read)
# Arguments: $1 - pid of the process to print
# Outputs:   the subtree on stdout (possibly nothing)
# Returns:   0 when something may have been printed,
#            $unneccessary_node_return when the whole subtree was pruned
visit() {
    local pid=$1
    local execve_count=$((proc_execve_calls_json_line_count[$pid]))
    local how_many_children=$(count ${proc_children[$pid]})

    # A leaf that never exec'ed anything carries no information.
    if ((how_many_children == 0 && execve_count == 0)); then
        return $unneccessary_node_return
    fi

    # A process that only forked once is replaced by its child. The child's
    # status is passed on so that a pruned child is also reported as pruned
    # (previously this always returned 0).
    if ((how_many_children == 1 && execve_count == 0)); then
        visit ${proc_children[$pid]}
        return $?
    fi

    # Render all children in a subshell; its exit status is 0 as soon as one
    # child reported success, otherwise $unneccessary_node_return.
    local children
    children=$(
        ret=$unneccessary_node_return
        for child in ${proc_children[$pid]}; do
            if visit "$child"; then
                ret=0
            fi
        done
        exit $ret
    )
    if ((execve_count == 0 && $? == unneccessary_node_return)); then
        return $unneccessary_node_return
    fi

    if ((execve_count > 0)) || [[ $children ]]; then
        printf '{\n'
        case "$execve_count" in
            0) ;;
            # One execve: merge its keys directly into this node.
            1) printf '**%s,\n' "${proc_execve_calls_json_lines[$pid]}" ;;
            # Several execves: keep them as a list.
            *) printf '"execves": [%s],\n' "$(json_lines_to_json_array "${proc_execve_calls_json_lines[$pid]}")" ;;
        esac
        if [[ $children ]]; then
            printf '"children": [%s],\n' "$children"
        fi
        printf '},\n'
    else
        return $unneccessary_node_return
    fi
}
# Render the tree in three stages:
#   1. wrap the dict literal printed by visit in a tiny Python program and
#      let python3 evaluate it and dump it as real JSON;
#   2. post-process that JSON with the inline Python program below;
#   3. let yq pretty-print the result.
{
    echo 'import json'
    echo 'def j(data):'
    echo '    print(json.dumps(data, indent=" "))'
    echo 'j('
    visit "$first"
    echo ')'
} | python3 | python3 -c '
import json


def envp_walk(name, node):
    # Turn the list of "VAR=value" strings stored under node[name] into a
    # dict, then do the same for every execve entry and every child.
    if name in node and isinstance(node[name], list):
        new_envp_new = {}
        for e in node[name]:
            # partition() also copes with an entry that has no "=" at all
            # (it becomes a variable with an empty value) instead of raising.
            varname, _, varvalue = e.partition("=")
            new_envp_new[varname] = varvalue
        node[name] = new_envp_new
    if "execves" in node:
        node["execves"] = [envp_walk(name, child) for child in node["execves"]]
    if "children" in node:
        node["children"] = [envp_walk(name, child) for child in node["children"]]
    return node


def reduce_walk(node):
    # Replace every child that consists of nothing but a "children" list by
    # those grandchildren.
    if "children" in node:
        new_children = []
        for child in node["children"]:
            if len(child.keys()) == 1 and "children" in child:
                new_children.extend(reduce_walk(grandchild) for grandchild in child["children"])
            else:
                new_children.append(reduce_walk(child))
        node["children"] = new_children
    return node


with open("/dev/stdin", "r") as file:
    tree = json.load(file)

tree = envp_walk("envp-new", tree)
tree = envp_walk("envp-deleted", tree)

# Keep flattening until a pass no longer changes the serialized tree.
previous_json_dump = ""
while True:
    tree = reduce_walk(tree)
    json_dump = json.dumps(tree, indent=2)
    if json_dump == previous_json_dump:
        print(json_dump)
        break
    previous_json_dump = json_dump
' | yq -P
# vim: sw=4 ts=4 sts=4 et
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment