Skip to content

Instantly share code, notes, and snippets.

@avih
Created August 25, 2018 14:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save avih/f51008225d4a20a0981daed1faca4bc2 to your computer and use it in GitHub Desktop.
Save avih/f51008225d4a20a0981daed1faca4bc2 to your computer and use it in GitHub Desktop.
ffmpeg: configure: speedup x2-x8
From 4d0654798f50bcf1c68a07927511bde17ab723be Mon Sep 17 00:00:00 2001
From: "Avi Halachmi (:avih)" <avihpit@yahoo.com>
Date: Tue, 31 Jul 2018 03:22:52 +0300
Subject: [PATCH] configure: print timing information
Add the file tools/time.sh, and use it from configure to print timing
information and sorted summary, while isolating prominent sections.
The chosen sections/names are (in runtime order):
- Init
- Options review
- Compilation tests
- check_deps()
- Prepare deps
- flatten_extralibs_wrapper()
- Writing config
- print_enabled_components()
- Finalize
Where the foo_bar() names are execution solely of the function foo_bar, and
the rest are descriptive-ish.
---
configure | 21 ++++++++++++
tools/time.sh | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 115 insertions(+)
create mode 100644 tools/time.sh
diff --git a/configure b/configure
index b9c9d0b3..07398852 100755
--- a/configure
+++ b/configure
@@ -11,6 +11,9 @@
LC_ALL=C
export LC_ALL
+. "$(dirname "$0")"/tools/time.sh
+time_switch_to Init
+
# make sure we are running under a compatible shell
# try to make this part work with most shells
@@ -3633,6 +3636,8 @@ host_ldflags_filter=echo
target_path='$(CURDIR)'
+time_switch_to "Options review"
+
# since the object filename is not given with the -MM flag, the compiler
# is only able to print the basename, and we must add the path ourselves
DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< 2>/dev/null | sed -e "/^\#.*/d" -e "s,^[[:space:]]*$(@F),$(@D)/$(@F)," > $(@:.o=.d)'
@@ -4116,6 +4121,8 @@ exesuf() {
EXESUF=$(exesuf $target_os)
HOSTEXESUF=$(exesuf $host_os)
+time_switch_to "Compilation tests"
+
# set temporary file name
: ${TMPDIR:=$TEMPDIR}
: ${TMPDIR:=$TMP}
@@ -6720,11 +6727,15 @@ enable frame_thread_encoder
enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
+time_switch_to "check_deps()"
+
check_deps $CONFIG_LIST \
$CONFIG_EXTRA \
$HAVE_LIST \
$ALL_COMPONENTS \
+time_switch_to "Prepare deps"
+
enabled threads && ! enabled pthreads && ! enabled atomics_native && die "non pthread threading without atomics not supported, try adding --enable-pthreads or --cpu=i486 or higher if you are on x86"
enabled avresample && warn "Building with deprecated library libavresample"
@@ -6772,10 +6783,14 @@ for linkunit in $LIBRARY_LIST; do
eval prepend ${linkunit}_extralibs $current_extralibs
done
+time_switch_to "flatten_extralibs_wrapper()"
+
for linkunit in $LIBRARY_LIST $PROGRAM_LIST $EXTRALIBS_LIST; do
eval ${linkunit}_extralibs=\$\(flatten_extralibs_wrapper ${linkunit}_extralibs\)
done
+time_switch_to "Writing config"
+
map 'enabled $v && intrinsics=${v#intrinsics_}' $INTRINSICS_LIST
for thread in $THREADS_LIST; do
@@ -7211,6 +7226,8 @@ cat > $TMPH <<EOF
#define AVUTIL_AVCONFIG_H
EOF
+time_switch_to "print_enabled_components()"
+
print_config AV_HAVE_ $TMPH $HAVE_LIST_PUB
echo "#endif /* AVUTIL_AVCONFIG_H */" >> $TMPH
@@ -7263,6 +7280,8 @@ print_enabled_components libavdevice/indev_list.c AVInputFormat indev_list $INDE
print_enabled_components libavdevice/outdev_list.c AVOutputFormat outdev_list $OUTDEV_LIST
print_enabled_components libavformat/protocol_list.c URLProtocol url_protocols $PROTOCOL_LIST
+time_switch_to Finalize
+
# Settings for pkg-config files
cat > $TMPH <<EOF
@@ -7293,3 +7312,5 @@ for lib in $LIBRARY_LIST; do
done
cp_if_changed $TMPH ffbuild/config.sh
+
+time_finish
diff --git a/tools/time.sh b/tools/time.sh
new file mode 100644
index 00000000..375fc28c
--- /dev/null
+++ b/tools/time.sh
@@ -0,0 +1,94 @@
+# time.sh - simple flat/top-level timing measurement helper for shell scripts
+# Copyright 2018 Avi Halachmi @License MIT https://github.com/avih/time.sh
+#
+# Example (see README.md for more info):
+# . /path/to/time.sh
+# time_switch_to foo
+# <do task foo>
+# time_switch_to "bar baz"
+# <do bar maybe with an argument baz>
+# time_finish
+
+
+[ "${TIME_PRINT-}" ] || TIME_PRINT=3
+
+# find a command to print a timestamp in ms, or fail to 'echo 0'.
+# perl/python will have bigger overhead, but we auto-compensate reasonably-ish.
+if [ -z "${TIME_CMD-}" ]; then
+ for TIME_CMD in \
+ 'date +%s%3N' \
+ 'python -c "from time import time; print (int(time()*1000))"' \
+ 'perl -MPOSIX=strftime -MTime::HiRes=time -le "print int(time*1000)"' \
+ 'date +%s000' \
+ 'echo 0' ;
+ do
+ _tsh_tmp=$(eval "$TIME_CMD") 2>/dev/null &&
+ case $_tsh_tmp in *[!0-9]*|'') ;; *) break; esac
+ done
+fi
+
+_tsh_started=
+_tsh_NL="
+"
+
+# stops the measurement if one was started, and start a new one named $1
+# only one measurement at a time is supported, and one (automatic) overall.
+time_switch_to() {
+ _tsh_now=$(eval "$TIME_CMD")
+
+ if [ "$_tsh_started" ]; then
+ _tsh_dur=$(( _tsh_now - _tsh_t0 - TIME_COMP ))
+ [ $_tsh_dur -ge 0 ] || _tsh_dur=0
+ time_total=$(( time_total + _tsh_dur ))
+ time_details="${time_details}${_tsh_dur} $_tsh_name${_tsh_NL}"
+ case $TIME_PRINT in 2|3) >&2 echo "[ $_tsh_name: $_tsh_dur ms ]"; esac
+ else
+ _tsh_started=yes; time_total=0; time_details=
+ fi
+
+ case ${1-} in
+ --|'') _tsh_started= ;; # "finish". caller can use total and details
+ *) _tsh_name=$1
+ _tsh_t0=$_tsh_now
+ case $TIME_PRINT in 1|3) >&2 echo "[ $1 ... ]"; esac
+ esac
+}
+
+# assess compensation in ms/measure, floored to err on the side of caution.
+# not perfect since cpu frequency scaling can change etc, but reasonable.
+_tsh_assess_overhead() {
+ TIME_PRINT=0; TIME_COMP=0
+ time_switch_to x; time_switch_to -- # warmup and reset
+ for x in 1 2 3 4; do time_switch_to $x; done; time_switch_to --
+ echo $(( time_total / 4 )) # 5 calls = 4 deltas, floored average
+}
+
+case ${TIME_COMP-} in *[!0-9]*|'') TIME_COMP=$(_tsh_assess_overhead); esac
+
+# $1: number, $2: number to be printed as rounded percentage (number) of $1
+_tsh_percent() {
+ [ "$1" -gt 0 ] || set -- 1 0
+ [ "$2" -ge 0 ] || set -- $1 0
+ echo $(( (200 * $2 / $1 + 1 ) / 2 ))
+}
+
+# print a summary and reset the measurements. slow-ish but outside the measure.
+# percentage values are rounded and might not add up to 100.
+time_finish() {
+ time_switch_to --
+ _tsh_shell="$(basename "$( (readlink /proc/$$/exe || ps -o comm= -p $$) 2>/dev/null)")"
+
+ >&2 printf "\n%s\n%s ms%s, %s ms/measure, %s\n%s\n" \
+ ----- \
+ $time_total "${TIME_TITLE:+ - $TIME_TITLE}" \
+ $TIME_COMP "$(uname)${_tsh_shell:+/$_tsh_shell}" \
+ -----
+
+ printf %s "$time_details" | sort -rn | \
+ (while read line; do
+ >&2 printf "%${#time_total}s ms %3s %% %s\n" \
+ "${line%% *}" "$(_tsh_percent $time_total $line)" "${line#* }"
+ done)
+
+ >&2 printf "%s\n" -----
+}
--
2.17.1
From 7593d9f6580afd8a1066df1e1910a817d7c2fff1 Mon Sep 17 00:00:00 2001
From: "Avi Halachmi (:avih)" <avihpit@yahoo.com>
Date: Mon, 30 Jul 2018 22:39:15 +0300
Subject: [PATCH 1/3] configure: flatten_extralibs_wrapper(): speedup x50 -
x200
Currently configure spends 50-70% of its runtime inside a single
function: flatten_extralibs[_wrapper] - which does string processing.
During its run, nearly 20K command substitutions (subshells) are used,
including its callees unique() and resolve(), which is the reason
for its lengthy run.
This commit avoids all subshells during its execution, speeding it up
by about two orders of magnitude, and reducing the overall configure
runtime by 50-70% .
resolve() is rewritten to avoid subshells, and in unique() and
flatten_extralibs() we "inline" the filter[_out] functionality.
Note that unique() now produces different output order than before.
Previously it kept the last occurence of re-occuring items, and now
it keeps the first. However, this shouldn't make a difference as it's
used for unique library dependencies where the order doesn't matter.
---
configure | 33 ++++++++++++++++++++-------------
1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/configure b/configure
index b9c9d0b3..465d808d 100755
--- a/configure
+++ b/configure
@@ -845,21 +845,23 @@ prepend(){
}
unique(){
- var=$1
- uniq_list=""
- for tok in $(eval echo \$$var); do
- uniq_list="$(filter_out $tok $uniq_list) $tok"
+ unique_out=
+ eval unique_in=\$$1
+ for v in $unique_in; do
+ # " $unique_out" +space such that every item is surrounded with spaces
+ case " $unique_out" in *" $v "*) continue; esac # already in list
+ unique_out="$unique_out$v "
done
- eval "$var=\"${uniq_list}\""
+ eval $1=\$unique_out
}
resolve(){
- var=$1
- tmpvar=
- for entry in $(eval echo \$$var); do
- tmpvar="$tmpvar $(eval echo \$${entry})"
+ resolve_out=
+ eval resolve_in=\$$1
+ for v in $resolve_in; do
+ eval 'resolve_out="$resolve_out$'$v' "'
done
- eval "$var=\"${tmpvar}\""
+ eval $1=\$resolve_out
}
add_cppflags(){
@@ -6734,14 +6736,19 @@ if test $target_os = "haiku"; then
fi
flatten_extralibs(){
- unset nested_entries
+ nested_entries=
list_name=$1
eval list=\$${1}
for entry in $list; do
entry_copy=$entry
resolve entry_copy
- append nested_entries $(filter '*_extralibs' $entry_copy)
- flat_entries=$(filter_out '*_extralibs' $entry_copy)
+ flat_entries=
+ for e in $entry_copy; do
+ case $e in
+ *_extralibs) nested_entries="$nested_entries$e ";;
+ *) flat_entries="$flat_entries$e ";;
+ esac
+ done
eval $entry="\$flat_entries"
done
append $list_name "$nested_entries"
--
2.17.1
From d1cf17fdb518b3a456bbac61e306599d0a4a7400 Mon Sep 17 00:00:00 2001
From: "Avi Halachmi (:avih)" <avihpit@yahoo.com>
Date: Mon, 30 Jul 2018 22:48:04 +0300
Subject: [PATCH 2/3] configure: print_enabled_components(): speedup x4 - x10
Inside print_enabled components, the filter_list case invokes sed
about 350 times to parse the same source file and extract different
info for each arg. This is never instant, and on systems where fork is
slow (notably MSYS2/Cygwin on windows) it takes many seconds.
Change it to use sed once on the source file and set env vars with the
parse results, then use these results inside the loop.
Additionally, the cases of indev_list and outdev_list are very
infrequent, but nevertheless they're faster, and arguably cleaner, with
shell parameter substitutions than with command substitutions.
---
configure | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/configure b/configure
index 465d808d..b74c44e7 100755
--- a/configure
+++ b/configure
@@ -7224,9 +7224,10 @@ echo "#endif /* AVUTIL_AVCONFIG_H */" >> $TMPH
cp_if_changed $TMPH libavutil/avconfig.h
-full_filter_name(){
- sed -n "s/^extern AVFilter ff_\([avfsinkrc]\{2,5\}\)_$1;/\1_$1/p" $source_path/libavfilter/allfilters.c
-}
+# full_filter_name_foo=vf_foo
+# full_filter_name_bar=asrc_bar
+# ...
+eval "$(sed -n "s/^extern AVFilter ff_\([avfsinkrc]\{2,5\}\)_\(.*\);/full_filter_name_\2=\1_\2/p" $source_path/libavfilter/allfilters.c)"
# generate the lists of enabled components
print_enabled_components(){
@@ -7239,13 +7240,13 @@ print_enabled_components(){
if enabled $c; then
case $name in
filter_list)
- c=$(full_filter_name $(remove_suffix _filter $c))
+ eval c=\$full_filter_name_${c%_filter}
;;
indev_list)
- c=$(add_suffix _demuxer $(remove_suffix _indev $c))
+ c=${c%_indev}_demuxer
;;
outdev_list)
- c=$(add_suffix _muxer $(remove_suffix _outdev $c))
+ c=${c%_outdev}_muxer
;;
esac
printf " &ff_%s,\n" $c >> $TMPH
--
2.17.1
From a068913d2c4c460ef0a256cb3676bd1227f59667 Mon Sep 17 00:00:00 2001
From: "Avi Halachmi (:avih)" <avihpit@yahoo.com>
Date: Sun, 5 Aug 2018 06:16:44 +0300
Subject: [PATCH 3/3] configure: check_deps(): x4 - x25 speedup
check_deps() recursively enables/disables components, and its loop is
iterated nearly 6000 times. It's particularly slow in bash - currently
consuming more than 50% of configure runtime, and about 20% with other
shells.
This commit applies few local optimizations, most effective first:
- Use $1 $2 ... instead of pushvar/popvar, and same at enable_deep*
- Abort early in one notable case - empty deps, to avoid costly no-op.
- Smaller changes which do add up:
- Handle ${cfg}_checking locally instead of via enable[d]/disable
- ${cfg}_checking: test done before inprogress - x2 faster in 50%+
- one eval instead of several at the empty-deps early abort path.
- The "actual work" part is unmodified - just its surroundings.
Biggest speedups (relative and absolute) are observed with bash.
---
configure | 86 +++++++++++++++++++++++++++++--------------------------
1 file changed, 46 insertions(+), 40 deletions(-)
diff --git a/configure b/configure
index b74c44e7..e9cb7703 100755
--- a/configure
+++ b/configure
@@ -676,13 +676,10 @@ disable_sanitized(){
do_enable_deep(){
for var; do
enabled $var && continue
- eval sel="\$${var}_select"
- eval sgs="\$${var}_suggest"
- pushvar var sgs
- enable_deep $sel
- popvar sgs
- enable_deep_weak $sgs
- popvar var
+ set -- $var
+ eval enable_deep \$${var}_select
+ var=$1
+ eval enable_deep_weak \$${var}_suggest
done
}
@@ -694,9 +691,9 @@ enable_deep(){
enable_deep_weak(){
for var; do
disabled $var && continue
- pushvar var
+ set -- $var
do_enable_deep $var
- popvar var
+ var=$1
enable_weak $var
done
}
@@ -756,40 +753,49 @@ is_in(){
return 1
}
+# The cfg loop is very hot (several thousands iterations), and in bash also
+# potentialy quite slow. Try to abort the iterations early, preferably without
+# calling functions. 70%+ of the time cfg is already done or without deps.
check_deps(){
for cfg; do
- enabled ${cfg}_checking && die "Circular dependency for $cfg."
- disabled ${cfg}_checking && continue
- enable ${cfg}_checking
-
- eval dep_all="\$${cfg}_deps"
- eval dep_any="\$${cfg}_deps_any"
- eval dep_con="\$${cfg}_conflict"
- eval dep_sel="\$${cfg}_select"
- eval dep_sgs="\$${cfg}_suggest"
- eval dep_ifa="\$${cfg}_if"
- eval dep_ifn="\$${cfg}_if_any"
-
- pushvar cfg dep_all dep_any dep_con dep_sel dep_sgs dep_ifa dep_ifn
- check_deps $dep_all $dep_any $dep_con $dep_sel $dep_sgs $dep_ifa $dep_ifn
- popvar cfg dep_all dep_any dep_con dep_sel dep_sgs dep_ifa dep_ifn
-
- [ -n "$dep_ifa" ] && { enabled_all $dep_ifa && enable_weak $cfg; }
- [ -n "$dep_ifn" ] && { enabled_any $dep_ifn && enable_weak $cfg; }
- enabled_all $dep_all || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but not all dependencies are satisfied: $dep_all"; }
- enabled_any $dep_any || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but not any dependency is satisfied: $dep_any"; }
- disabled_all $dep_con || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but some conflicting dependencies are unsatisfied: $dep_con"; }
- disabled_any $dep_sel && { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but some selected dependency is unsatisfied: $dep_sel"; }
-
- enabled $cfg && enable_deep_weak $dep_sel $dep_sgs
-
- for dep in $dep_all $dep_any $dep_sel $dep_sgs; do
- # filter out library deps, these do not belong in extralibs
- is_in $dep $LIBRARY_LIST && continue
- enabled $dep && eval append ${cfg}_extralibs ${dep}_extralibs
- done
+ eval [ x\$${cfg}_checking = xdone ] && continue
+ eval [ x\$${cfg}_checking = xinprogress ] && die "Circular dependency for $cfg."
+
+ eval "
+ dep_all=\$${cfg}_deps
+ dep_any=\$${cfg}_deps_any
+ dep_con=\$${cfg}_conflict
+ dep_sel=\$${cfg}_select
+ dep_sgs=\$${cfg}_suggest
+ dep_ifa=\$${cfg}_if
+ dep_ifn=\$${cfg}_if_any
+ "
+
+ # most of the time here $cfg has no deps - avoid costly no-op work
+ if [ "$dep_all$dep_any$dep_con$dep_sel$dep_sgs$dep_ifa$dep_ifn" ]; then
+ eval ${cfg}_checking=inprogress
+
+ set -- $cfg "$dep_all" "$dep_any" "$dep_con" "$dep_sel" "$dep_sgs" "$dep_ifa" "$dep_ifn"
+ check_deps $dep_all $dep_any $dep_con $dep_sel $dep_sgs $dep_ifa $dep_ifn
+ cfg=$1; dep_all=$2; dep_any=$3; dep_con=$4; dep_sel=$5 dep_sgs=$6; dep_ifa=$7; dep_ifn=$8
+
+ [ -n "$dep_ifa" ] && { enabled_all $dep_ifa && enable_weak $cfg; }
+ [ -n "$dep_ifn" ] && { enabled_any $dep_ifn && enable_weak $cfg; }
+ enabled_all $dep_all || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but not all dependencies are satisfied: $dep_all"; }
+ enabled_any $dep_any || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but not any dependency is satisfied: $dep_any"; }
+ disabled_all $dep_con || { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but some conflicting dependencies are unsatisfied: $dep_con"; }
+ disabled_any $dep_sel && { disable $cfg && requested $cfg && die "ERROR: $cfg requested, but some selected dependency is unsatisfied: $dep_sel"; }
+
+ enabled $cfg && enable_deep_weak $dep_sel $dep_sgs
+
+ for dep in $dep_all $dep_any $dep_sel $dep_sgs; do
+ # filter out library deps, these do not belong in extralibs
+ is_in $dep $LIBRARY_LIST && continue
+ enabled $dep && eval append ${cfg}_extralibs ${dep}_extralibs
+ done
+ fi
- disable ${cfg}_checking
+ eval ${cfg}_checking=done
done
}
--
2.17.1
From 94b80da14fbca1296ac744a53581b817ae56d370 Mon Sep 17 00:00:00 2001
From: "Avi Halachmi (:avih)" <avihpit@yahoo.com>
Date: Wed, 1 Aug 2018 09:06:36 +0300
Subject: [PATCH] configure: unique(): fixup to restore original output order
Originally unique() was keeping the last occurence of each non-unique
item, but commit XXX changed it to keep the first.
This commit restores the original order due to *TBD*.
---
configure | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/configure b/configure
index e9cb7703..ba123ead 100755
--- a/configure
+++ b/configure
@@ -850,14 +850,27 @@ prepend(){
eval "$var=\"$* \$$var\""
}
+reverse () {
+ eval '
+ reverse_out=
+ for v in $'$1'; do
+ reverse_out="$v $reverse_out"
+ done
+ '$1'=$reverse_out
+ '
+}
+
+# keeps the last occurence of each non-unique item
unique(){
unique_out=
eval unique_in=\$$1
+ reverse unique_in
for v in $unique_in; do
# " $unique_out" +space such that every item is surrounded with spaces
case " $unique_out" in *" $v "*) continue; esac # already in list
unique_out="$unique_out$v "
done
+ reverse unique_out
eval $1=\$unique_out
}
--
2.17.1
From 77f897c8ed4eec9119d758037b0311629f549a5b Mon Sep 17 00:00:00 2001
From: "Avi Halachmi (:avih)" <avihpit@yahoo.com>
Date: Wed, 1 Aug 2018 09:10:12 +0300
Subject: [PATCH] configure: remove unused pushvar()/popvar()
---
configure | 19 -------------------
1 file changed, 19 deletions(-)
diff --git a/configure b/configure
index e9cb7703..d6c1d032 100755
--- a/configure
+++ b/configure
@@ -619,25 +619,6 @@ get_sanitized(){
eval echo \$$(sanitize_var_name "$1")
}
-pushvar(){
- for pvar in $*; do
- eval level=\${${pvar}_level:=0}
- eval ${pvar}_${level}="\$$pvar"
- eval ${pvar}_level=$(($level+1))
- done
-}
-
-popvar(){
- for pvar in $*; do
- eval level=\${${pvar}_level:-0}
- test $level = 0 && continue
- eval level=$(($level-1))
- eval $pvar="\${${pvar}_${level}}"
- eval ${pvar}_level=$level
- eval unset ${pvar}_${level}
- done
-}
-
request(){
for var in $*; do
eval ${var}_requested=yes
--
2.17.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment