Skip to content

Instantly share code, notes, and snippets.

@roualdes roualdes/flatten.sh
Last active Jun 21, 2019

Embed
What would you like to do?
Flatten stan-dev/math's {scal,arr,mat}/**/*.hpp
# Flatten stan/math/{prim,rev,fwd,mix} and test/unit/math/{prim,rev,fwd,mix}
#
# Details
#
# Only works with GNU sed. If you're running macOS, install GNU sed
# with homebrew. For instance,
#
# $ brew install gnu-sed
#
# It is my understanding that Linux machines use GNU sed by default,
# and I have no idea what to do for windows machines.
#
# flatten.sh should be run from the top-level directory of the
# stan-dev/math repository and only on a clean branch. You should NOT
# run flatten.sh twice in a row.
#
# Some human intervention is required. The file
# test/unit/math/rev/util.hpp is just easier to deal with by hand. A
# statement is printed at the end of this code to help me remember
# this.
#
# For more details see https://github.com/stan-dev/math/issues/937
#
# This script attempts to
#
# 1. maintain each file's git history, though this is not 100%
# possible as far as I know. An effort is made to maximize the number
# of retained git histories at the expense of many commits;
#
# 2. remove duplicate #ifndefs and #endifs that occur due to the
# merging of files from separate directories,
# e.g. prim/scal/fun/Phi.hpp and prim/mat/fun/Phi.hpp;
#
# 3. remove multiple namespace declarations that occur due to the
# merging of files from separate directories;
#
# 4. move #includes to the top of the files, while removing
# self-includes that occur due to the merging of files from separate
# directories and ordering them to satisfy build requirements and make
# cpplint,
#
# 5. reduce the number of includes by only ever including top-level
# headers,
#
# 6. change TEST names that occur due to the merging of files from
# separate directories
#
# 7. change TESTs to include only top-level header files,
# e.g. stan/math/prim.hpp
#
# Example
#
# $ git clone https://github.com/stan-dev/math.git
# $ git clone https://gist.github.com/ea72fc0fbcac05329e562a0e60ee0a1d.git flatten_math
# $ cp flatten_math/flatten.sh math/
# $ cd math
# $ chmod +x ./flatten.sh
# $ ./flatten.sh
# $ ./runTests.py test/unit/math/prim/meta/is_constant_struct_test.cpp
#
# Notes
# 1. sed spits out an error for
# stan/math/prim/meta/scalar_type.hpp but I can't figure out why
# Sub-trees of ./stan/math and ./test/unit/math that get flattened,
# in the order they are processed.
all_fldrs=(rev fwd mix prim)
# Per-dimension directory names that are folded into their parent.
dir_dimensions=(scal arr mat)
# Run GNU sed with the given arguments.
#
# Probes `sed --version`; when the probe succeeds `sed` is used,
# otherwise the call is forwarded to `gsed` (the name Homebrew's gnu-sed
# installs, see the header notes).
#
# Arguments: passed through unchanged to sed/gsed.
# Returns:   the exit status of whichever sed was actually run.
gnused () {
  # An explicit if/else is required here: with `probe && sed "$@" || gsed "$@"`
  # a failing `sed "$@"` would ALSO trigger the gsed branch, repeating the
  # edit attempt and hiding the real exit status.
  if sed --version >/dev/null 2>&1; then
    sed "$@"
  else
    gsed "$@"
  fi
}
# Main flattening pass.
#
# For every entry of all_fldrs, the scal/arr/mat sub-trees of
# ./stan/math/<fldr> and ./test/unit/math/<fldr> are folded into their
# parent directory:
#   * a file whose flattened target does not exist yet is moved with
#     `git mv` and committed on its own (one commit per file);
#   * a file whose target already exists is appended to it, after which
#     include guards, #includes and namespace lines of the merged file are
#     cleaned up with sed.
# Afterwards the umbrella header ./stan/math/<fldr>.hpp and the files in
# <fldr>/core are rewritten to drop references to scal/arr/mat.
#
# Uses gnused, all_fldrs and dir_dimensions, which are defined earlier in
# the script. All paths are relative, so the script has to be started from
# the repository root.
for fldr in "${all_fldrs[@]}"; do
  # Upper-case folder name, used inside include-guard macros.
  FLDR=$(echo "$fldr" | tr '[:lower:]' '[:upper:]')
  # [0] = library sources, [1] = unit tests.
  dir_codebase=("./stan/math/$fldr" "./test/unit/math/$fldr")

  for dir_dim in "${dir_dimensions[@]}"; do
    # Umbrella header: the first dimension seen becomes <fldr>.hpp (moved
    # and committed), the following ones are appended to it.
    if [ ! -f "${dir_codebase[0]}.hpp" ]; then
      git mv "${dir_codebase[0]}/$dir_dim.hpp" "${dir_codebase[0]}.hpp"
      git add "${dir_codebase[0]}.hpp"
      git commit -m "flatten ${dir_codebase[0]}.hpp"
    else
      cat "${dir_codebase[0]}/$dir_dim.hpp" >> "${dir_codebase[0]}.hpp"
    fi

    for dir_code in "${dir_codebase[@]}"; do
      # skip if not all dimensions listed;
      # i'm looking at you mix
      if [ ! -d "$dir_code/$dir_dim" ]; then
        continue
      fi

      for dir_fn in "$dir_code/$dir_dim"/*; do
        # An empty directory leaves the glob unexpanded; skip the literal '*'.
        [ -e "$dir_fn" ] || continue
        dir_fn_name="$(basename "$dir_fn")"

        # if not a directory within {mat,scal,arr}/
        # then move it up a level
        # eg test/unit/math/rev/util.hpp
        if [ ! -d "$dir_fn" ]; then
          newfile="$dir_code/$dir_fn_name"
          if [ ! -f "$newfile" ]; then
            git mv "$dir_fn" "$newfile"
            git add "$newfile"
            git commit -m "flatten $newfile"
          else
            cat "$dir_fn" >> "$newfile"
          fi
          continue
        fi

        # Make directories, if needed
        if [ ! -d "$dir_code/$dir_fn_name" ]; then
          mkdir "$dir_code/$dir_fn_name"
        fi

        # Move files
        for filepath in "$dir_fn"/*; do
          # Same unmatched-glob guard as above.
          [ -e "$filepath" ] || continue
          filename="$(basename "$filepath")"
          newfile="$dir_code/$dir_fn_name/$filename"

          # if appending, delete last #endif
          #+ and make unique TEST names
          #+ -- I'm looking at you
          #+ test/unit/math/prim/meta/is_constant_struct.cpp
          if [ ! -f "$newfile" ]; then
            git mv "$filepath" "$newfile"
            git add "$newfile"
            git commit -m "flatten $newfile"
          else
            # Delete the trailing #endif (searched in the last three lines)
            # and then ...
            nlines=$(wc -l < "$newfile")
            # Clamp the first address to 1: for files shorter than three
            # lines nlines-2 would be 0 or negative, which sed rejects.
            endif_from=$((nlines > 2 ? nlines - 2 : 1))
            gnused -i -e "${endif_from},$((nlines)) s/\#endif//" "$newfile"
            # if in test
            if [ "$dir_code" = "${dir_codebase[1]}" ]; then
              # Change test names (suffix the first TEST( argument with
              # _<dim>) and append
              gnused -e "/TEST(/ s/\(.*\)\(,.*$\)/\1\_$dir_dim\2/" "$filepath" >> "$newfile"
            else
              # ... Append
              cat "$filepath" >> "$newfile"
            fi
          fi

          # Edit includes/directives
          # loop again to catch where mat includes scal/arr, etc.
          for any_dim in "${dir_dimensions[@]}"; do
            # loop again to catch when fwd, rev, mix, includes any other
            for any_fldr in "${all_fldrs[@]}"; do
              # top-level
              gnused -i -e "s/\(\#include\ <stan\/math\/$any_fldr\)\/$any_dim\(.hpp>\)/\1\2/g" "$newfile"
              gnused -i -e "s/\(\#include\ <test\/unit\/math\/$any_fldr\)\/$any_dim\(.hpp>\)/\1\2/g" "$newfile"
              # individual files
              gnused -i -e "s/\(\#include\ <stan\/math\/$any_fldr\/\)$any_dim\/\(.*>\)/\1\2/g" "$newfile"
              gnused -i -e "s/\(\#include\ <test\/unit\/math\/$any_fldr\/\)$any_dim\/\(.*>\)/\1\2/g" "$newfile"
            done
            # ifdefs: drop the <DIM>_ component from guard macro names
            any_DIM=$(echo "$any_dim" | tr '[:lower:]' '[:upper:]')
            gnused -i -e "s/\(\#ifndef\ STAN\_MATH\_$FLDR\_\)$any_DIM\_\(.*\_HPP\)/\1\2/g" "$newfile"
            gnused -i -e "s/\(\#define\ STAN\_MATH\_$FLDR\_\)$any_DIM\_\(.*\_HPP\)/\1\2/g" "$newfile"
            gnused -i -e "s/\(\#ifndef\ TEST\_UNIT\_MATH\_$FLDR\_\)$any_DIM\_\(.*\_HPP\)/\1\2/g" "$newfile"
            gnused -i -e "s/\(\#define\ TEST\_UNIT\_MATH\_$FLDR\_\)$any_DIM\_\(.*\_HPP\)/\1\2/g" "$newfile"
          done

          # Remove ifndefs beyond start:=max(first ifndef/define, first empty line)
          nlines=$(wc -l < "$newfile")
          empty_line=$(grep -n -m 1 "^$" "$newfile" | gnused "s/\([0-9]*\).*/\1/")
          first_define_line=$(grep -n -m 1 "\#define.*$" "$newfile" | gnused "s/\([0-9]*\).*/\1/")
          # Inside $(( )) an empty variable evaluates to 0, so a missing
          # match simply loses the comparison.
          start=$((empty_line > first_define_line ? empty_line : first_define_line))
          if [ -z "$start" ] || [ "$start" -eq 0 ]; then
            start=1
          fi
          gnused -i "$((start+1)),$((nlines)) s/\#ifndef\ STAN\_MATH\_$FLDR\_.*$//" "$newfile"
          gnused -i "$((start+1)),$((nlines)) s/\#define\ STAN\_MATH\_$FLDR\_.*$//" "$newfile"
          gnused -i "$((start+1)),$((nlines)) s/\#ifndef\ TEST\_UNIT\_MATH\_$FLDR\_.*$//" "$newfile"
          gnused -i "$((start+1)),$((nlines)) s/\#define\ TEST\_UNIT\_MATH\_$FLDR\_.*$//" "$newfile"

          # Collect all includes into the scratch file $newfile.grep
          grep '\#include.*>' "$newfile" >> "$newfile.grep"
          # Drop duplicate includes from $newfile.grep
          awk '!x[$0]++' "$newfile.grep" > "$newfile.grep.awked" && mv "$newfile.grep.awked" "$newfile.grep"
          # Remove self includes from $newfile.grep
          newfile_as_include=$(echo "$newfile" | gnused "s/\.\/\(.*\.hpp\)/\1/")
          gnused -i -e "s@\#include\ <$newfile_as_include>@@g" "$newfile.grep"
          # Remove all includes from $newfile (the lines are blanked, not
          # deleted, so line numbers computed above stay valid)
          gnused -i -e "s/\#include.*>//g" "$newfile"

          # Sort includes in $newfile.grep
          # collect only C headers (includes without a '/')
          grep '\#include[^/]*>' "$newfile.grep" > "$newfile.grep.c"
          # remove only C headers
          gnused -i -e "s/\#include[^/]*>//g" "$newfile.grep"
          # move only C headers to end of file
          cat "$newfile.grep.c" >> "$newfile.grep"

          # if in test
          if [ "$dir_code" = "${dir_codebase[1]}" ]; then
            # only top-level includes
            for any_fldr in "${all_fldrs[@]}"; do
              gnused -i -e "s/\(\#include\ <stan\/math\/$any_fldr\).*\(\.hpp>\)/\1\2/g" "$newfile.grep"
            done
            # remove duplicates
            awk '!x[$0]++' "$newfile.grep" > "$newfile.grep.awked" && mv "$newfile.grep.awked" "$newfile.grep"
          fi

          # Place all includes near top of $newfile (after line $start)
          gnused -i -e "$((start))r $newfile.grep" "$newfile"
          # Delete collected and sorted includes
          rm "$newfile.grep"
          rm "$newfile.grep.c"

          # Remove duplicate namespaces between first and last namespaces
          first_ns=$(grep -n -m 1 "^namespace\s*math\s*{" "$newfile" | gnused "s/\([0-9]*\).*/\1/")
          if [ -z "$first_ns" ]; then
            first_ns=$(grep -n -m 1 "^namespace\s*stan\s*{" "$newfile" | gnused "s/\([0-9]*\).*/\1/")
          fi
          last_ns=$(grep -n "^}\s*//\s*namespace\s*math" "$newfile" | tail -n1 | gnused "s/\([0-9]*\).*/\1/")
          if [ -z "$last_ns" ]; then
            last_ns=$(grep -n "^}\s*//\s*namespace\s*stan" "$newfile" | tail -n1 | gnused "s/\([0-9]*\).*/\1/")
          fi
          if [ -n "$first_ns" ] && [ -n "$last_ns" ]; then
            # remove open math/stan ns
            gnused -i -e "$((first_ns+1)),$((last_ns-1)) s/^namespace\s*math\s*{//" "$newfile"
            gnused -i -e "$((first_ns+1)),$((last_ns-1)) s/^namespace\s*stan\s*{//" "$newfile"
            # remove labelled close math/stan ns
            gnused -i -e "$((first_ns+1)),$((last_ns-1)) s/^}\s*\/\/\s*namespace\s*math//" "$newfile"
            gnused -i -e "$((first_ns+1)),$((last_ns-1)) s/^}\s*\/\/\s*namespace\s*stan//" "$newfile"
          fi
        done
      done
    done
  done

  # Edit includes from $fldr.hpp
  for any_dim in "${dir_dimensions[@]}"; do
    for any_fldr in "${all_fldrs[@]}"; do
      # top-level
      gnused -i "s/\(\#include\ <stan\/math\/$any_fldr\)\/$any_dim\(\.hpp>\)/\1\2/g" "${dir_codebase[0]}.hpp"
      # individual files
      gnused -i "s/\(\#include\ <stan\/math\/$any_fldr\/\)$any_dim\/\(.*>\)/\1\2/g" "${dir_codebase[0]}.hpp"
    done
    # ifdefs
    any_DIM=$(echo "$any_dim" | tr '[:lower:]' '[:upper:]')
    gnused -i "s/\(\#ifndef\ STAN\_MATH\_$FLDR\_\)$any_DIM\_\(HPP\)/\1\2/g" "${dir_codebase[0]}.hpp"
    gnused -i "s/\(\#define\ STAN\_MATH\_$FLDR\_\)$any_DIM\_\(HPP\)/\1\2/g" "${dir_codebase[0]}.hpp"
  done

  # Blank every "#endif" line except the last one.
  last_endif=$(grep -n "\#endif$" "${dir_codebase[0]}.hpp" | tail -n1 | gnused "s/\([0-9]*\).*/\1/")
  # Without any match the address would become "1,-1", which sed rejects.
  if [ -n "$last_endif" ]; then
    gnused -i -e "1,$((last_endif-1)) s/\#endif$//" "${dir_codebase[0]}.hpp"
  fi

  # Drop duplicate lines from $fldr.hpp (first occurrence wins)
  awk '!x[$0]++' "${dir_codebase[0]}.hpp" > "${dir_codebase[0]}.hpp.awked" && mv "${dir_codebase[0]}.hpp.awked" "${dir_codebase[0]}.hpp"

  # Remove ifndefs from middle of $fldr.hpp
  # assumes #ifndefs come before includes
  nlines=$(wc -l < "${dir_codebase[0]}.hpp")
  gnused -i -e "3,$((nlines)) s/\#ifndef\ STAN\_MATH\_$FLDR\_.*$//" "${dir_codebase[0]}.hpp"
  gnused -i -e "3,$((nlines)) s/\#define\ STAN\_MATH\_$FLDR\_.*$//" "${dir_codebase[0]}.hpp"

  # Add #endif back for STAN_OPENCL
  if [ "$fldr" = "prim" ]; then
    # NOTE(review): this sed program is single-quoted, so the shell does not
    # expand $fldr and sed receives the literal characters "$fldr"; the
    # address therefore cannot match the check_opencl include line. It is
    # left byte-identical on purpose: the echo below may be compensating
    # for the missing #endif, and enabling the match could leave prim.hpp
    # with one #endif too many. Confirm against the generated prim.hpp
    # before changing the quoting.
    gnused -i -e '/\#include\ <stan\/math\/$fldr\/err\/check_opencl\.hpp>/a\\#endif\n' "${dir_codebase[0]}.hpp"
    echo -e "\n#endif" >> "${dir_codebase[0]}.hpp"
  fi

  # Move C header(s?) to end of other includes
  # find line of last include
  last_inc=$(grep -n "\#include.*>" "${dir_codebase[0]}.hpp" | tail -n1 | gnused "s/\([0-9]*\).*/\1/")
  # collect C header
  grep '\#include[^/]*>' "${dir_codebase[0]}.hpp" > "${dir_codebase[0]}.hpp.c"
  # remove C header -- same pattern as the collection step, so exactly the
  # collected lines are blanked (no header is duplicated or lost)
  gnused -i -e "s/\#include[^/]*>//g" "${dir_codebase[0]}.hpp"
  # place C header to end of other includes
  gnused -i -e "$((last_inc))r ${dir_codebase[0]}.hpp.c" "${dir_codebase[0]}.hpp"
  # delete collected C header
  rm "${dir_codebase[0]}.hpp.c"

  # Sweep core and remove references to $dir_dimensions
  if [ -d "${dir_codebase[0]}/core" ]; then
    for any_dim in "${dir_dimensions[@]}"; do
      for dir_code in "${dir_codebase[@]}"; do
        for filepath in "$dir_code/core"/*; do
          # Skip the literal pattern when core/ is missing or empty.
          [ -e "$filepath" ] || continue
          for any_fldr in "${all_fldrs[@]}"; do
            # top-level
            gnused -i -e "s/\(\#include\ <stan\/math\/$any_fldr\)\/$any_dim\(.hpp>\)/\1\2/g" "$filepath"
            gnused -i -e "s/\(\#include\ <test\/unit\/math\/$any_fldr\)\/$any_dim\(.hpp>\)/\1\2/g" "$filepath"
            # individual files
            gnused -i -e "s/\(\#include\ <stan\/math\/$any_fldr\/\)$any_dim\/\(.*>\)/\1\2/g" "$filepath"
            gnused -i -e "s/\(\#include\ <test\/unit\/math\/$any_fldr\/\)$any_dim\/\(.*>\)/\1\2/g" "$filepath"
          done
        done
      done
    done
  fi
done
## script @drezap's post-script edits
# Each step below replays one hand-made follow-up commit (short hash in the
# step comment) as a sed edit followed by `git add` + `git commit`.

# Stage one path and commit it.
#   $1 - path to stage
#   $2 - commit message
stage_and_commit () {
  git add "$1"
  git commit -m "$2"
}

# Replace "#include <stan/math.hpp>" with "#include <stan/math/prim.hpp>"
# in file $1, then commit it with message $2.
prim_include_fix () {
  gnused -i -e "s/\#include\ <stan\/math\.hpp>/\#include\ <stan\/math\/prim\.hpp>/" "$1"
  stage_and_commit "$1" "$2"
}

# Rewrite test/unit/math/prim/mat/fun/...csv paths in file $1 to
# test/unit/math/prim/fun/...csv, then commit it with message $2.
csv_path_fix () {
  gnused -i -e "s/test\/unit\/math\/prim\/mat\/fun\/\(.*csv\)/test\/unit\/math\/prim\/fun\/\1/g" "$1"
  stage_and_commit "$1" "$2"
}

prim_tests="./test/unit/math/prim"

# 545cb65 -- remove include, tests pass
partials_test="$prim_tests/meta/partials_return_type_test.cpp"
gnused -i -e "s/\#include\ <stan\/math\/prim\.hpp>//g" "$partials_test"
stage_and_commit "$partials_test" "remove include, tests pass"

# 8e94a67 -- move includes to top
skew_test="$prim_tests/prob/skew_normal_cdf_log_test.cpp"
skew_includes="$skew_test.grep"
# find: stash the two include lines in a scratch file
grep "\#include\ <stan\/math\/prim\.hpp>" "$skew_test" >> "$skew_includes"
grep "\#include\ <gtest/gtest\.h>" "$skew_test" >> "$skew_includes"
# remove: blank them where they currently are
gnused -i -e "s/\#include\ <gtest\/gtest\.h>//g" "$skew_test"
gnused -i -e "s/\#include\ <stan\/math\/prim\.hpp>//g" "$skew_test"
# replace where desired: read the stash back in after line 1
gnused -i -e "1r $skew_includes" "$skew_test"
# remove tmp file
rm "$skew_includes"
stage_and_commit "$skew_test" "re: move includes to top"

# 1875bfa, 9c7936c -- remove include
prim_util="$prim_tests/util.hpp"
git mv "$prim_tests/mat/util.hpp" "$prim_util"
stage_and_commit "$prim_util" "move test/unit/math/prim/mat/util.hpp"
# Guard macro: TEST_MATH_UNIT_PRIM_MAT_* becomes TEST_UNIT_MATH_PRIM_*
gnused -i -e "s/\(\#ifndef\ TEST\_MATH\_UNIT\_PRIM\_\)MAT\_\(.*\_HPP\)/\#ifndef\ TEST\_UNIT\_MATH\_PRIM\_\2/g" "$prim_util"
gnused -i -e "s/\(\#define\ TEST\_MATH\_UNIT\_PRIM\_\)MAT\_\(.*\_HPP\)/\#define\ TEST\_UNIT\_MATH\_PRIM\_\2/g" "$prim_util"
gnused -i -e "s/\(\#include\ <stan\/math\/prim\)\/mat\(.hpp>\)/\1\2/g" "$prim_util"
stage_and_commit "$prim_util" "clean up test/unit/math/prim/util.hpp"

# 9aae3fd -- fix opencl test
opencl_test="$prim_tests/err/check_opencl_test.cpp"
gnused -i -e "/\#else/a #include <gtest/gtest.h>" "$opencl_test"
stage_and_commit "$opencl_test" "re: fix opencl test"

# 7833448 -- change type Eigen::Matrix -> Eigen::Array so tests compile
factor_u="./stan/math/prim/fun/factor_U.hpp"
gnused -i -e "s/temp \/= sqrt(acc.tail(pull) \/ acc(i));//g" "$factor_u"
gnused -i -e "/temp\ =\ U.row(i)\.tail(pull)/a Eigen::Array<T, -1, 1>\ temp2\ =\ sqrt(acc.tail(pull)\ \/\ acc(i));\ntemp\ \/=\ temp2;" "$factor_u"
stage_and_commit "$factor_u" "re: change type Eigen::Matrix -> Eigen::Array so tests compile"

# 0ce3605 -- change includes on sqrt_test.cpp
prim_include_fix "$prim_tests/fun/sqrt_test.cpp" "re: change includes on sqrt_test.cpp"

# f1648b9 -- change assertion so test passes...
digamma_test="$prim_tests/fun/digamma_test.cpp"
gnused -i -e "s/EXPECT_PRED1(boost::math::isnan<double>,\ stan::math::digamma(-1));//" "$digamma_test"
gnused -i -e "/EXPECT_PRED1(boost::math::isnan<double>,\ stan::math::digamma(nan));/a EXPECT_THROW(stan::math::digamma(-1),\ std::domain_error);" "$digamma_test"
stage_and_commit "$digamma_test" "re: change assertion so test passes"

# 2f7dc03 -- fix stack_alloc's includes
stack_alloc="./stan/math/memory/stack_alloc.hpp"
gnused -i -e "s/\(\#include\ <stan\/math\/prim\/\)scal\/\(.*>\)/\1\2/g" "$stack_alloc"
stage_and_commit "$stack_alloc" "re: fix stack_alloc's includes"

# 41742e1 -- fix include for log_test.cpp
prim_include_fix "$prim_tests/fun/log_test.cpp" "re: fix include for log_test.cpp"
# 676044b -- csr_extract_u_include
prim_include_fix "$prim_tests/fun/csr_extract_u_test.cpp" "re: csr_extract_u_include"
# f32d080 -- csr_matrix include
prim_include_fix "$prim_tests/fun/csr_matrix_times_vector_test.cpp" "re: csr_matrix include"
# 87035f -- fix exp test
prim_include_fix "$prim_tests/fun/exp_test.cpp" "re: fix exp test"
# 9ca0a94 -- fix csr_to_dense
prim_include_fix "$prim_tests/fun/csr_to_dense_matrix_test.cpp" "re: fix csr_to_dense"
# d6e09dc -- fix csr_extract_v
prim_include_fix "$prim_tests/fun/csr_extract_v_test.cpp" "re: fix csr_extract_v"

# 5d5ca39 -- fix autocorr file path
csv_path_fix "$prim_tests/fun/autocorrelation_test.cpp" "re: fix autocorr file path"
# 7308717 -- fix autocovariance test
csv_path_fix "$prim_tests/fun/autocovariance_test.cpp" "re: fix autocovariance test"

# 3e4f4ca -- fix integrate 1d
prim_include_fix "$prim_tests/functor/integrate_1d_test.cpp" "re: fix integrate 1d"

## deal with test/unit/math/rev/util.hpp by hand
echo "DEAL WITH test/unit/math/rev/util.hpp BY HAND"
@drezap

This comment has been minimized.

Copy link

commented Jun 21, 2019

for linux, just replace gnused with sed and delete the version change at the top.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.