Skip to content

Instantly share code, notes, and snippets.

@roualdes roualdes/flatten.sh
Last active Apr 5, 2019

Embed
What would you like to do?
Flatten stan-dev/math's {scal,arr,mat}/**/*.hpp
# Flatten stan/math/{prim,rev,fwd,mix} and test/unit/math/{prim,rev,fwd,mix}
#
# Details
#
# Only works with GNU sed. If you're running macOS, install GNU sed
# with homebrew. For instance,
#
# $ brew install gnu-sed
#
# It is my understanding that Linux machines use GNU sed by default,
# and I have no idea what to do for windows machines.
#
# flatten.sh should be run from the top-level directory of the
# stan-dev/math repository and only on a clean branch. You should NOT
# run flatten.sh twice in a row.
#
# Some human intervention is required. The file
# test/unit/math/rev/util.hpp is just easier to deal with by hand. A
# statement is printed at the end of this code to help me remember
# this.
#
# For more details see https://github.com/stan-dev/math/issues/937
#
# This script attempts to
#
# 1. maintain each file's git history, though this is not 100%
# possible as far as I know. An effort is made to maximize the number
# of retained git histories at the expense of many commits;
#
# 2. remove duplicate #ifndefs and #endifs that occur due to the
# merging of files from separate directories,
# e.g. prim/scal/fun/Phi.hpp and prim/mat/fun/Phi.hpp;
#
# 3. remove multiple namespace declarations that occur due to the
# merging of files from separate directories;
#
# 4. move #includes to the top of the files, while removing
# self-includes that occur due to the merging of files from separate
# directories and ordering them to satisfy build requirements and make
# cpplint,
#
# 5. reduce the number of includes by only ever including top-level
# headers,
#
# 6. change TEST names that occur due to the merging of files from
# separate directories
#
# 7. change TESTs to include only top-level header files,
# e.g. stan/math/prim.hpp
#
# Example
#
# $ git clone https://github.com/stan-dev/math.git
# $ git clone https://gist.github.com/ea72fc0fbcac05329e562a0e60ee0a1d.git flatten_math
# $ cp flatten_math/flatten.sh math/
# $ cd math
# $ chmod +x ./flatten.sh
# $ ./flatten.sh
# $ ./runTests.py test/unit/math/prim/meta/is_constant_struct_test.cpp
#
# Notes
# 1. sed spits out an error for
# stan/math/prim/meta/scalar_type.hpp but I can't figure out why
# Top-level folders handled by the script, in the order they are processed.
all_fldrs=(rev fwd mix prim)
# Per-dimension subfolders that are flattened away inside each folder.
dir_dimensions=(scal arr mat)
# Run GNU sed with the given arguments.
#
# `sed` is used when `sed --version` succeeds (taken as the sign that the
# default sed is GNU sed); otherwise the call is forwarded to `gsed`.
#
# Arguments: passed through unchanged to whichever sed is selected.
# Returns:   the exit status of the sed that actually ran. An explicit
#            if/else is used so that a failing `sed "$@"` is reported as-is
#            instead of silently triggering a second attempt through gsed.
gnused () {
  if sed --version >/dev/null 2>&1; then
    sed "$@"
  else
    gsed "$@"
  fi
}
# Flatten one top-level folder at a time. For each folder this
#   1. merges the per-dimension umbrella headers into stan/math/<fldr>.hpp,
#   2. moves (git mv) or appends every file found under {scal,arr,mat}/ one
#      level up, then rewrites its includes, header guards and namespaces,
#   3. tidies stan/math/<fldr>.hpp, and
#   4. rewrites includes of files under <fldr>/core.
# Relies on the global arrays all_fldrs / dir_dimensions and on gnused.
for fldr in "${all_fldrs[@]}"; do
  # Upper-case folder name, used to match header-guard macros.
  FLDR=$(echo "$fldr" | tr '[:lower:]' '[:upper:]')
  # [0] is the library tree, [1] is the unit-test tree.
  dir_codebase=("./stan/math/$fldr" "./test/unit/math/$fldr")

  for dir_dim in "${dir_dimensions[@]}"; do
    # The first dimension header becomes <fldr>.hpp via git mv; the
    # remaining ones are appended to it.
    if [ ! -f "${dir_codebase[0]}.hpp" ]; then
      git mv "${dir_codebase[0]}/$dir_dim.hpp" "${dir_codebase[0]}.hpp"
      git add "${dir_codebase[0]}.hpp"
      git commit -m "flatten ${dir_codebase[0]}.hpp"
    else
      cat "${dir_codebase[0]}/$dir_dim.hpp" >> "${dir_codebase[0]}.hpp"
    fi

    for dir_code in "${dir_codebase[@]}"; do
      # skip if not all dimensions listed;
      # i'm looking at you mix
      if [ ! -d "$dir_code/$dir_dim" ]; then
        continue
      fi

      for dir_fn in "$dir_code/$dir_dim"/*; do
        # An unmatched glob stays as a literal '*'; nothing to do then.
        [ -e "$dir_fn" ] || continue
        dir_fn_name="$(basename "$dir_fn")"

        # if not a directory within {mat,scal,arr}/
        # then move it up a level
        # eg test/unit/math/rev/util.hpp
        if [ ! -d "$dir_fn" ]; then
          newfile="$dir_code/$dir_fn_name"
          if [ ! -f "$newfile" ]; then
            git mv "$dir_fn" "$newfile"
            git add "$newfile"
            git commit -m "flatten $newfile"
          else
            cat "$dir_fn" >> "$newfile"
          fi
          continue
        fi

        # Make directories, if needed
        if [ ! -d "$dir_code/$dir_fn_name" ]; then
          mkdir "$dir_code/$dir_fn_name"
        fi

        # Move files
        for filepath in "$dir_fn"/*; do
          # An unmatched glob stays as a literal '*'; nothing to do then.
          [ -e "$filepath" ] || continue
          filename="$(basename "$filepath")"
          newfile="$dir_code/$dir_fn_name/$filename"

          # if appending, delete last #endif
          #+ and make unique TEST names
          #+ -- I'm looking at you
          #+ test/unit/math/prim/meta/is_constant_struct.cpp
          if [ ! -f "$newfile" ]; then
            git mv "$filepath" "$newfile"
            git add "$newfile"
            git commit -m "flatten $newfile"
          else
            # Delete last #endif (searched for in the final three lines) and then ...
            nlines=$(wc -l < "$newfile")
            gnused -i -e "$((nlines-2)),$((nlines)) s/\#endif//" "$newfile"
            # if in test
            if [ "$dir_code" = "${dir_codebase[1]}" ]; then
              # Change test names (suffix with the dimension) and append
              gnused -e "/TEST(/ s/\(.*\)\(,.*$\)/\1\_$dir_dim\2/" "$filepath" >> "$newfile"
            else
              # ... Append
              cat "$filepath" >> "$newfile"
            fi
          fi

          # Edit includes/directives
          # loop again to catch where mat includes scal/arr, etc.
          for any_dim in "${dir_dimensions[@]}"; do
            # loop again to catch when fwd, rev, mix, includes any other
            for any_fldr in "${all_fldrs[@]}"; do
              # top-level
              gnused -i -e "s/\(\#include\ <stan\/math\/$any_fldr\)\/$any_dim\(.hpp>\)/\1\2/g" "$newfile"
              gnused -i -e "s/\(\#include\ <test\/unit\/math\/$any_fldr\)\/$any_dim\(.hpp>\)/\1\2/g" "$newfile"
              # individual files
              gnused -i -e "s/\(\#include\ <stan\/math\/$any_fldr\/\)$any_dim\/\(.*>\)/\1\2/g" "$newfile"
              gnused -i -e "s/\(\#include\ <test\/unit\/math\/$any_fldr\/\)$any_dim\/\(.*>\)/\1\2/g" "$newfile"
            done
            # ifdefs
            any_DIM=$(echo "$any_dim" | tr '[:lower:]' '[:upper:]')
            gnused -i -e "s/\(\#ifndef\ STAN\_MATH\_$FLDR\_\)$any_DIM\_\(.*\_HPP\)/\1\2/g" "$newfile"
            gnused -i -e "s/\(\#define\ STAN\_MATH\_$FLDR\_\)$any_DIM\_\(.*\_HPP\)/\1\2/g" "$newfile"
            gnused -i -e "s/\(\#ifndef\ TEST\_UNIT\_MATH\_$FLDR\_\)$any_DIM\_\(.*\_HPP\)/\1\2/g" "$newfile"
            gnused -i -e "s/\(\#define\ TEST\_UNIT\_MATH\_$FLDR\_\)$any_DIM\_\(.*\_HPP\)/\1\2/g" "$newfile"
          done

          # Remove ifndefs beyond start:=max(first ifndef/define, first empty line)
          nlines=$(wc -l < "$newfile")
          empty_line=$(grep -n -m 1 "^$" "$newfile" | gnused "s/\([0-9]*\).*/\1/")
          first_define_line=$(grep -n -m 1 "\#define.*$" "$newfile" | gnused "s/\([0-9]*\).*/\1/")
          # Either grep may find nothing. Default to 0 so the arithmetic
          # below stays valid and `start` is never carried over from the
          # previously processed file.
          empty_line=${empty_line:-0}
          first_define_line=${first_define_line:-0}
          start=$((empty_line > first_define_line ? empty_line : first_define_line))
          if [ "$start" -eq 0 ]; then
            start=1
          fi
          gnused -i "$((start+1)),$((nlines)) s/\#ifndef\ STAN\_MATH\_$FLDR\_.*$//" "$newfile"
          gnused -i "$((start+1)),$((nlines)) s/\#define\ STAN\_MATH\_$FLDR\_.*$//" "$newfile"
          gnused -i "$((start+1)),$((nlines)) s/\#ifndef\ TEST\_UNIT\_MATH\_$FLDR\_.*$//" "$newfile"
          gnused -i "$((start+1)),$((nlines)) s/\#define\ TEST\_UNIT\_MATH\_$FLDR\_.*$//" "$newfile"

          # Collect all includes
          grep '\#include.*>' "$newfile" >> "$newfile.grep"
          # Drop duplicate includes from $newfile.grep
          awk '!x[$0]++' "$newfile.grep" > "$newfile.grep.awked" && mv "$newfile.grep.awked" "$newfile.grep"
          # Remove self includes from $newfile.grep
          newfile_as_include=$(echo "$newfile" | gnused "s/\.\/\(.*\.hpp\)/\1/")
          gnused -i -e "s@\#include\ <$newfile_as_include>@@g" "$newfile.grep"
          # Remove all includes from $newfile
          gnused -i -e "s/\#include.*>//g" "$newfile"

          # Sort includes in $newfile.grep
          # collect only C headers (includes whose path has no '/')
          grep '\#include[^/]*>' "$newfile.grep" > "$newfile.grep.c"
          # remove only C headers
          gnused -i -e "s/\#include[^/]*>//g" "$newfile.grep"
          # move only C headers to end of file
          cat "$newfile.grep.c" >> "$newfile.grep"

          # if in test
          if [ "$dir_code" = "${dir_codebase[1]}" ]; then
            # only top-level includes
            for any_fldr in "${all_fldrs[@]}"; do
              gnused -i -e "s/\(\#include\ <stan\/math\/$any_fldr\).*\(\.hpp>\)/\1\2/g" "$newfile.grep"
            done
            # remove duplicates
            awk '!x[$0]++' "$newfile.grep" > "$newfile.grep.awked" && mv "$newfile.grep.awked" "$newfile.grep"
          fi

          # Place all includes near top of $newfile
          gnused -i -e "$((start))r $newfile.grep" "$newfile"
          # Delete collected and sorted includes
          rm "$newfile.grep"
          rm "$newfile.grep.c"

          # Remove duplicate namespaces between first and last namespaces
          first_ns=$(grep -n -m 1 "^namespace\s*math\s*{" "$newfile" | gnused "s/\([0-9]*\).*/\1/")
          if [ -z "$first_ns" ]; then
            first_ns=$(grep -n -m 1 "^namespace\s*stan\s*{" "$newfile" | gnused "s/\([0-9]*\).*/\1/")
          fi
          last_ns=$(grep -n "^}\s*//\s*namespace\s*math" "$newfile" | tail -n1 | gnused "s/\([0-9]*\).*/\1/")
          if [ -z "$last_ns" ]; then
            last_ns=$(grep -n "^}\s*//\s*namespace\s*stan" "$newfile" | tail -n1 | gnused "s/\([0-9]*\).*/\1/")
          fi
          # Only edit when at least one line lies strictly between the two
          # markers: GNU sed still processes the first line of a range whose
          # end is not after its start, which could strip the final
          # closing-namespace line.
          if [ -n "$first_ns" ] && [ -n "$last_ns" ] && [ "$((last_ns - first_ns))" -gt 1 ]; then
            # remove open math/stan ns
            gnused -i -e "$((first_ns+1)),$((last_ns-1)) s/^namespace\s*math\s*{//" "$newfile"
            gnused -i -e "$((first_ns+1)),$((last_ns-1)) s/^namespace\s*stan\s*{//" "$newfile"
            # remove labelled close math/stan ns
            gnused -i -e "$((first_ns+1)),$((last_ns-1)) s/^}\s*\/\/\s*namespace\s*math//" "$newfile"
            gnused -i -e "$((first_ns+1)),$((last_ns-1)) s/^}\s*\/\/\s*namespace\s*stan//" "$newfile"
          fi
        done
      done
    done
  done

  # Edit includes from $fldr.hpp
  for any_dim in "${dir_dimensions[@]}"; do
    for any_fldr in "${all_fldrs[@]}"; do
      # top-level
      gnused -i "s/\(\#include\ <stan\/math\/$any_fldr\)\/$any_dim\(\.hpp>\)/\1\2/g" "${dir_codebase[0]}.hpp"
      # individual files
      gnused -i "s/\(\#include\ <stan\/math\/$any_fldr\/\)$any_dim\/\(.*>\)/\1\2/g" "${dir_codebase[0]}.hpp"
    done
    # ifdefs
    any_DIM=$(echo "$any_dim" | tr '[:lower:]' '[:upper:]')
    gnused -i "s/\(\#ifndef\ STAN\_MATH\_$FLDR\_\)$any_DIM\_\(HPP\)/\1\2/g" "${dir_codebase[0]}.hpp"
    gnused -i "s/\(\#define\ STAN\_MATH\_$FLDR\_\)$any_DIM\_\(HPP\)/\1\2/g" "${dir_codebase[0]}.hpp"
  done

  # Remove every #endif except the last one
  last_endif=$(grep -n "\#endif$" "${dir_codebase[0]}.hpp" | tail -n1 | gnused "s/\([0-9]*\).*/\1/")
  # Skip when no #endif was found or when it sits on line 1: the range
  # would otherwise be invalid or would hit the only #endif.
  if [ -n "$last_endif" ] && [ "$last_endif" -gt 1 ]; then
    gnused -i -e "1,$((last_endif-1)) s/\#endif$//" "${dir_codebase[0]}.hpp"
  fi

  # Drop duplicate lines from $fldr.hpp
  awk '!x[$0]++' "${dir_codebase[0]}.hpp" > "${dir_codebase[0]}.hpp.awked" && mv "${dir_codebase[0]}.hpp.awked" "${dir_codebase[0]}.hpp"

  # Remove ifndefs from middle of $fldr.hpp
  # assumes #ifndefs come before includes
  nlines=$(wc -l < "${dir_codebase[0]}.hpp")
  gnused -i -e "3,$((nlines)) s/\#ifndef\ STAN\_MATH\_$FLDR\_.*$//" "${dir_codebase[0]}.hpp"
  gnused -i -e "3,$((nlines)) s/\#define\ STAN\_MATH\_$FLDR\_.*$//" "${dir_codebase[0]}.hpp"

  # Add #endif back for STAN_OPENCL
  if [ "$fldr" = "prim" ]; then
    # NOTE(review): this sed program is single-quoted, so the shell does not
    # expand $fldr and the address looks for the literal text "$fldr". It is
    # left unchanged because the #endif appended right below may have been
    # tuned against that behavior -- confirm before fixing.
    gnused -i -e '/\#include\ <stan\/math\/$fldr\/err\/check_opencl\.hpp>/a\\#endif\n' "${dir_codebase[0]}.hpp"
    echo -e "\n#endif" >> "${dir_codebase[0]}.hpp"
  fi

  # Move C header(s?) to end of other includes
  # find line of last include
  last_inc=$(grep -n "\#include.*>" "${dir_codebase[0]}.hpp" | tail -n1 | gnused "s/\([0-9]*\).*/\1/")
  # collect C header
  grep '\#include[^/]*>' "${dir_codebase[0]}.hpp" > "${dir_codebase[0]}.hpp.c"
  # remove C header
  gnused -i -e "s/\#include[^.]*>//g" "${dir_codebase[0]}.hpp"
  # place C header to end of other includes
  gnused -i -e "$((last_inc))r ${dir_codebase[0]}.hpp.c" "${dir_codebase[0]}.hpp"
  # delete collected C header
  rm "${dir_codebase[0]}.hpp.c"

  # Sweep core and remove references to $dir_dimensions
  if [ -d "${dir_codebase[0]}/core" ]; then
    for any_dim in "${dir_dimensions[@]}"; do
      for dir_code in "${dir_codebase[@]}"; do
        for filepath in "$dir_code/core"/*; do
          # Skip unmatched globs (e.g. no core/ in the test tree) and
          # anything that is not a regular file sed can edit in place.
          [ -f "$filepath" ] || continue
          for any_fldr in "${all_fldrs[@]}"; do
            # top-level
            gnused -i -e "s/\(\#include\ <stan\/math\/$any_fldr\)\/$any_dim\(.hpp>\)/\1\2/g" "$filepath"
            gnused -i -e "s/\(\#include\ <test\/unit\/math\/$any_fldr\)\/$any_dim\(.hpp>\)/\1\2/g" "$filepath"
            # individual files
            gnused -i -e "s/\(\#include\ <stan\/math\/$any_fldr\/\)$any_dim\/\(.*>\)/\1\2/g" "$filepath"
            gnused -i -e "s/\(\#include\ <test\/unit\/math\/$any_fldr\/\)$any_dim\/\(.*>\)/\1\2/g" "$filepath"
          done
        done
      done
    done
  fi
done
## script @drezap's post-script edits
# Each step below edits one file with gnused and records it as its own commit.
# The leading identifiers (545cb65, ...) are kept from the original notes.

# Stage one file and commit it.
#   $1 - path to stage
#   $2 - commit message
_commit_file() {
  git add "$1"
  git commit -m "$2"
}

# Replace `#include <stan/math.hpp>` with `#include <stan/math/prim.hpp>`
# in one file, then commit it.
#   $1 - path to edit
#   $2 - commit message
_use_prim_header() {
  gnused -i -e "s/\#include\ <stan\/math\.hpp>/\#include\ <stan\/math\/prim\.hpp>/" "$1"
  _commit_file "$1" "$2"
}

# Rewrite test/unit/math/prim/mat/fun/<...csv> references to
# test/unit/math/prim/fun/<...csv> in one file, then commit it.
#   $1 - path to edit
#   $2 - commit message
_fix_csv_path() {
  gnused -i -e "s/test\/unit\/math\/prim\/mat\/fun\/\(.*csv\)/test\/unit\/math\/prim\/fun\/\1/g" "$1"
  _commit_file "$1" "$2"
}

# 545cb65 -- remove include, tests pass
partials_test="./test/unit/math/prim/meta/partials_return_type_test.cpp"
gnused -i -e "s/\#include\ <stan\/math\/prim\.hpp>//g" "$partials_test"
_commit_file "$partials_test" "remove include, tests pass"

# 8e94a67 -- move includes to top
skew_test="./test/unit/math/prim/prob/skew_normal_cdf_log_test.cpp"
skew_includes="$skew_test.grep"
# find
grep "\#include\ <stan\/math\/prim\.hpp>" "$skew_test" >> "$skew_includes"
grep "\#include\ <gtest/gtest\.h>" "$skew_test" >> "$skew_includes"
# remove
gnused -i -e "s/\#include\ <gtest\/gtest\.h>//g" "$skew_test"
gnused -i -e "s/\#include\ <stan\/math\/prim\.hpp>//g" "$skew_test"
# replace where desired (read the collected lines back in after line 1)
gnused -i -e "1r $skew_includes" "$skew_test"
# remove tmp file
rm "$skew_includes"
_commit_file "$skew_test" "re: move includes to top"

# 1875bfa, 9c7936c -- remove include
prim_util="./test/unit/math/prim/util.hpp"
git mv "./test/unit/math/prim/mat/util.hpp" "$prim_util"
_commit_file "$prim_util" "move test/unit/math/prim/mat/util.hpp"
gnused -i -e "s/\(\#ifndef\ TEST\_MATH\_UNIT\_PRIM\_\)MAT\_\(.*\_HPP\)/\#ifndef\ TEST\_UNIT\_MATH\_PRIM\_\2/g" "$prim_util"
gnused -i -e "s/\(\#define\ TEST\_MATH\_UNIT\_PRIM\_\)MAT\_\(.*\_HPP\)/\#define\ TEST\_UNIT\_MATH\_PRIM\_\2/g" "$prim_util"
gnused -i -e "s/\(\#include\ <stan\/math\/prim\)\/mat\(.hpp>\)/\1\2/g" "$prim_util"
_commit_file "$prim_util" "clean up test/unit/math/prim/util.hpp"

# 9aae3fd -- fix opencl test
opencl_test="./test/unit/math/prim/err/check_opencl_test.cpp"
gnused -i -e "/\#else/a #include <gtest/gtest.h>" "$opencl_test"
_commit_file "$opencl_test" "re: fix opencl test"

# 7833448 -- change type Eigen::Matrix -> Eigen::Array so tests compile
factor_u="./stan/math/prim/fun/factor_U.hpp"
gnused -i -e "s/temp \/= sqrt(acc.tail(pull) \/ acc(i));//g" "$factor_u"
gnused -i -e "/temp\ =\ U.row(i)\.tail(pull)/a Eigen::Array<T, -1, 1>\ temp2\ =\ sqrt(acc.tail(pull)\ \/\ acc(i));\ntemp\ \/=\ temp2;" "$factor_u"
_commit_file "$factor_u" "re: change type Eigen::Matrix -> Eigen::Array so tests compile"

# 0ce3605 -- change includes on sqrt_test.cpp
_use_prim_header "./test/unit/math/prim/fun/sqrt_test.cpp" "re: change includes on sqrt_test.cpp"

# f1648b9 -- change assertion so test passes...
digamma_test="./test/unit/math/prim/fun/digamma_test.cpp"
gnused -i -e "s/EXPECT_PRED1(boost::math::isnan<double>,\ stan::math::digamma(-1));//" "$digamma_test"
gnused -i -e "/EXPECT_PRED1(boost::math::isnan<double>,\ stan::math::digamma(nan));/a EXPECT_THROW(stan::math::digamma(-1),\ std::domain_error);" "$digamma_test"
_commit_file "$digamma_test" "re: change assertion so test passes"

# 2f7dc03 -- fix stack_alloc's includes
stack_alloc="./stan/math/memory/stack_alloc.hpp"
gnused -i -e "s/\(\#include\ <stan\/math\/prim\/\)scal\/\(.*>\)/\1\2/g" "$stack_alloc"
_commit_file "$stack_alloc" "re: fix stack_alloc's includes"

# 41742e1 -- fix include for log_test.cpp
_use_prim_header "./test/unit/math/prim/fun/log_test.cpp" "re: fix include for log_test.cpp"
# 676044b -- csr_extract_u_include
_use_prim_header "./test/unit/math/prim/fun/csr_extract_u_test.cpp" "re: csr_extract_u_include"
# f32d080 -- csr_matrix include
_use_prim_header "./test/unit/math/prim/fun/csr_matrix_times_vector_test.cpp" "re: csr_matrix include"
# 87035f -- fix exp test
_use_prim_header "./test/unit/math/prim/fun/exp_test.cpp" "re: fix exp test"
# 9ca0a94 -- fix csr_to_dense
_use_prim_header "./test/unit/math/prim/fun/csr_to_dense_matrix_test.cpp" "re: fix csr_to_dense"
# d6e09dc -- fix csr_extract_v
_use_prim_header "./test/unit/math/prim/fun/csr_extract_v_test.cpp" "re: fix csr_extract_v"

# 5d5ca39 -- fix autocorr file path
_fix_csv_path "./test/unit/math/prim/fun/autocorrelation_test.cpp" "re: fix autocorr file path"
# 7308717 -- fix autocovariance test
_fix_csv_path "./test/unit/math/prim/fun/autocovariance_test.cpp" "re: fix autocovariance test"

# 3e4f4ca -- fix integrate 1d
_use_prim_header "./test/unit/math/prim/functor/integrate_1d_test.cpp" "re: fix integrate 1d"

## deal with test/unit/math/rev/util.hpp by hand
echo "DEAL WITH test/unit/math/rev/util.hpp BY HAND"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.