Created
December 12, 2018 08:52
-
-
Save jobel-code/5d7d4fdfd4b403809ef68d54309fd85f to your computer and use it in GitHub Desktop.
Splits large files into smaller files of 1000 lines, keeping the header on each small file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%%bash
# Split a large text file into chunks of at most 1000 data lines, repeating
# the input's first line (the header) at the top of every chunk.
#
# Input:  $in_filepath - path of the file to split. It is not defined in this
#         script, so it must already be present in the environment.
# Output: chunks are written next to the input as <stem>__NNNNN.<ext>
#         (NNNNN = 00000, 00001, ...); the chunk prefix is printed to stdout.

#######################################
# Split one file into header-prefixed chunks.
# Arguments:
#   $1 - path of the file to split
#   $2 - data lines per chunk (optional, default 1000)
# Outputs:
#   Prints the chunk prefix (<dir>/<stem>) to stdout; diagnostics to stderr.
# Returns:
#   0 on success, non-zero if the arguments are invalid or a step fails.
#######################################
split_with_header() {
  local in_file=${1:-}
  local chunk_lines=${2:-1000}
  local dir filename extension target chunk tmp_file

  if [[ -z "$in_file" || ! -f "$in_file" ]]; then
    printf 'split_with_header: input file not found: %s\n' "$in_file" >&2
    return 1
  fi
  if [[ ! "$chunk_lines" =~ ^[1-9][0-9]*$ ]]; then
    printf 'split_with_header: invalid chunk size: %s\n' "$chunk_lines" >&2
    return 1
  fi

  dir=$(dirname -- "$in_file") || return
  filename=$(basename -- "$in_file") || return

  # Only treat the text after the last dot as an extension when there is a
  # dot; otherwise the whole name would be appended again as an "extension".
  if [[ "$filename" == *.* ]]; then
    extension=".${filename##*.}"
    filename=${filename%.*}
  else
    extension=''
  fi
  target="$dir/$filename"
  printf '%s\n' "$target"

  # Everything except the header line, cut into numbered chunks
  # <target>__00000, <target>__00001, ...
  tail -n +2 <"$in_file" \
    | split -d -a 5 -l "$chunk_lines" - "${target}__" || return

  # Match only the bare five-digit chunks split just wrote. A plain
  # "${target}__"* would also pick up finished outputs of an earlier run
  # and prepend a second header to them.
  for chunk in "${target}__"[0-9][0-9][0-9][0-9][0-9]; do
    # With a header-only input split writes nothing and the pattern stays
    # literal; skip it instead of creating a file with that name.
    [[ -e "$chunk" ]] || continue

    # Build header + chunk in a temp file next to the input so the final
    # name only ever appears with complete content.
    tmp_file=$(mktemp -- "${in_file}.XXXXXX") || return
    if ! {
      head -n 1 <"$in_file"
      cat <"$chunk"
    } >"$tmp_file"; then
      rm -f -- "$tmp_file"
      return 1
    fi

    if ! mv -f -- "$tmp_file" "${chunk}${extension}"; then
      rm -f -- "$tmp_file"
      return 1
    fi
    # When there is no extension the mv above already replaced the chunk.
    if [[ -n "$extension" ]]; then
      rm -f -- "$chunk"
    fi
  done
}

split_with_header "${in_filepath:-}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment