Created
January 6, 2016 16:02
-
-
Save ibm-jstart/906f3561d7ecfd4cc0d3 to your computer and use it in GitHub Desktop.
A bash shell utility for splitting large files into chunks of a defined size without splitting the file in the middle of lines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# -----------------------------------------
# (c) Copyright IBM Corp. 2015. All Rights Reserved.
# Author: Nathan Hernandez
# License: Apache 2.0 ( http://www.apache.org/licenses/LICENSE-2.0 )
#
# IBM cannot guarantee or imply reliability, serviceability, or function of this script file. The script code is provided "AS IS", without warranty of any kind. IBM shall not be liable for any damages arising out of your use of this code.
# -----------------------------------------
# Help text printed when the script is called with too few arguments.
# The "\n" and "\t" sequences are stored literally in USAGE and are expanded
# only when the text is printed (printf's %b, like the former `echo -e`).
USAGE="This script allows you to split a file into chunks of a defined size without\n"\
"splitting the file in the middle of lines.\n"\
"\n"\
"Usage: ./split.sh <size> <file>\n"\
"\tsize:\tThe desired size of each chunk written as a value followed by either \n"\
"\t\tM (megabytes) or G (gigabytes), e.g., 10M or 1G.\n"\
"\tfile:\tThe file you want to be split. The file must be either megabytes or \n"\
"\t\tgigabytes large.\n"

# Both <size> and <file> are mandatory. Checking only for "no arguments at
# all" would let a single-argument call continue with an empty file name.
if [[ $# -lt 2 ]]; then
  printf '%b\n' "$USAGE"
  exit 1
fi
# Choose the split binary to use and make sure it is installed.
# On FreeBSD and Mac OS X the script uses gsplit instead of split; every other
# platform (including linux-gnu) uses plain split. The split call further down
# relies on the long options --number and --numeric-suffixes.
case "$OSTYPE" in
  freebsd* | darwin*)
    programRequired="gsplit"
    ;;
  *)
    programRequired="split"
    ;;
esac

# Abort early, with a message on stderr, when the chosen tool is not on PATH.
if ! command -v "$programRequired" >/dev/null 2>&1; then
  echo >&2 "I require $programRequired, but it's not installed. Aborting."
  exit 1
fi
# Read the two positional arguments and work out how many chunks are needed.
#   $1 - desired chunk size: a whole number followed by M or G (e.g. 10M, 1G)
#   $2 - path of the file to split
# Result: `chunks` holds the number of pieces to create (always >= 1).
chunkSize="$1"
file="$2"
echo "File: $file"
echo "Chunk size: $chunkSize"

if [[ ! -f "$file" ]]; then
  echo >&2 "File '$file' does not exist or is not a regular file. Aborting."
  exit 1
fi

# Separate "<number><unit>" into its two parts; the unit is the last character.
chunkUnit="${chunkSize: -1}"
chunkValue="${chunkSize%?}"
if [[ "$chunkUnit" != "M" && "$chunkUnit" != "G" ]]; then
  echo >&2 "Chunk unit needs to be in M (megabytes) or G (gigabytes). Aborting."
  exit 1
fi
# 10# forces base 10 so a value such as 010 is not read as octal; a zero or
# non-numeric value would otherwise cause a division by zero below.
if [[ ! "$chunkValue" =~ ^[0-9]+$ ]] || ((10#$chunkValue == 0)); then
  echo >&2 "Chunk size must be a positive whole number followed by M or G. Aborting."
  exit 1
fi

# Do all arithmetic in bytes (1M = 1024 * 1024, 1G = 1024M) so that mixed
# units and fractional human-readable sizes cannot distort the result.
bytesPerM=$((1024 * 1024))
chunkBytes=$((10#$chunkValue * bytesPerM))
if [[ "$chunkUnit" == "G" ]]; then
  chunkBytes=$((chunkBytes * 1024))
fi

# Reading via redirection avoids parsing `ls` output and keeps file names that
# start with a dash from being taken as options.
fileBytes=$(wc -c <"$file") || {
  echo >&2 "Could not determine the size of '$file'. Aborting."
  exit 1
}
fileBytes="${fileBytes//[[:space:]]/}" # some wc implementations pad the count

# The usage text requires a file that is megabytes or gigabytes large; since
# the smallest chunk is 1M, anything below that has nothing to split.
if ((fileBytes < bytesPerM)); then
  echo >&2 "Filesize needs to be in M (megabytes) or G (gigabytes). Aborting."
  exit 1
fi

# Round up so that chunks do not end up larger than the requested size and a
# chunk size bigger than the file still yields one chunk instead of zero.
chunks=$(((fileBytes + chunkBytes - 1) / chunkBytes))
# Split $file into $chunks pieces and store them under tmp/$file/ as
# chunk-00, chunk-01, ...
# Relies on programRequired, file and chunks having been set earlier in the
# script.
if [[ ! "$chunks" =~ ^[1-9][0-9]*$ ]]; then
  echo >&2 "Invalid chunk count '$chunks'. Aborting."
  exit 1
fi

outDir="tmp/$file"
echo "Creating $chunks chunks in tmp/$file/."

mkdir -p -- "$outDir" || {
  echo >&2 "Could not create $outDir/. Aborting."
  exit 1
}

# The command is an argv array rather than a string passed to eval, so file
# names containing spaces or shell metacharacters are handed over verbatim.
# --number=l/N asks for N output files without splitting lines. Writing with
# the prefix "$outDir/chunk-" puts the pieces straight into the target
# directory, so unrelated chunk-* files in the current directory are untouched.
splitCommand=(
  "$programRequired"
  "--number=l/$chunks"
  --numeric-suffixes
  --
  "$file"
  "$outDir/chunk-"
)
"${splitCommand[@]}" || {
  echo >&2 "$programRequired failed. Aborting."
  exit 1
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment