Skip to content

Instantly share code, notes, and snippets.

@dbro
Created April 24, 2014 07:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dbro/11245203 to your computer and use it in GitHub Desktop.
Save dbro/11245203 to your computer and use it in GitHub Desktop.
partition lines of incoming data into separate files
#!/bin/bash
# write the incoming data on stdin to separate files depending on their contents
# for example, take a file that has different dates in it:
# 2014-02-15+12334567 hello there this is the first line
# 2014-02-16+23345678 hello there this is the second line
# this script can be used to send the first line to a file called /tmp/session.log-20140215-randomnumber
# and the second line to another file called /tmp/session.log-20140216-randomnumber
# it takes the first N characters from the line for use in the output filename
# Help text shown by display_usage. It contains literal "\t" sequences that
# are expanded into tab characters only when the text is printed.
USAGE="usage: $0 -p \"/tmp/session-logs-ready-to-merge-\" [-s \"-ready-for-merge\" -r -c 10 -d'-'] [input_filename] [another_input_filename]
\tp\tprefix path
\ts\tsuffix of the path
\tc\tthe count of characters from the start of each row to use in the filename
\td\tdelete these characters from the extracted prefix
\tr\tappend a random string to the end of the file name
\t\tif input_filename is omitted, read from stdin
example: $0 -p \"/tmp/session.log-\" -s \"-merge-\" -c 10 -d'-' -r"

#######################################
# Print the usage text on stdout and terminate the script.
# Globals:   USAGE (read)
# Arguments: none
# Outputs:   the usage text, with backslash escapes expanded
# Returns:   does not return; exits with the status of the print command
#######################################
display_usage() {
  printf '%b\n' "$USAGE"
  exit
}
# Default values for the command-line options.
prefix=""     # -p: leading part of every output file name (mandatory)
suffix=""     # -s: text appended after the extracted key
random=false  # -r: when true, a random string is appended to the file name
charcount=0   # -c: number of leading characters of each line used as the key
delchars=""   # -d: characters removed from the extracted key

# Locate the awk interpreter: mawk is tried first, plain awk is the fallback.
# The fallback must be an assignment; a bare "$(...)" after "||" would execute
# awk's path as a command and leave AWK empty.
AWK=$(command -v mawk) || AWK=$(command -v awk) || {
  echo "error: neither mawk nor awk was found in PATH" >&2
  exit 1
}
# TODO: check parameters for errors
# Parse the command-line options into the option variables.
# -h and any unrecognised option both print the usage text and exit.
while getopts "p:s:c:d:r?h" arg; do
  case "$arg" in
    h | \?) display_usage ;;
    p) prefix=$OPTARG ;;
    s) suffix=$OPTARG ;;
    c) charcount=$OPTARG ;;
    d) delchars=$OPTARG ;;
    r) random=true ;;
  esac
done
# The output prefix is mandatory: without it, report the problem and show the
# usage text (display_usage terminates the script).
[[ -n "$prefix" ]] || {
  printf '%s\n' "no prefix specified"
  display_usage
}
# No random file-name component until one is generated for an input.
randomstring=""
# Turn the characters to delete into a bracket expression for awk's gsub();
# stays empty when no -d characters were given.
delregex=${delchars:+"[${delchars}]"}
# Drop the parsed options so that only input file names remain.
shift "$((OPTIND - 1))"
# With no input file names, process a single empty name, which the input loop
# treats as "read from stdin".
(( $# > 0 )) || set -- ""
#######################################
# Partition one input stream into per-key output files.
# The key is the first $charcount characters of each line with every match of
# $delregex removed; each line is written unchanged to the file named
# "<prefix><key><suffix>" (plus "-<random>" when $random is true).
# Globals:   AWK, prefix, suffix, random, charcount, delregex (all read)
# Arguments: $1 - input file name; a name ending in ".gz" is read with zcat,
#                 an empty string means "read from stdin"
# Returns:   the exit status of the awk stage of the pipeline
# NOTE: awk's ">" truncates an output file the first time a given awk process
#       writes to it, so without -r a later input replaces the files that an
#       earlier input produced for the same key.
#######################################
partition_input() {
  local inputfile=$1
  local awk_bin=${AWK:-awk} # fall back to awk from PATH if AWK is unset/empty
  local random_tag=""
  local -a reader

  # Build the reader as an array so file names containing spaces or glob
  # characters reach cat/zcat as a single, unmodified argument.
  if [[ -z "$inputfile" ]]; then
    reader=(cat)
  elif [[ "$inputfile" == *.gz ]]; then
    reader=(zcat -- "$inputfile")
  else
    reader=(cat -- "$inputfile")
  fi

  # A fresh random component is generated for every input.
  if [[ "$random" == true ]]; then
    random_tag="-$(uuidgen | tr -d '-' | cut -c1-10)"
  fi

  "${reader[@]}" | "$awk_bin" \
    -v "prefix=$prefix" \
    -v "suffix=${suffix}${random_tag}" \
    -v "charcount=$charcount" \
    -v "delregex=$delregex" \
    'BEGIN { FS = "\t"; OFS = FS }
     {
       snippet = substr($0, 1, charcount)
       gsub(delregex, "", snippet)
       outputfilename = prefix snippet suffix
       print $0 > outputfilename
     }'
}

# Process every input named on the command line, in order.
for inputfile in "$@"; do
  partition_input "$inputfile"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment