@bschopman
Last active August 26, 2016 08:43
This is a small daemon that chops a log stream up into many smaller files.
#!/bin/bash
# Chop a continuous log stream (e.g. from `tail -F`) into small files.

BATCH_SIZE=5          # lines per output file
WRITE_PATH=/tmp/      # output directory (trailing slash required)
TIMEOUT_SECONDS=1     # flush a partial batch after this many idle seconds

function writeBatch {
    # Name each file by epoch seconds and nanoseconds to avoid collisions.
    out_path="${WRITE_PATH}$(date +%s_%N).log"
    echo -e "$batch" > "$out_path"
    count=0
    batch=""
    echo "Wrote $out_path"
}

while true; do
    IFS= read -r -t "$TIMEOUT_SECONDS" line
    readResult=$?
    if [ "$readResult" -gt 128 ]; then
        # read timed out: flush whatever has accumulated so far.
        if [ -n "$batch" ]; then
            writeBatch
        fi
        continue
    fi
    if [ "$readResult" -ne 0 ]; then
        # End of input (or read error): flush the partial batch so no
        # lines are lost, then stop.
        if [ -n "$batch" ]; then
            writeBatch
        fi
        exit 1
    fi
    # Join buffered lines with a literal \n, expanded later by `echo -e`.
    if [ -n "$batch" ]; then
        batch="$batch\n"
    fi
    batch="$batch$line"
    ((++count))
    if [ "$count" -ge "$BATCH_SIZE" ]; then
        writeBatch
    fi
done

Start this script with a tail pipe as stdin, e.g.:

$ tail -F /tmp/access.log | ./chop_up_log.sh

The three variables at the top are its parameters. As soon as $BATCH_SIZE lines have been read, a batch is written to disk. Likewise, once the read call has waited $TIMEOUT_SECONDS seconds and there is data in the buffer, the batch is written to disk.
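The control flow hinges on how bash overloads `read`'s exit status: 0 means a line arrived, a value greater than 128 means the `-t` timeout expired, and any other non-zero value (typically 1) means end of input. A small standalone sketch of the three cases:

```shell
#!/bin/bash
# Exit status 0: a line arrived before the timeout expired.
printf 'hello\n' | { IFS= read -r -t 1 line; echo "got line, rc=$?"; }

# Exit status 1: stdin was closed (EOF) with no more data.
printf '' | { IFS= read -r -t 1 line; echo "eof, rc=$?"; }

# Exit status > 128: the timeout fired before any input appeared.
{ IFS= read -r -t 1 line; rc=$?; [ "$rc" -gt 128 ] && echo "timeout, rc>128"; } < <(sleep 2)
```

The `rc > 128` test is what lets the script distinguish "no input yet, keep waiting" from "the pipe is gone, give up".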

This script can be used, for example, to read a web server's access log and write small files to a Flume SpoolDir source.
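For the Flume case, a minimal sketch of a matching agent configuration (the agent and component names `a1`, `r1`, `c1` are placeholders; `spoolDir` must match the script's WRITE_PATH):

```properties
a1.sources = r1
a1.channels = c1
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = /tmp
a1.sources.r1.channels = c1
```

Note that Flume's spooling directory source expects files to be complete and immutable once they appear in the directory; writing each batch in a single `echo` keeps the window for a partial read small, but a rename-into-place scheme would be more robust.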
