Skip to content

Instantly share code, notes, and snippets.

@gabbaybr
Last active January 21, 2016 16:53
Show Gist options
  • Save gabbaybr/73cfd4df1d937ee12acd to your computer and use it in GitHub Desktop.
Save gabbaybr/73cfd4df1d937ee12acd to your computer and use it in GitHub Desktop.
Script to extract a block of text from a file using regexes, e.g.: ./extractBlock.sh "18, 2016 2:.*PM" "18, 2016 3:.*PM" c1vm081.out ras2.log
#!/usr/bin/env bash
#############################################################
# Script to extract a block of text from large plain-text files,
# using regexes to locate the start and end lines of the block
#
# How to use:
# ./extractBlock.sh [regex_start_line] [regex_end_line] [input_file] [output_file]
#
# Example of usage:
# ./extractBlock.sh "18, 2016 2:.*PM" "18, 2016 3:.*PM" c1vm081.out ras2.log
#
# Created by fgabbay
#############################################################
#############################################################
# HELPERS
#############################################################
# Succeed (status 0) when the first argument is missing or an empty string;
# fail (status 1) otherwise. Any additional arguments are ignored.
is_empty() {
  test -z "$1"
}
#############################################################
# VALIDATE PARAMS
#############################################################
# Verify that all four required parameters were supplied.
# Globals (read): regex_start, regex_end, input_file, output_file
# Outputs: a confirmation line on stdout when everything is present;
#          the usage text on stderr when something is missing.
# Exits:   with status 1 when any parameter is empty, so callers and
#          wrapping scripts can tell that nothing was extracted.
is_valid_params() {
  local param
  # Quote every value: the regexes normally contain spaces and glob
  # characters, which must not be word-split or expanded here.
  for param in "$regex_start" "$regex_end" "$input_file" "$output_file"; do
    if is_empty "$param"; then
      usage >&2
      exit 1
    fi
  done
  echo "The params is ok, keep going..."
}
#############################################################
# USAGE
#############################################################
# Print the help text on stdout: a banner, the synopsis and two example
# invocations. The program name shown comes from the global
# script_file_name.
usage() {
  local prog=$script_file_name
  local rule='==========================================================='
  printf '%s\n' \
    "$rule" \
    "usage: $prog" \
    "$rule" \
    'Extract a block of text using regex to begining and end.' \
    "$prog [regex_start_line] [regex_end_line] [input_file] [output_file]" \
    'Examples:' \
    "$prog \"18, 2016 2:.*PM\" \"18, 2016 3:.*PM\" c1vm081.out ras2.log" \
    'or' \
    "$prog \"21, 2016 8:.*AM\" \"21, 2016 9:.*AM\" logs-weblogic/c1vm081.out weblogic-travamento-8h27.log"
}
#############################################################
# MAIN EXECUTION
#############################################################
# Print, one per line and in ascending order, the line numbers of every
# line in file $2 that matches the grep regex $1.
_match_line_numbers() {
  # -e keeps a pattern starting with '-' from being parsed as an option.
  grep -n -e "$1" -- "$2" | cut -d: -f1
}

# Extract the block of lines delimited by the start and end regexes.
# Globals (read): regex_start, regex_end, input_file, output_file
# Outputs: progress messages on stdout, errors on stderr; the extracted
#          block is written to output_file (overwriting it).
# Exits:   status 1 when the input file is unreadable, the start pattern
#          is not found, or the block cannot be written.
main() {
  # Abort early (with usage) if any parameter is missing.
  is_valid_params

  if [[ ! -r $input_file ]]; then
    echo "The input file \"${input_file}\" does not exist or is not readable." >&2
    exit 1
  fi

  echo "Looking for begin and end lines..."

  # Declaration and assignment are split so the pipeline status is not
  # masked by 'local'.
  local begining_at
  begining_at=$(_match_line_numbers "$regex_start" "$input_file" \
    | awk 'NR == 1 { print; exit }')

  # If the beginning was not found there is nothing to extract.
  if is_empty "$begining_at"; then
    echo "The pattern \"${regex_start}\" for begining could not be found in file \"${input_file}\"." >&2
    exit 1
  fi
  echo "Starting at line: $begining_at"

  # The block ends at the first end-pattern match located at or after the
  # start line; a match that occurs earlier in the file is not the end of
  # this block.
  local end_at
  end_at=$(_match_line_numbers "$regex_end" "$input_file" \
    | awk -v start="$begining_at" '$1 + 0 >= start + 0 { print $1; exit }')

  # No usable end: fall back to the last line matching the start pattern
  # (always non-empty here, since the start pattern matched above).
  if is_empty "$end_at"; then
    echo "End of block was not found, getting last line from start param..."
    end_at=$(_match_line_numbers "$regex_start" "$input_file" \
      | awk 'END { print }')
  fi
  echo "Ending block at line: $end_at"

  echo "Capturing between lines ${begining_at} and ${end_at}..."
  # 'q' stops sed right after the last wanted line instead of scanning the
  # remainder of a potentially very large file.
  if ! sed -n "${begining_at},${end_at}p;${end_at}q" \
    < "$input_file" > "$output_file"; then
    echo "Could not write the block to \"${output_file}\"." >&2
    exit 1
  fi
  echo "done!"
}
#############################################################
# GLOBAL VARIABLES
#############################################################
# Capture the invocation name and the four positional arguments once,
# then freeze them so no function can modify them afterwards.
script_file_name=$0
regex_start=$1
regex_end=$2
input_file=$3
output_file=$4
readonly script_file_name regex_start regex_end input_file output_file

# Entry point.
main
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment