Last active
January 21, 2016 16:53
-
-
Save gabbaybr/73cfd4df1d937ee12acd to your computer and use it in GitHub Desktop.
Script to extract a block of file using regex, like: ./extractBlock.sh "18, 2016 2:.*PM" "18, 2016 3:.*PM" c1vm081.out ras2.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
#############################################################
# Script to extract a block of text from big plain text files
# using regexes to match the start and end lines
#
# How to use:
# ./extractBlock.sh [regex_start_line] [regex_end_line] [input_file] [output_file]
#
# Example of usage:
# ./extractBlock.sh "18, 2016 2:.*PM" "18, 2016 3:.*PM" c1vm081.out ras2.log
#
# Created by fgabbay
#############################################################
############################################################# | |
# HELPERS | |
############################################################# | |
# Succeeds (status 0) when the first argument is missing or an empty
# string; fails (status 1) otherwise.
# Arguments: $1 - value to inspect
is_empty() {
  local -r candidate=${1}
  if [[ -n ${candidate} ]]; then
    return 1
  fi
  return 0
}
############################################################# | |
# VALIDATE PARAMS | |
############################################################# | |
# Verifies that all four required parameters were supplied.
# Globals:   regex_start, regex_end, input_file, output_file (read)
# Outputs:   a confirmation line on stdout when every parameter is set;
#            the usage text on stderr when one is missing
# Returns:   0 when all parameters are present; otherwise the script
#            exits with status 1
is_valid_params() {
  local param
  # Each value is quoted so that patterns containing spaces or glob
  # characters (e.g. "18, 2016 2:.*PM") reach is_empty as a single word.
  for param in "$regex_start" "$regex_end" "$input_file" "$output_file"; do
    if is_empty "$param"; then
      usage >&2
      # A missing parameter is an error: report it with a non-zero status.
      exit 1
    fi
  done
  echo "The params is ok, keep going..."
}
############################################################# | |
# USAGE | |
############################################################# | |
# Prints the help banner: a short description, the expected argument
# order and two sample invocations.
# Globals: script_file_name (read) - name shown in the banner
# Outputs: the help text on stdout
usage() {
  local -r prog=${script_file_name}
  cat <<EOF
===========================================================
usage: ${prog}
===========================================================
Extract a block of text using regex to begining and end.
${prog} [regex_start_line] [regex_end_line] [input_file] [output_file]
Examples:
${prog} "18, 2016 2:.*PM" "18, 2016 3:.*PM" c1vm081.out ras2.log
or
${prog} "21, 2016 8:.*AM" "21, 2016 9:.*AM" logs-weblogic/c1vm081.out weblogic-travamento-8h27.log
EOF
}
############################################################# | |
# MAIN EXECUTION | |
############################################################# | |
# Extracts the block of lines delimited by two regexes from the input
# file and writes it to the output file.
#
# The block starts at the first line matching regex_start. It ends at the
# first line at or after the start that matches regex_end; when no such
# line exists, it ends at the last line matching regex_start.
#
# Globals:   regex_start, regex_end, input_file, output_file (read)
# Outputs:   progress messages on stdout, error messages on stderr,
#            the extracted block in output_file
# Returns:   0 on success; exits with status 1 when the input file is not
#            readable or the start pattern is not found
main() {
  # Check if params were sent (exits on its own when one is missing)
  is_valid_params

  if [[ ! -r "$input_file" ]]; then
    echo "The input file \"${input_file}\" does not exist or is not readable." >&2
    exit 1
  fi

  echo "Looking for begin and end lines..."

  # Declaration is kept apart from the assignment so the pipeline's status
  # is not masked by 'local'. '-e' and '--' keep patterns and file names
  # that begin with '-' from being parsed as options.
  local begining_at
  begining_at=$(grep -n -e "$regex_start" -- "$input_file" \
    | awk -F: 'NR==1 {print $1; exit}')

  # If the beginning was not found, abort the script
  if is_empty "$begining_at"; then
    echo "The pattern \"${regex_start}\" for begining could not be found in file \"${input_file}\"." >&2
    exit 1
  fi
  echo "Starting at line: $begining_at"

  # Search for the end pattern only from the start line onwards, so an
  # earlier match cannot produce an inverted range. grep numbers lines
  # relative to the start line, hence the offset arithmetic below.
  local end_offset
  local end_at
  end_offset=$(sed -n -e "${begining_at},\$p" -- "$input_file" \
    | grep -n -e "$regex_end" \
    | awk -F: 'NR==1 {print $1; exit}')

  if is_empty "$end_offset"; then
    # No end marker: fall back to the last line matching the start pattern
    echo "End of block was not found, getting last line from start param..."
    end_at=$(grep -n -e "$regex_start" -- "$input_file" \
      | awk -F: '{ last = $1 } END { print last }')
  else
    end_at=$(( begining_at + end_offset - 1 ))
  fi
  echo "Ending block at line: $end_at"

  echo "Capturing between lines ${begining_at} and ${end_at}..."
  sed -n -e "${begining_at},${end_at}p" -- "$input_file" > "$output_file"
  echo "done!"
}
############################################################# | |
# GLOBAL VARIABLES | |
############################################################# | |
# Freeze the four positional arguments and the script's own name as
# read-only globals, then hand control to main.
readonly regex_start="${1}" \
  regex_end="${2}" \
  input_file="${3}" \
  output_file="${4}" \
  script_file_name="${0}"

main
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment