Skip to content

Instantly share code, notes, and snippets.

@douglascodes
Last active December 16, 2015 06:48
Show Gist options
  • Save douglascodes/5393571 to your computer and use it in GitHub Desktop.
Save douglascodes/5393571 to your computer and use it in GitHub Desktop.
Bash script for finding the percentage of Byte difference between two files. Uses wc, cmp, grep and bc.
#!/bin/bash
#
# Reports how similar two files are, byte by byte.
#
# Usage: <script> FILE_A FILE_B
#
# The two files are compared position by position over the part they have in
# common (cmp -l). The number of matching bytes is printed as a percentage of
# the shorter file and, when the lengths differ, of the longer file as well.
# Requires wc, cmp and bc.
#
# Exit status: 0 on success, 1 if a file cannot be read, 2 on a usage error.

# Prints the size in bytes of the file named by $1.
# The file is fed through stdin so that wc prints only the count; scraping
# digits out of "wc -c FILE" output also picks up digits in the file name.
file_size() {
  local size
  size=$(wc -c < "$1") || return
  # Some wc implementations pad the count with blanks; strip them.
  printf '%s\n' "${size//[[:space:]]/}"
}

# Prints the number of byte positions at which files $1 and $2 differ,
# looking only at the part that both files have in common.
count_differing_bytes() {
  local count
  # cmp -l prints one line per differing byte. For files of unequal length it
  # also writes an end-of-file notice to stderr; that is expected here, so
  # stderr is discarded on purpose.
  count=$(cmp -l -- "$1" "$2" 2> /dev/null | wc -l)
  printf '%s\n' "${count//[[:space:]]/}"
}

# Prints $1 as a percentage of $2 with nine decimal places, or "n/a" when $2
# is zero (nothing to compare against, and bc would fail on the division).
percentage() {
  local part=$1 whole=$2
  if (( whole == 0 )); then
    printf 'n/a\n'
    return 0
  fi
  # Multiply before dividing so bc's fixed scale does not truncate the ratio
  # before it is scaled up to a percentage.
  bc <<< "scale=9; $part * 100 / $whole"
}

# Entry point: validates the two file arguments and prints the report.
# Arguments: $1 - first file, $2 - second file
# Returns:   0 on success, 1 if a file is unreadable, 2 on wrong usage
main() {
  echo Reports the percentage of matching bytes between two files.

  if (( $# != 2 )); then
    printf 'Usage: %s FILE_A FILE_B\n' "${0##*/}" >&2
    return 2
  fi

  local file_a=$1 file_b=$2 file
  for file in "$file_a" "$file_b"; do
    if [[ ! -f "$file" || ! -r "$file" ]]; then
      printf '%s: not a readable file\n' "$file" >&2
      return 1
    fi
  done

  local length_a length_b
  length_a=$(file_size "$file_a") || return 1
  length_b=$(file_size "$file_b") || return 1

  local longer_bytes shorter_bytes
  if (( length_b > length_a )); then
    longer_bytes=$length_b
    shorter_bytes=$length_a
  else
    longer_bytes=$length_a
    shorter_bytes=$length_b
  fi

  if (( length_a == length_b )); then
    echo The files lengths were equal.
  else
    echo "Difference in file lengths: $(( longer_bytes - shorter_bytes )) byte(s)"
  fi

  local wrong correct
  wrong=$(count_differing_bytes "$file_a" "$file_b")
  # Only the common part is compared, so matches are counted against the
  # shorter length.
  correct=$(( shorter_bytes - wrong ))

  echo "$file_a length: $length_a"
  echo "$file_b length: $length_b"
  echo "Unmatched bytes: $wrong"
  echo "Matched bytes: $correct"
  echo "Similarity is $(percentage "$correct" "$shorter_bytes") % of $shorter_bytes bytes."

  # With equal lengths the second figure would just repeat the first.
  if (( length_a == length_b )); then
    return 0
  fi
  echo "Similarity is $(percentage "$correct" "$longer_bytes") % of the longer $longer_bytes bytes."
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment