RussianNeuroMancer/search_duplicate_blocks.sh

## search_duplicate_blocks.sh
#!/bin/bash

# This script generates checksums of partition blocks and lists the blocks whose checksum repeats

# License: GNU GPL version 3 or any later version as released by the Free Software Foundation
# NO WARRANTY

# Author of original script: https://www.reddit.com/user/gnulicious
# https://gist.github.com/anonymous/e0493e823091078b5132

# Author: RussianNeuroMancer

# Usage: search_duplicate_blocks.sh PARTITION BLOCK_MIB START_BLOCK BLOCK_COUNT HASH
#   e.g. search_duplicate_blocks.sh /dev/sda1 2 0 51200 md5
#
# Reads BLOCK_COUNT consecutive blocks of BLOCK_MIB MiB from PARTITION,
# beginning at block index START_BLOCK, and hashes each one with "${HASH}sum".
#
# Files written in the current directory:
#   result.txt - one "<digest> <block index>" line per hashed block. Lines are
#                appended, so entries left over from an earlier run also take
#                part in duplicate detection; delete the file for a fresh scan.
#   dup.txt    - every line of result.txt whose digest already appeared on an
#                earlier line (overwritten on each run).
#
# Exit status: 2 on invalid arguments, 1 if the hash utility is missing or a
# block could not be read/hashed, 0 otherwise.

if [[ $# -ne 5 ]]; then
    echo "Expected 5 arguments. Look into script for examples." >&2
    exit 2
fi

# Without pipefail only awk's status would be visible, and a failing dd or
# hash utility would be recorded as if it were a valid digest.
set -o pipefail

partition="$1" # Partition location, for example: /dev/sda1
block="$2"     # Block size for hashing in MiB, for example 2
start="$3"     # Index of the first block to hash, counted in blocks of $block MiB, for example 0
size="$4"      # Number of blocks to scan, for example 51200 (which is 100 GiB for 2 MiB block)
hash="$5"      # Hash algorithm supported by coreutils, for example md5 or sha1

for number in "$block" "$start" "$size"; do
    if [[ ! "$number" =~ ^[0-9]+$ ]]; then
        echo "Block size, start block and scan size must be non-negative integers (got '$number')." >&2
        exit 2
    fi
done

# Force base 10 so that zero-padded input such as 08 is not parsed as octal.
block=$((10#$block))
start=$((10#$start))
size=$((10#$size))

if ((block < 1)); then
    echo "Block size must be at least 1 MiB." >&2
    exit 2
fi

end=$((start + size)) # Exclusive upper bound: block indices start .. end-1 are hashed
hashtool="${hash}sum"

if ! command -v "$hashtool" > /dev/null; then
    echo "Hash utility not found: $hashtool" >&2
    exit 1
fi

echo "Partition: $partition"
echo "Block size: $block MiB"
# dd's skip= counts whole input blocks, so the byte offset is start * block MiB.
echo "Start from: $((start * block)) MiB"
echo "Until: $((end * block)) MiB"
echo "Hash utility: $hashtool"
echo
echo "Performing hashing of partition blocks..."

# Make sure result.txt exists (without truncating it) so the duplicate search
# below also works when zero blocks were requested.
: >> result.txt

for ((count = start; count < end; count++)); do
    # iflag=fullblock keeps dd reading until the whole block has arrived;
    # with count=1 a short read would otherwise hash a truncated block.
    if ! digest=$(sudo dd ibs="${block}M" obs="${block}M" if="$partition" \
            skip="$count" count=1 iflag=fullblock status=none \
        | "$hashtool" -b - \
        | awk '{print $1}'); then
        echo "Failed to hash block $count of $partition" >&2
        exit 1
    fi
    # Append one finished line instead of re-reading and rewriting the whole file.
    printf '%s %s\n' "$digest" "$count" >> result.txt
    echo "$(( (end - count - 1) * block )) MiB remaining"
done

echo
echo "Hashing done!"
echo
echo "Checking for duplicates blocks:"
# Print each line whose digest (first field) was already seen on an earlier line.
awk 'seen[$1]++' result.txt > dup.txt
echo
echo "Found duplicates blocks listed in dup.txt"
	#!/bin/bash

	# This script generates checksums of partition blocks and lists the blocks whose checksum repeats

	# License: GNU GPL version 3 or any later version as released by the Free Software Foundation
	# NO WARRANTY

	# Author of original script: https://www.reddit.com/user/gnulicious
	# https://gist.github.com/anonymous/e0493e823091078b5132

	# Author: RussianNeuroMancer

	# Usage: search_duplicate_blocks.sh PARTITION BLOCK_MIB START_BLOCK BLOCK_COUNT HASH
	#   e.g. search_duplicate_blocks.sh /dev/sda1 2 0 51200 md5
	#
	# Reads BLOCK_COUNT consecutive blocks of BLOCK_MIB MiB from PARTITION,
	# beginning at block index START_BLOCK, and hashes each one with "${HASH}sum".
	#
	# Files written in the current directory:
	#   result.txt - one "<digest> <block index>" line per hashed block. Lines are
	#                appended, so entries left over from an earlier run also take
	#                part in duplicate detection; delete the file for a fresh scan.
	#   dup.txt    - every line of result.txt whose digest already appeared on an
	#                earlier line (overwritten on each run).
	#
	# Exit status: 2 on invalid arguments, 1 if the hash utility is missing or a
	# block could not be read/hashed, 0 otherwise.

	if [[ $# -ne 5 ]]; then
		echo "Expected 5 arguments. Look into script for examples." >&2
		exit 2
	fi

	# Without pipefail only awk's status would be visible, and a failing dd or
	# hash utility would be recorded as if it were a valid digest.
	set -o pipefail

	partition="$1" # Partition location, for example: /dev/sda1
	block="$2"     # Block size for hashing in MiB, for example 2
	start="$3"     # Index of the first block to hash, counted in blocks of $block MiB, for example 0
	size="$4"      # Number of blocks to scan, for example 51200 (which is 100 GiB for 2 MiB block)
	hash="$5"      # Hash algorithm supported by coreutils, for example md5 or sha1

	for number in "$block" "$start" "$size"; do
		if [[ ! "$number" =~ ^[0-9]+$ ]]; then
			echo "Block size, start block and scan size must be non-negative integers (got '$number')." >&2
			exit 2
		fi
	done

	# Force base 10 so that zero-padded input such as 08 is not parsed as octal.
	block=$((10#$block))
	start=$((10#$start))
	size=$((10#$size))

	if ((block < 1)); then
		echo "Block size must be at least 1 MiB." >&2
		exit 2
	fi

	end=$((start + size)) # Exclusive upper bound: block indices start .. end-1 are hashed
	hashtool="${hash}sum"

	if ! command -v "$hashtool" > /dev/null; then
		echo "Hash utility not found: $hashtool" >&2
		exit 1
	fi

	echo "Partition: $partition"
	echo "Block size: $block MiB"
	# dd's skip= counts whole input blocks, so the byte offset is start * block MiB.
	echo "Start from: $((start * block)) MiB"
	echo "Until: $((end * block)) MiB"
	echo "Hash utility: $hashtool"
	echo
	echo "Performing hashing of partition blocks..."

	# Make sure result.txt exists (without truncating it) so the duplicate search
	# below also works when zero blocks were requested.
	: >> result.txt

	for ((count = start; count < end; count++)); do
		# The stages must be joined by real pipes: an escaped \| is handed to dd
		# as a literal operand and nothing gets hashed.
		# iflag=fullblock keeps dd reading until the whole block has arrived;
		# with count=1 a short read would otherwise hash a truncated block.
		if ! digest=$(sudo dd ibs="${block}M" obs="${block}M" if="$partition" \
				skip="$count" count=1 iflag=fullblock status=none \
			| "$hashtool" -b - \
			| awk '{print $1}'); then
			echo "Failed to hash block $count of $partition" >&2
			exit 1
		fi
		# Append one finished line instead of re-reading and rewriting the whole file.
		printf '%s %s\n' "$digest" "$count" >> result.txt
		echo "$(( (end - count - 1) * block )) MiB remaining"
	done

	echo
	echo "Hashing done!"
	echo
	echo "Checking for duplicates blocks:"
	# Print each line whose digest (first field) was already seen on an earlier line.
	awk 'seen[$1]++' result.txt > dup.txt
	echo
	echo "Found duplicates blocks listed in dup.txt"