
@SavSanta
Forked from OndraZizka/find-duplicate-files.bash
Created September 5, 2021 19:10
Finds likely duplicate files by hashing only the first 1 MB of each file larger than 3 MB. A faster alternative to `fdupes -r -S .`
# Ensure the hash list exists so the greps below don't complain on the first run.
touch md5-partial.txt

find . -type f -size +3M -print0 | while IFS= read -r -d '' i; do
    echo -n '.'
    # Skip files we have already hashed.
    if grep -qF "$i" md5-partial.txt; then
        echo -n ':'
        continue
    fi
    # Hash only the first 1 MB -- fast, but prefix collisions are possible.
    MD5=$(dd bs=1M count=1 if="$i" status=none | md5sum | cut -d' ' -f1)
    if grep -q "$MD5" md5-partial.txt; then echo -e "\nDuplicate: $i"; fi
    echo "$MD5 $i" >> md5-partial.txt
done
## Show the duplicates
#sort md5-partial.txt | uniq --check-chars=32 -d -c
#sort md5-partial.txt | uniq --check-chars=32 -d -c | sort -b -n
#sort md5-partial.txt | uniq --check-chars=32 -d -c | sort -b -n | cut -c 9-40 | xargs -I '{}' sh -c "grep '{}' md5-partial.txt && echo"
## Show wasted space
if false; then
    sort md5-partial.txt | uniq --check-chars=32 -d -c | while IFS= read -r LINE; do
        HASH=$(echo "$LINE" | cut -c 9-40)
        # Don't call this variable PATH -- that would shadow the shell's
        # command search path and break every command that follows.
        FILE=$(echo "$LINE" | cut -c 41-)
        # Print the file size in bytes (fixed-column cuts of `ls -l` are fragile).
        stat -c %s "$FILE"
    done
fi
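Two files that share their first 1 MB are not guaranteed to be identical, so it is worth re-hashing every candidate group in full before acting on the report. A minimal sketch, assuming `md5-partial.txt` holds `<32-char md5> <path>` lines as written above (GNU `uniq`):

```shell
# Re-check partial-hash candidates with a full-content hash. `-D` prints
# every member of each group whose first 32 characters (the hash) repeat;
# the path starts at column 34 (32 hash chars plus one space).
sort md5-partial.txt \
  | uniq --check-chars=32 -D \
  | cut -c 34- \
  | while IFS= read -r f; do md5sum "$f"; done \
  | sort \
  | uniq --check-chars=32 -D
```

Only groups that survive this second pass are true byte-for-byte duplicates (up to MD5 collisions).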