Skip to content

Instantly share code, notes, and snippets.

@gpollo
Last active December 1, 2015 16:36
Show Gist options
  • Save gpollo/262ac6a2c7107acf7ac2 to your computer and use it in GitHub Desktop.
Save gpollo/262ac6a2c7107acf7ac2 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Name of the folder that collects the duplicates. Keep it free of spaces.
DUP_NAME='Duplicate'
# Absolute path of that folder, anchored at the directory the script starts in.
DUP_DIR="$(pwd)/${DUP_NAME}"
# Walks the directory tree rooted at the current working directory and calls
# execute_filter inside every leaf directory (a directory that contains no
# subdirectories). Directories that do contain subdirectories are only
# traversed; execute_filter is not run in them.
# Globals:
#   DUP_DIR (read) - absolute path of the duplicates folder; it is never entered.
# Arguments:
#   None. Operates on the current working directory.
# Returns:
#   In a leaf directory, the status of execute_filter. 1 if the directory being
#   processed cannot be re-entered after visiting one of its children.
function enter_subfolder {
  # Every variable is local: the function recurses, and each level needs its
  # own directory list and its own anchor path.
  local here=$PWD
  local entry
  local -a subdirs=()

  # NUL-delimited listing, so names containing whitespace, backslashes or
  # newlines arrive exactly as they are on disk.
  while IFS= read -r -d '' entry; do
    subdirs+=("$entry")
  done < <(find . -mindepth 1 -maxdepth 1 -type d -print0)

  # We check if we can go deeper.
  if (( ${#subdirs[@]} == 0 )); then
    # Execute the filter since we are at the last folder of the tree.
    execute_filter
    return
  fi

  # We loop through each folder.
  for entry in "${subdirs[@]}"; do
    # We ignore the $DUP_DIR folder. A folder with the same name deeper in the
    # tree has a different absolute path and is still visited.
    if [[ "$here/${entry#./}" == "$DUP_DIR" ]]; then
      continue
    fi

    # Recursing after a failed cd would process the current folder again and
    # never terminate, so an inaccessible folder is skipped instead.
    if ! cd "$entry"; then
      printf 'enter_subfolder: cannot enter %s, skipping\n' \
        "$here/${entry#./}" >&2
      continue
    fi

    # We recursively go into folder until we are at the last one.
    enter_subfolder

    # Go back to this level by absolute path rather than a relative "..".
    cd "$here" || return 1
  done
}
# Moves duplicated media files out of the current directory.
# Every regular *.jpg / *.mp4 file directly inside the current directory is
# hashed with SHA-512. The first file seen with a given hash stays in place;
# every later file with the same hash is moved into $DUP_DIR. Hashes are only
# compared within one call, i.e. within one directory.
# Globals:
#   DUP_DIR (read) - directory that receives the duplicates.
# Arguments:
#   None. Operates on the current working directory.
# Outputs:
#   One line per hashed file on stdout: the hash, a tab, then
#   "DUPLICATE DETECTED" if the file was moved. Warnings go to stderr.
function execute_filter {
  local file digest
  # Space-delimited list of the hashes met so far. The surrounding spaces let
  # a glob test match whole hashes only.
  local seen=' '

  # We loop through each file. Snapchat uses *.jpg and *.mp4 files.
  # The listing is NUL-delimited so unusual file names are read back verbatim.
  while IFS= read -r -d '' file; do
    # We hash the file. It is fed on stdin because sha512sum, when given a
    # file name containing a backslash or newline, marks the output line with
    # a leading backslash, which would change the first field.
    if ! digest=$(sha512sum < "$file") || [[ -z "$digest" ]]; then
      # Without a hash nothing can be compared; an empty hash must never be
      # treated as "already seen", so the file is left where it is.
      printf 'execute_filter: cannot hash %s, leaving it in place\n' \
        "$file" >&2
      continue
    fi
    digest=${digest%% *}
    printf '%s\t' "$digest"

    if [[ "$seen" == *" $digest "* ]]; then
      # We move the file if it is a duplicated file.
      # NOTE(review): mv replaces a same-named file already in $DUP_DIR.
      printf 'DUPLICATE DETECTED\n'
      mv -- "$file" "$DUP_DIR"
    else
      # We store the hash if it is the first time we encountered it.
      printf '\n'
      seen+="$digest "
    fi
  done < <(find ./ -maxdepth 1 -type f -regex ".*/.*\.\(mp4\|jpg\)" -print0)
}
# The execution starts here.
# If the destination folder cannot be created, mv would treat "$DUP_DIR" as a
# target file name instead of a directory, so stop before moving anything.
if ! mkdir -p "$DUP_DIR"; then
  printf 'Cannot create duplicates folder: %s\n' "$DUP_DIR" >&2
  exit 1
fi
enter_subfolder
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment