Skip to content

Instantly share code, notes, and snippets.

@uqmessias
Last active July 20, 2024 01:21
Show Gist options
  • Save uqmessias/bac5fca7620fc2836413c5a8728f2dac to your computer and use it in GitHub Desktop.
Save uqmessias/bac5fca7620fc2836413c5a8728f2dac to your computer and use it in GitHub Desktop.
CLI tool that helps remove duplicate files in a folder (recursively)

Remove duplicate files CLI

How to use it

$ node ./removeDuplicateFilesCli.js
const fs = require("fs");
const path = require("path");
const crypto = require("crypto");
const stream = require("stream/promises");
const readline = require("readline");
/**
 * Computes the SHA-256 digest of a file's contents.
 *
 * @param {string} filepath Path of the file to hash.
 * @returns {Promise<string>} The digest encoded as a hexadecimal string.
 */
async function computeHash(filepath) {
  const sha256 = crypto.createHash("sha256");
  const fileStream = fs.createReadStream(filepath);

  // The hash object is itself a writable stream, so the file is piped
  // straight into it instead of being read into memory first.
  await stream.pipeline(fileStream, sha256);

  fileStream.close();
  return sha256.digest("hex");
}
// Absolute path of the working directory the script was started from.
const currentAbsoluteFolderPath = path.resolve(process.cwd());
/**
 * One folder of a directory tree: its own path, the paths of its files and
 * its nested folders.
 *
 * @typedef {{path: string; filePaths: string[]; folders: Folder[]}} Folder
 */
/**
 * Classifies a path as either a folder or a regular file.
 *
 * @param {string} fileOrFolderAbsolutePath Path to inspect.
 * @returns {Promise<{absolutePath: string; isFolder: boolean;}|null>} The path
 * together with its kind, or `null` when `stat` fails or the entry is neither
 * a directory nor a regular file.
 */
async function getFileOrDirectory(fileOrFolderAbsolutePath) {
  let stats;
  try {
    stats = await fs.promises.stat(fileOrFolderAbsolutePath);
  } catch {
    // Any `stat` failure is reported as "nothing usable at this path".
    return null;
  }

  const isFolder = stats.isDirectory();
  if (!isFolder && !stats.isFile()) {
    return null;
  }
  return { isFolder, absolutePath: fileOrFolderAbsolutePath };
}
/**
 * Removes every listed file, announcing each one before its removal starts.
 *
 * @param {string[]} filesRelativePaths Paths relative to
 * `currentAbsoluteFolderPath`.
 * @returns {Promise<void>} Resolves once all removals have finished; rejects
 * if any of them fails.
 */
async function deleteFiles(filesRelativePaths) {
  const pendingRemovals = [];
  for (const relativeFilePath of filesRelativePaths) {
    writeLine(`Excluindo arquivo "${relativeFilePath}"`);
    const absoluteFilePath = path.join(
      currentAbsoluteFolderPath,
      relativeFilePath
    );
    // Every removal is started immediately so they all run concurrently.
    pendingRemovals.push(fs.promises.rm(absoluteFilePath));
  }
  await Promise.all(pendingRemovals);
}
/**
 * Recursively scans a folder and builds a tree of the files and folders
 * found inside it.
 *
 * @param {string} absoluteFolderPath Folder to scan.
 * @returns {Promise<Folder>} The folder with the absolute paths of its files
 * and the already-scanned nested folders.
 */
async function listAllFilesAndFoldersWithAbsolutePath(absoluteFolderPath) {
  const entryNames = await fs.promises.readdir(absoluteFolderPath);
  const inspectedEntries = await Promise.all(
    entryNames.map((entryName) =>
      getFileOrDirectory(path.join(absoluteFolderPath, entryName))
    )
  );

  const filePaths = [];
  const nestedScans = [];
  for (const entry of inspectedEntries) {
    // `getFileOrDirectory` yields null for entries it could not classify.
    if (!entry) {
      continue;
    }
    if (entry.isFolder) {
      // Nested folders are scanned concurrently and awaited together below.
      nestedScans.push(
        listAllFilesAndFoldersWithAbsolutePath(entry.absolutePath)
      );
    } else {
      filePaths.push(entry.absolutePath);
    }
  }

  /** @type {Folder} */
  const scannedFolder = {
    path: absoluteFolderPath,
    filePaths,
    folders: await Promise.all(nestedScans),
  };
  return scannedFolder;
}
/**
 * Files grouped by content hash: each key is a file hash and each value lists
 * the paths (relative to `currentAbsoluteFolderPath`) of the files that
 * produced that hash. Filled in by `computeFolderMapHashing`.
 *
 * @type {Map<string, string[]>}
 */
const folderMap = new Map();
/**
 * Hashes every file of a folder tree and records it in `folderMap`, grouping
 * the relative paths of files that share the same hash.
 *
 * Files and nested folders are processed one at a time, in order.
 *
 * @param {Folder} folder Root of the tree to hash.
 * @returns {Promise<void>}
 */
async function computeFolderMapHashing(folder) {
  for (const filePath of folder.filePaths) {
    const digest = await computeHash(filePath);
    const relativePath = path.relative(currentAbsoluteFolderPath, filePath);
    const pathsWithSameHash = folderMap.get(digest);
    folderMap.set(
      digest,
      pathsWithSameHash === undefined
        ? [relativePath]
        : [...pathsWithSameHash, relativePath]
    );
  }

  for (const nestedFolder of folder.folders) {
    await computeFolderMapHashing(nestedFolder);
  }
}
// Shared readline interface bound to the process's stdin/stdout; used to
// prompt the user and read the answers.
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
});
/**
 * Prints the given messages and waits for one line of user input.
 *
 * All messages except the last are printed as plain output lines; the last
 * one is used as the readline question prompt.
 *
 * @param {...string} messages Lines to show; the last one is the prompt.
 * @returns {Promise<string>} The line typed by the user.
 */
async function ask(...messages) {
  const preambleLines = messages.slice(0, -1);
  // Fall back to an empty prompt instead of printing "undefined" when the
  // function is called without any message.
  const prompt = messages[messages.length - 1] ?? "";

  for (const line of preambleLines) {
    // `rl.write()` must not be used here: it injects the text into the
    // readline *input* as if the user had typed it, rather than printing it.
    process.stdout.write(`${line}\n`);
  }

  return new Promise((resolve) => {
    rl.question(`${prompt}\n`, resolve);
  });
}
/**
 * Prints a message followed by a line break on standard output.
 *
 * @param {string} message Text to print.
 */
function writeLine(message) {
  // `rl.write()` is not an output API: it feeds the text into the readline
  // input as if the user had typed it, so nothing is printed when the
  // terminal is not a TTY. Write to stdout directly instead.
  process.stdout.write(`${message}\n`);
}
/**
 * Asks the user which file of a group of duplicates should be kept.
 *
 * The question is repeated until the answer is exactly "i", "c" or one of
 * the listed indexes (surrounding whitespace and letter case are ignored).
 *
 * @param {string[]} filePaths Paths of the files that share the same content.
 * @returns {Promise<number | 'i' | 'c'>} The index of the file to keep, "i"
 * to ignore this group (also returned without asking when there is at most
 * one file), or "c" to cancel everything.
 */
async function getFileOption(filePaths) {
  if (filePaths.length <= 1) {
    return "i";
  }

  // Keep asking (iteratively, not recursively) until the answer is valid.
  while (true) {
    const indexStr = await ask(
      `\n\nForam encontrados ${filePaths.length} arquivo${
        filePaths.length !== 1 ? "s" : ""
      } duplicados.`,
      "Digite o índice correspondente ao arquivo que deseja manter:\n",
      '"i"\t->\tpara IGNORAR estes arquivos',
      '"c"\t->\tpara CANCELAR esses e TODOS os outros:',
      ...filePaths.map((p, pathIndex) => `"${pathIndex}"\t->\t${p}`)
    );
    const cleanIndexStr = indexStr.trim().toLocaleLowerCase();

    if (cleanIndexStr === "c" || cleanIndexStr === "i") {
      return cleanIndexStr;
    }

    // The whole answer must be one of the listed indexes. A partial match
    // (e.g. "21" or "xi") must be rejected: the caller deletes every file
    // whose index differs from the returned one, so an out-of-range value
    // would remove ALL copies.
    const chosenIndex = Number.parseInt(cleanIndexStr, 10);
    const isListedIndex =
      String(chosenIndex) === cleanIndexStr &&
      chosenIndex >= 0 &&
      chosenIndex < filePaths.length;
    if (isListedIndex) {
      return chosenIndex;
    }

    process.stdout.write(
      `\n\nOPÇÃO INVÁLIDA ("${indexStr}"), TENTE NOVAMENTE!\n\n`
    );
  }
}
/**
 * Runs the interactive CLI: scans the current folder, groups files by content
 * hash and, for each group of duplicates, asks which file to keep and deletes
 * the others.
 *
 * Always ends the process with exit code 0 when it reaches the end.
 *
 * @returns {Promise<void>}
 */
async function removeDuplicatedFilesCli() {
  const folder = await listAllFilesAndFoldersWithAbsolutePath(
    currentAbsoluteFolderPath
  );
  await computeFolderMapHashing(folder);

  let hasAnyDuplicatedFile = false;
  for (const paths of folderMap.values()) {
    if (paths.length <= 1) {
      continue;
    }
    hasAnyDuplicatedFile = true;

    const option = await getFileOption(paths);
    if (option === "c") {
      writeLine(
        'Opção "c" selecionada, cancelando este e todos os outros arquivos!'
      );
      break;
    }
    if (option === "i") {
      writeLine('Opção "i" selecionada, cancelando este arquivo!');
      continue;
    }

    // Safety net: every file whose index differs from `option` is deleted, so
    // an option that is not a valid index of `paths` would wipe out ALL
    // copies. Skip the group instead of deleting anything.
    const isValidIndex =
      Number.isInteger(option) && option >= 0 && option < paths.length;
    if (!isValidIndex) {
      writeLine(`Opção inválida "${option}", nenhum arquivo foi excluído!`);
      continue;
    }

    writeLine(
      `Índice "${option}" selecionada, mantendo arquivo "${paths[option]}" e excluindo todos os outros:`
    );
    await deleteFiles(paths.filter((_, index) => index !== option));
  }

  if (!hasAnyDuplicatedFile) {
    writeLine(
      `Nenhum arquivo repetido foi encontrado nesse diretório: "${currentAbsoluteFolderPath}"`
    );
  }
  process.exit(0);
}
// Script entry point: start the CLI as soon as the file is executed.
// NOTE(review): the returned promise is neither awaited nor given a rejection
// handler here, so failures surface as unhandled rejections.
removeDuplicatedFilesCli();
This file has been truncated, but you can view the full file.
@uqmessias
Copy link
Author

Usage example

removeDuplicateFilesCli.mp4

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment