$ node ./removeDuplicateFilesCli.js
CLI tool to help me remove duplicate files in a folder (recursively)
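The script works on the directory it is run from (it resolves `process.cwd()`), so to clean up a specific folder, cd into it first and run the script from there. A sketch, with an example path:

$ cd ~/Pictures/backup        # example folder; use the directory you want to deduplicate
$ node /path/to/removeDuplicateFilesCli.js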
const fs = require("fs");
const path = require("path");
const crypto = require("crypto");
const stream = require("stream/promises");
const readline = require("readline");

/**
 * Computes the SHA-256 hash of a file's contents.
 * @param {string} filepath
 * @returns {Promise<string>} the hash as a hex string
 */
async function computeHash(filepath) {
  const input = fs.createReadStream(filepath);
  const hash = crypto.createHash("sha256");
  // Connect the output of the `input` stream to the input of `hash`
  // and let Node.js do the streaming
  await stream.pipeline(input, hash);
  const computedHash = hash.digest("hex");
  input.close();
  return computedHash;
}
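// Note: computeHash streams the file through the hash via stream.pipeline, so
// memory usage stays flat even for very large files (nothing is buffered the
// way an fs.readFile-then-hash approach would require).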
const currentAbsoluteFolderPath = path.resolve(process.cwd());

/** @typedef {{path: string; filePaths: string[]; folders: Folder[]}} Folder */

/**
 * @param {string} fileOrFolderAbsolutePath
 * @returns {Promise<{absolutePath: string; isFolder: boolean} | null>}
 * */
async function getFileOrDirectory(fileOrFolderAbsolutePath) {
  try {
    const fileOrFolderStat = await fs.promises.stat(fileOrFolderAbsolutePath);
    if (fileOrFolderStat.isDirectory()) {
      return {
        isFolder: true,
        absolutePath: fileOrFolderAbsolutePath,
      };
    }
    if (fileOrFolderStat.isFile()) {
      return {
        isFolder: false,
        absolutePath: fileOrFolderAbsolutePath,
      };
    }
  } catch {
    // Ignore entries that can't be stat-ed (e.g. broken symlinks or permission errors)
  }
  return null;
}
/**
 * @param {string[]} filesRelativePaths
 */
async function deleteFiles(filesRelativePaths) {
  await Promise.all(
    filesRelativePaths.map((relativeFilePath) => {
      writeLine(`Deleting file "${relativeFilePath}"`);
      return fs.promises.rm(
        path.join(currentAbsoluteFolderPath, relativeFilePath)
      );
    })
  );
}
/**
 * @param {string} absoluteFolderPath
 * @returns {Promise<Folder>}
 * */
async function listAllFilesAndFoldersWithAbsolutePath(absoluteFolderPath) {
  const filesAndFoldersInFolder = await fs.promises
    .readdir(absoluteFolderPath)
    .then(async (fileOrFolderNames) =>
      Promise.all(
        fileOrFolderNames.map((fName) =>
          getFileOrDirectory(path.join(absoluteFolderPath, fName))
        )
      ).then((absolutePaths) => absolutePaths.filter(Boolean))
    );
  /** @type {Folder[]} */
  const folders = await Promise.all(
    filesAndFoldersInFolder
      .filter(({ isFolder }) => isFolder)
      .map((f) => listAllFilesAndFoldersWithAbsolutePath(f.absolutePath))
  );
  /** @type {Folder} */
  const folder = {
    path: absoluteFolderPath,
    filePaths: filesAndFoldersInFolder
      .filter(({ isFolder }) => !isFolder)
      .map(({ absolutePath }) => absolutePath),
    folders,
  };
  return folder;
}
/** @type {Map<string, string[]>} */
const folderMap = new Map();

/**
 * Walks the folder tree and groups file paths (relative to the CWD) by content hash.
 * @param {Folder} folder
 */
async function computeFolderMapHashing(folder) {
  for (let index = 0; index < folder.filePaths.length; index++) {
    const filePath = folder.filePaths[index];
    const fileHash = await computeHash(filePath);
    const relativePath = path.relative(currentAbsoluteFolderPath, filePath);
    const relativePaths = folderMap.has(fileHash)
      ? folderMap.get(fileHash)?.concat(relativePath)
      : [relativePath];
    folderMap.set(fileHash, relativePaths);
  }
  for (let index = 0; index < folder.folders.length; index++) {
    const subFolder = folder.folders[index];
    await computeFolderMapHashing(subFolder);
  }
}
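// Note: files are hashed one at a time (the await sits inside the loop). That is
// slower than hashing in parallel, but it avoids opening a large number of file
// descriptors at once on big folder trees.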
const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout,
});

/**
 * @param {...string} messages
 * @returns {Promise<string>}
 */
async function ask(...messages) {
  return new Promise((resolve) => {
    for (let i = 0; i < messages.length - 1; i++) {
      rl.write(`${messages[i]}\n`);
    }
    rl.question(`${messages[messages.length - 1]}\n`, resolve);
  });
}

/**
 * @param {string} message
 */
function writeLine(message) {
  rl.write(`${message}\n`);
}
/**
 * Asks the user which of the duplicated files to keep.
 * @param {string[]} filePaths
 * @returns {Promise<number | 'i' | 'c'>}
 */
async function getFileOption(filePaths) {
  if (filePaths.length <= 1) {
    return "i";
  }
  const indexStr = await ask(
    `\n\n${filePaths.length} duplicate file${
      filePaths.length !== 1 ? "s" : ""
    } were found.`,
    "Type the index of the file you want to keep:\n",
    '"i"\t->\tto IGNORE these files',
    '"c"\t->\tto CANCEL these and ALL the others:',
    ...filePaths.map((p, pathIndex) => `"${pathIndex}"\t->\t${p}`)
  );
  const cleanIndexStr = indexStr.trim().toLocaleLowerCase();
  const indexes = filePaths.map((_, index) => index).join("|");
  // Group the alternatives so "^" and "$" anchor the whole pattern,
  // not only the first and last options.
  const validInputRegexStr = `^(c|i|${indexes})$`;
  const validNonNumericInputRegexStr = "^(c|i)$";
  if (!new RegExp(validInputRegexStr).test(cleanIndexStr)) {
    rl.write(`\n\nINVALID OPTION ("${indexStr}"), TRY AGAIN!\n\n`);
    return getFileOption(filePaths);
  }
  if (new RegExp(validNonNumericInputRegexStr).test(cleanIndexStr)) {
    return cleanIndexStr;
  }
  return Number.parseInt(cleanIndexStr, 10);
}
async function removeDuplicatedFilesCli() {
  const folder = await listAllFilesAndFoldersWithAbsolutePath(
    currentAbsoluteFolderPath
  );
  await computeFolderMapHashing(folder);
  let hasAnyDuplicatedFile = false;
  for (const [_, paths] of folderMap) {
    if (paths.length <= 1) {
      continue;
    }
    hasAnyDuplicatedFile = true;
    const option = await getFileOption(paths);
    if (option === "c") {
      writeLine(
        'Option "c" selected, canceling this and all the remaining files!'
      );
      break;
    }
    if (option === "i") {
      writeLine('Option "i" selected, skipping this file!');
      continue;
    }
    writeLine(
      `Index "${option}" selected, keeping file "${paths[option]}" and deleting all the others:`
    );
    await deleteFiles(paths.filter((_, index) => index !== option));
  }
  if (!hasAnyDuplicatedFile) {
    writeLine(
      `No duplicate files were found in this directory: "${currentAbsoluteFolderPath}"`
    );
  }
  process.exit(0);
}

removeDuplicatedFilesCli();
Usage example
removeDuplicateFilesCli.mp4
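If you can't play the video, a run looks roughly like this (the file names are made up; the wording comes from the prompts in the script above):

$ node ./removeDuplicateFilesCli.js

2 duplicate files were found.
Type the index of the file you want to keep:

"i"	->	to IGNORE these files
"c"	->	to CANCEL these and ALL the others:
"0"	->	photos/IMG_0001.jpg
"1"	->	photos/copy of IMG_0001.jpg
0
Index "0" selected, keeping file "photos/IMG_0001.jpg" and deleting all the others:
Deleting file "photos/copy of IMG_0001.jpg"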