find duplicate songs
// Compare two mediainfo JSON files generated with: mediainfo . --Output=JSON
// run with: node findDupMusic.js -source source.json -dups possibleDups.json
// Will output files in possibleDups that are also in source
// add trailing -delete option to delete these duplicate files
// A trailing -force after -delete will delete even if the source file is not present
// Option handling is very simple and position-dependent
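// Example run (hypothetical paths; adjust to your own library layout):
//   mediainfo /music/library --Output=JSON > source.json
//   mediainfo /music/takeout --Output=JSON > dups.json
//   node findDupMusic.js -source source.json -dups dups.json            // dry run, report only
//   node findDupMusic.js -source source.json -dups dups.json -delete    // actually delete dups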
const fs = require("fs");
const getOptions = (args) => {
const options = {};
let opt = '';
for (let i=2;i<args.length;i++) {
// options starting with '-' are boolean by default
if (args[i].startsWith('-')) {
opt = args[i];
if (opt === '-match') {
options[opt] = [];
}
else {
options[opt] = true;
}
}
// a value without a leading '-' is treated as the value for the previous option
else if (opt) {
if (opt === '-match') {
options[opt].push(args[i]);
}
else {
options[opt] = args[i];
}
}
}
return options;
}
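// For illustration: `node findDupMusic.js -source a.json -dups b.json -delete -match ABCDEFGH IJKLMNOP`
// parses to { '-source': 'a.json', '-dups': 'b.json', '-delete': true, '-match': ['ABCDEFGH', 'IJKLMNOP'] }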
const LOG_LEVEL = {
FATAL: 0,
ERROR: 1,
WARN: 2,
INFO: 3,
DEBUG: 4,
TRACE: 5,
ALL: 6
}
const log = (level, ...message) => {
if (level <= verbosity) {
console.log(...message);
if (level <= LOG_LEVEL.WARN) {
console.error(...message);
}
}
}
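// log() prints only when the numeric level is at or below the configured verbosity
// (default 3 = INFO); messages at WARN or below are also echoed to stderr.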
const showHelp = () => {
console.log(`USAGE:
Before running, use mediainfo command to get json files of the source and target directories:
mediainfo sourceDir --Output=JSON > source.json
mediainfo duplicateDir --Output=JSON > dups.json
Then run to clean up duplicate songs
node findDupMusic.js -source source.json -dups target.json -delete
OPTIONS:
-source file.json mediainfo file of source
-dups file.json mediainfo file of duplicates
-delete delete files (without, just warns of deletes)
-verbose 2 set verbosity. 0 is least verbose, 6 is most; default is 3
-clean clean up files that are "bad", but not matches ("poor quality" tags) (need -force and -delete to delete)
-force force delete, even if source file not present
-takeout treat dups as re-encoded google music takeouts with high bitrate, low quality. also 160 kbps mp3 dups where source is aac
-takeout nuke delete any dup items with matching key regardless of other checks (dangerous)
-encode compare by encode date [can be risky]
-size compare by size [can be risky]
-various treat "various artists" in dup as any artist in source
-littleworse remove dups even if their bit rate is up to 5000 bps better than the source
-noparens remove anything in parenthesis when generating album/title/artist key
-small if duration is <30 seconds in dup, and greater in source, delete dup
-dashtitles assume a dash in the title delimits an artist and filter it out
-album match by artist and title only (ignoring album)
-track match by artist, album and track number (instead of track title)
-h show this help
-match VAL MORE Delete any files where key matches value. Must be at least 8 characters
(key is ARTIST-ALBUM-SONG string in all uppercase, with articles
and non-alphanumeric stripped and cut to 18 characters each,
with dashes separating in each item.
Can specify multiple (need -delete and -force to delete)
-notitle don't match by title (default is to always match)
`);
}
const options = getOptions(process.argv);
if (!options['-source'] || !options['-dups']) {
console.error("Minimum usage: node findDupMusic.js -source fileWithSource.json -dups fileWithDups.json");
showHelp();
process.exit();
}
if (options['-h']) {
showHelp();
process.exit();
}
const fileWithSource = options['-source'];
const fileWithDups = options['-dups'];
const deletedFiles = {};
const verbosity = (options.hasOwnProperty('-verbose') && options['-verbose'] !== true) ? options['-verbose']-0 : 3;
console.log("VERBOSITY:",verbosity);
let deleteCount = 0;
let attemptCount = 0;
// mediainfo . --Output=JSON >allMediaFiles.json
const getMeta = (file => {
const meta = file.media.track.find((track) => track["@type"] === "General");
const filename = file.media["@ref"];
meta.filename = filename;
return meta;
});
const getTitle = (item) => {
return `\t${item.Performer}\t${item.Album}\t${item.Title}\t${item.Genre}\t${item.OverallBitRate}\t${item.Duration}\t${item.Format}\t${item.Encoded_Application ||''}\t${item.Encoded_Date ||''}\t${item.Grouping || ''}\t`;
}
const getDetails = (src, dup) => {
return `details<\t BR: S:${src.OverallBitRate} D:${dup.OverallBitRate}\t FMT: S:${src.Format} D:${dup.Format}\t DUR: S:${src.Duration}, D:${dup.Duration} (${(src.Duration-dup.Duration).toFixed(2)})\t ENC: S:${src.Encoded_Application || ''};${src.Encoded_library||''};${src.Encoded_Date || ''}, D: ${dup.Encoded_Application || ''};${dup.Encoded_library||''};${dup.Encoded_Date ||''}\t SIZE: S:${src.FileSize}, D:${dup.FileSize}\t Album: S:${src.Album},D:${dup.Album}\t File: S:${src.filename},D:${dup.filename} >`;
}
const cleanTxt = (txt) => {
//txt = txt.replace(/["_:']/g,'')
//Strip non-alphanumeric (some music managers mangle various characters)
// don't try to process empty or null (or 0 or false...)
if (!txt || (typeof txt !== 'string')) {
return txt;
}
let newTxt = txt;
if (options['-noparens']) {
newTxt = newTxt.replace(/\([^\)]*\)/g,'');
}
// remove "the", reduce to 30 characters and strip to alphanumeric
//
// handle some mangling of special characters
newTxt = newTxt.replace(/Å™/g,'r');
// Doctor to dr
newTxt = newTxt.replace(/\bdoctor\b/ig,'dr');
// No. to #
newTxt = newTxt.replace(/\bNo\./ig, '#');
newTxt = newTxt.replace(/\bthe\b/ig, '');
newTxt = newTxt.replace(/\.mp3$/,'');
newTxt = newTxt.replace(/\band\b/ig, '');
newTxt = newTxt.replace(/\ba\b/ig, '');
newTxt = newTxt.replace(/\bcd\b/ig, 'disc');
newTxt = newTxt.toUpperCase();
newTxt = newTxt.replace(/\bSIXTEEN\b/g,'16');
newTxt = newTxt.replace(/\[[^\]]+\]/,'');
newTxt = newTxt.replace(/[\W_]/g, '');
newTxt = newTxt.substring(0, 18);
// If we do strip everything, just return the original
if (!newTxt) {
return txt;
}
return newTxt;
}
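// Example (with default options): cleanTxt("The Beatles") -> "BEATLES",
// cleanTxt("No. 1 Song") -> "1SONG" (articles, punctuation and spaces stripped, uppercased, capped at 18 chars)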
const getKey = (Performer = '', Album = '', Title = '') => {
let key = (cleanTxt(Performer) + "-" + cleanTxt(Album).replace(/DISC\d+$/,'') + "-" + cleanTxt(Title));
return key;
}
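// e.g. getKey("The Beatles", "Abbey Road Disc1", "Come Together") -> "BEATLES-ABBEYROAD-COMETOGETHER"
// (a trailing DISC<n> is dropped from the album part, so multi-disc albums collapse to one key)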
const keyFromFileName = (meta) => {
try {
const filename = meta.filename;
// If we have a bit rate, try to fill in missing parts of metadata
if (meta.OverallBitRate) {
const parts = filename.split('/');
const file = parts.pop().replace(/\.[^.]*$/,'');
const album = parts.pop();
let [artist,...trackBits] = file.split('-');
let track = trackBits.join('-');
artist = artist.trim();
track = track.trim();
if (!track) {
track = artist;
}
meta.Performer = meta.Performer || artist;
meta.Album = meta.Album || album;
meta.Title = meta.Title || track;
// clean up artist/title titles
if (options['-dashtitles']) {
if (/-/.test(meta.Title) && !/-/.test(track)) {
meta.Title = track;
}
}
}
log(LOG_LEVEL.DEBUG,`Artist: ${meta.Performer}, Album: ${meta.Album}, Track: ${meta.Title}, File: ${filename}`);
return getKey(meta.Performer, meta.Album, meta.Title);
}
catch (e) {
log(LOG_LEVEL.DEBUG,"Unable to extract tracks from filename",filename);
}
return null;
}
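// For a hypothetical untagged file "/music/Abbey Road/The Beatles - Come Together.mp3"
// (and only when the file reports a bit rate), the album is taken from the parent directory
// ("Abbey Road") and "Artist - Title" is split on the dash, so the key becomes
// "BEATLES-ABBEYROAD-COMETOGETHER".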
const indexFiles = (filename) => {
log(LOG_LEVEL.INFO,"FILE>>", filename);
const index = {byTitle:{}, bySize:{}, byEncodedDate:{}, noAlbum:{}, noArtist:{}, noTrack:{}};
const mediacontents = JSON.parse(fs.readFileSync(filename, "utf8"));
mediacontents.forEach((file) => {
try {
const meta = getMeta(file);
let {
filename, Track = "", Title = "", Album = "", Performer = "", Duration, OverallBitRate
} = meta;
const sizeKey = `${OverallBitRate}-${Duration}`;
if (Track !== Title) {
log(LOG_LEVEL.DEBUG,"different track title:", Track, Title);
}
// Will try to fill in any missing parts of key
let key = keyFromFileName(meta);
log(LOG_LEVEL.TRACE,key, OverallBitRate, filename);
if (key && key !== '--') {
if (!index.byTitle[key]) {
index.byTitle[key] = [];
}
index.byTitle[key].push(meta);
// without album
const noAlbumKey = getKey(meta.Performer,'',meta.Title);
if (!index.noAlbum[noAlbumKey]) {
index.noAlbum[noAlbumKey] = [];
}
index.noAlbum[noAlbumKey].push(meta);
// without artist
const noArtistKey = getKey('',meta.Album,meta.Title);
if (!index.noArtist[noArtistKey]) {
index.noArtist[noArtistKey] = [];
}
index.noArtist[noArtistKey].push(meta);
// with track number instead of track
const trackKey = getKey(meta.Performer, meta.Album, meta.Track_Position);
if (!index.noTrack[trackKey]) {
index.noTrack[trackKey] = [];
}
index.noTrack[trackKey].push(meta);
}
else {
log(LOG_LEVEL.DEBUG,"Still no tags",filename,getTitle(meta));
if (meta.FileSize <10000) {
log(LOG_LEVEL.DEBUG,"SMALL_FILE:",filename,meta.FileSize);
}
}
if (OverallBitRate && Duration) {
if (!index.bySize[sizeKey]) {
index.bySize[sizeKey] = [];
}
index.bySize[sizeKey].push(meta);
}
if (meta.Encoded_Date) {
if (!index.byEncodedDate[meta.Encoded_Date]) {
index.byEncodedDate[meta.Encoded_Date] = [];
}
index.byEncodedDate[meta.Encoded_Date].push(meta);
}
} catch (e) {
log(LOG_LEVEL.DEBUG,e, file);
}
});
log(LOG_LEVEL.INFO,"INDEX LENGTH:", Object.keys(index.byTitle).length, Object.keys(index.bySize).length, Object.keys(index.byEncodedDate).length);
return index;
};
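// Each index maps a key to an array of mediainfo "General" track objects:
//   byTitle:       "ARTIST-ALBUM-TITLE"        (cleaned key from getKey)
//   noAlbum:       "ARTIST--TITLE"             (album omitted)
//   noArtist:      "-ALBUM-TITLE"              (artist omitted)
//   noTrack:       "ARTIST-ALBUM-TRACKNUMBER"  (track number instead of title)
//   bySize:        "<OverallBitRate>-<Duration>"
//   byEncodedDate: Encoded_Date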
// Build indexes of both the source and the duplicate files
const sourceIndex = indexFiles(fileWithSource);
const dupIndex = indexFiles(fileWithDups);
log(LOG_LEVEL.INFO,Object.keys(sourceIndex.byTitle).length, Object.keys(dupIndex.byTitle).length);
// delete a target that is identical to the source.
// As a sanity check, do not delete the file if it matches against itself
// Also, only delete if source exists (unless -force flag is set)
const deleteItem = (dupMeta, sourceMeta, songKey, reason='') => {
const sizeDiff = sourceMeta.FileSize - dupMeta.FileSize;
if(deletedFiles[dupMeta.filename]) {
log(LOG_LEVEL.INFO,`${dupMeta.filename} already deleted, not deleting`);
return;
}
if (sourceMeta.filename === dupMeta.filename) {
log(LOG_LEVEL.WARN,"SAME: source and destination are the same, not deleting", sourceMeta.filename, dupMeta.filename);
return;
} else {
log(LOG_LEVEL.INFO,`DUP: [${reason}]<${songKey}> ${sizeDiff} : ${getDetails(sourceMeta, dupMeta)}`);
}
const doDelete = (filename) => {
attemptCount++;
if (options['-delete']) {
fs.unlinkSync(filename);
deleteCount++;
}
else {
log(LOG_LEVEL.WARN,"--- (not deleting since flag not set)");
}
deletedFiles[filename] = 1;
}
try {
if (fs.existsSync(sourceMeta.filename)) {
log(LOG_LEVEL.WARN,`Deleting:\t${dupMeta.filename}\tSource:\t${sourceMeta.filename}`);
doDelete(dupMeta.filename);
} else if (options['-force']) {
log(LOG_LEVEL.WARN,"forcing deletion even though source does not exist");
doDelete(dupMeta.filename);
} else if (options['-clean']) {
log(LOG_LEVEL.WARN,"cleaning unwanted mp3s even though source does not exist");
doDelete(dupMeta.filename);
} else {
log(LOG_LEVEL.WARN,"source does not exist, not deleting");
}
} catch(e) {
log(LOG_LEVEL.ERROR,"error deleting", e);
}
}
// Iterate through the duplicate directory by key (artist, album, title). Compare each file one by one.
// Then iterate over each source file with the same key
// Then compare each source and duplicate file
// 1. If Duration and Bitrate are the same:
// a. If file sizes are the same it is a dup
// b. If source is between 0 and 15000 bytes bigger, it is a dup
// c. If the size difference is within 3% of the source size, it is a dup
// d. Otherwise, it is not a dup (unless -takeout is set and the source is bigger)
// 2. If the difference in duration is less than 6 seconds and less than 3% of source duration
//    (or the dup is 0 to 10 seconds shorter):
// a. And the bitrate of the source is greater than or equal to that of the dup, it is a dup
// b. Otherwise it is not a dup (with exceptions for -takeout and -littleworse)
//
//
const findDupByTitle = (subIndex = 'byTitle', keyCheck=null, keySubIndex) => {
// A filter is done on the "keySubIndex", then the filtered items are used on subindex.
// Thus we may filter on all artists equal to "various artists", but do actual work on index without artist name
log(LOG_LEVEL.INFO,"\n...Finding dups by titile: subindex:",subIndex, "key filter:",keyCheck );
let filteredDupList =[];
if (keyCheck && keySubIndex) {
const keyRegex = new RegExp('\\b('+keyCheck+')\\b');
log(LOG_LEVEL.INFO,"Filtering on ",keyCheck);
Object.keys(dupIndex[keySubIndex]).forEach(k => {
if (keyRegex.test(k)) {
k = k.replace(keyRegex,'');
if (dupIndex[subIndex][k]) {
filteredDupList.push(k);
}
else {
log(LOG_LEVEL.TRACE,`${k} not found in ${subIndex}`);
}
}
});
}
else {
filteredDupList = Object.keys(dupIndex[subIndex]);
}
filteredDupList.forEach(songKey => {
if (sourceIndex[subIndex][songKey]) {
dupIndex[subIndex][songKey].forEach(dupMeta => {
let dups = 0;
const sourceSongs = sourceIndex[subIndex][songKey];
let reason ='';
sourceSongs.forEach(sourceMeta => {
// don't compare to self
if (sourceMeta.filename === dupMeta.filename) {
return;
}
const sizeDiff = sourceMeta.FileSize - dupMeta.FileSize;
const durationDiff = sourceMeta.Duration - dupMeta.Duration;
if (!dups) {
// 1. Same bitrate and duration with similar file size
if (
sourceMeta.OverallBitRate === dupMeta.OverallBitRate &&
sourceMeta.Duration == dupMeta.Duration &&
(sourceMeta.Duration > 0 || ((dupMeta.Format === sourceMeta.Format) && dupMeta.Format && dupMeta.Format.indexOf('Audio') !== -1 ))
) {
if (sourceMeta.FileSize === dupMeta.FileSize) {
log(LOG_LEVEL.DEBUG,">>>EQUAL SIZE FILES");
deleteItem(dupMeta, sourceMeta, songKey, 'BITDUR_EQUAL'+subIndex);
dups++;
} else if ((sizeDiff < 15000) && (sizeDiff > 0)) {
log(LOG_LEVEL.DEBUG,">>>less than 15000 more");
deleteItem(dupMeta, sourceMeta, songKey, 'BITDUR_SRC_BIGGER'+subIndex);
dups++;
} else if ((Math.abs(sizeDiff/sourceMeta.FileSize) < .03)) {
log(LOG_LEVEL.DEBUG,">>> size within 2%");
deleteItem(dupMeta, sourceMeta, songKey, 'BITDUR_SIZE_3_PERCENT'+subIndex);
dups++;
} else if ((options['-takeout']) && (sizeDiff >0)) {
log(LOG_LEVEL.DEBUG,`DELETING EVEN WITH SMALLER SIZE (TAKEOUT OVERRIDE) ${getDetails(sourceMeta, dupMeta)}`);
deleteItem(dupMeta, sourceMeta, songKey, 'BITDUR_SRC_MUCH_BIGGER__TAKEOUT'+subIndex);
dups++;
} else {
log(LOG_LEVEL.DEBUG,"SIZE TOO DIFFERENT", sourceMeta.FileSize - dupMeta.FileSize, sizeDiff, "SIZEDIFF:", `<${songKey}>`, dupMeta.filename, dupMeta.FileSize, dupMeta.File_Modified_Date, "SOURCE->", sourceMeta.filename, sourceMeta.FileSize, sourceMeta.File_Modified_Date);
reason += `[ SIZEDIFF: ${sizeDiff}, (${getDetails(sourceMeta, dupMeta)}) ]`;
}
}
// 2. Duration and dup bit rate is close or worse
else if ((Math.abs(durationDiff) < 6) && ((durationDiff / sourceMeta.Duration) < .03)
||(durationDiff <10 && durationDiff > 0)) {
log(LOG_LEVEL.DEBUG,"CLOSETIME:", durationDiff, "SIZEDIFF:", sizeDiff, `BIT RATE: SRC:`, sourceMeta.OverallBitRate, 'Duplicate:', dupMeta.OverallBitRate, `<${songKey}>`, dupMeta.filename, dupMeta.Duration, dupMeta.File_Modified_Date, "Source->", sourceMeta.filename, sourceMeta.Duration, sourceMeta.File_Modified_Date, "?", dupMeta.OverallBitRate < sourceMeta.OverallBitRate, sourceMeta.OverallBitRate - dupMeta.OverallBitRate);
let bitDiff = sourceMeta.OverallBitRate - dupMeta.OverallBitRate;
// 2.a. dup is worse bit rate
if ((sourceMeta.OverallBitRate - dupMeta.OverallBitRate) >= 0) {
log(LOG_LEVEL.DEBUG,"ok to delete (dup bit rate equal or worse than source)");
deleteItem(dupMeta, sourceMeta, songKey, 'CLOSEDUR_SRCBRBIGGER'+subIndex);
dups++;
}
// 2.b. dup looks like an aac re-encoded by google music to a 160 kbps mp3
else if ((options['-takeout'] && dupMeta.OverallBitRate == 160000) &&
(dupMeta.Format === 'MPEG Audio' )) {
log(LOG_LEVEL.DEBUG,"dup has better bit rate, but looks like google music re-encoding junk", getDetails(sourceMeta, dupMeta));
deleteItem(dupMeta, sourceMeta, songKey, 'CLOSEDUR_LAMERECODE'+subIndex);
}
// 2.c. dup is an mp3 only slightly higher bitrate than aac
else if (bitDiff > -10000 &&
dupMeta.Format === 'MPEG Audio' && sourceMeta.Format === 'MPEG-4') {
log(LOG_LEVEL.DEBUG,"dup has lame encoded mp3 of slightly better bit rate than AAC source, keeping source (possible google music junk)", bitDiff, getDetails(sourceMeta,dupMeta));
deleteItem(dupMeta, sourceMeta, songKey, 'CLOSEDUR_AAC_TO_MP3'+subIndex);
}
else if (options['-littleworse'] && (bitDiff > -5000 && bitDiff <=0 )) {
log(LOG_LEVEL.DEBUG,"dup has has only slightly better bitrate than source", bitDiff, getDetails(sourceMeta,dupMeta));
deleteItem(dupMeta, sourceMeta, songKey, 'CLOSEDUR_DUP_BETTER'+subIndex);
}
else {
log(LOG_LEVEL.DEBUG,"BETTER BIT RATE THAN SOURCE, not deleting", dupMeta.Encoded_Library,dupMeta.OverallBitRate);
reason += `[DP BETTER BR: ${bitDiff} (${getDetails(sourceMeta,dupMeta)}) ]`;
}
} else if (options['-small'] && dupMeta.Duration <=30 && sourceMeta.Duration > 30) {
log(LOG_LEVEL.DEBUG,"small duplicate song, deleting",dupMeta.Duration, sourceMeta.Duration);
deleteItem(dupMeta, sourceMeta, songKey, "SMALL_FILE"+subIndex);
} else if (options['-small'] && options['-takeout'] &&
(durationDiff > 0 && (sourceMeta.OverallBitRate - dupMeta.OverallBitRate) >-30000)) {
log(LOG_LEVEL.DEBUG,"takeout duplicate key that is smaller and with bit rate no more than 30k more than source",dupMeta.Duration, sourceMeta.Duration);
deleteItem(dupMeta, sourceMeta, songKey, "SMALLER_TAKEOUT"+subIndex);
} else if (options['-takeout'] && options['-takeout'] === 'nuke') {
log(LOG_LEVEL.DEBUG,"takeout duplicate key found. Nuke option set, so deleting",dupMeta.Duration, sourceMeta.Duration);
deleteItem(dupMeta, sourceMeta, songKey, "NUKE_TAKEOUT"+subIndex);
} else {
log(LOG_LEVEL.DEBUG,sourceMeta.FileSize - dupMeta.FileSize, "BITDIFF:", `<${songKey}>`, dupMeta.filename, dupMeta.OverallBitRate, dupMeta.Duration, dupMeta.File_Modified_Date, "SOURCE->", sourceMeta.filename, sourceMeta.OverallBitRate, sourceMeta.Duration, sourceMeta.File_Modified_Date);
reason += `[BITDIFF: (${getDetails(sourceMeta,dupMeta)}) ]`;
}
}
});
if (!dups) {
log(LOG_LEVEL.INFO,"^^^UNIQUE: <", songKey, ">", getTitle(dupMeta),dupMeta.filename, reason);
}
});
} else {
const m = dupIndex[subIndex][songKey][0];
log(LOG_LEVEL.INFO,"^^NOSOURCE:<", songKey, ">", getTitle(m),dupIndex[subIndex][songKey].length, m.filename, m.OverallBitRate, "COMMENTS:\t",m.Comment);
}
});
}
// Clean out songs based on certain data (like mp3.com comment)
const cleanBadSongs = () => {
Object.keys(dupIndex.byTitle).forEach(songKey => {
dupIndex.byTitle[songKey].forEach(m => {
log(LOG_LEVEL.DEBUG,"--->",songKey, m.Comment);
if(m.Comment && m.Comment.includes('mp3.com')) {
log(LOG_LEVEL.DEBUG,"MP3 SAMPLER:<", songKey, ">", getTitle(m),dupIndex.byTitle[songKey].length, m.filename, m.OverallBitRate, "COMMENTS:",m.Comment);
deleteItem(m, {}, songKey, 'CLEAN_MP3_COM');
}
if (m.Comment && m.Comment.includes('riffage.com')) {
log(LOG_LEVEL.DEBUG,"RIFFAGE SAMPLER:<", songKey, ">", getTitle(m),dupIndex.byTitle[songKey].length, m.filename, m.OverallBitRate, "COMMENTS:",m.Comment);
deleteItem(m, {}, songKey, 'CLEAN_RIFFAGE');
}
if (m.Comment && (m.Comment.includes('poor quality') || m.Comment.includes('muddy'))) {
log(LOG_LEVEL.DEBUG,"POOR QUALITY:<", songKey, ">", getTitle(m),dupIndex.byTitle[songKey].length, m.filename, m.OverallBitRate, "COMMENTS:",m.Comment);
deleteItem(m, {}, songKey, 'CLEAN_QUALITY_COMMENT');
}
if (m.Grouping && (m.Grouping.includes('Poor quality') || m.Grouping.includes('poor quality'))) {
log(LOG_LEVEL.DEBUG,"POOR QUALITY GROUPING:<", songKey, ">", getTitle(m),dupIndex.byTitle[songKey].length, m.filename, m.OverallBitRate, "GROUPING:",m.Grouping);
deleteItem(m, {}, songKey, 'CLEAN_QUALITY_GROUPING');
}
if (m.FileSize <10000 && /\.mp3$/.test(m.filename)) {
log(LOG_LEVEL.DEBUG,"SMALL_MP3:",m.filename,m.FileSize);
deleteItem(m, {}, songKey, 'CLEAN_SMALL_MP3');
}
});
});
}
// Blind size comparison.
// Similar to other, but ignores title and focuses on numbers
//
const sizeCompare = () => {
log(LOG_LEVEL.INFO,"----- Comparing size -----");
Object.keys(dupIndex.bySize).forEach(songKey => {
if (sourceIndex.bySize[songKey]) {
dupIndex.bySize[songKey].forEach(dupMeta => {
let dups = 0;
const sourceSongs = sourceIndex.bySize[songKey];
sourceSongs.forEach(sourceMeta => {
const sizeDiff = sourceMeta.FileSize - dupMeta.FileSize;
const durationDiff = sourceMeta.Duration - dupMeta.Duration;
if (!dups) {
if (sourceMeta.FileSize === dupMeta.FileSize) {
log(LOG_LEVEL.DEBUG,">>>EQUAL FILES");
deleteItem(dupMeta, sourceMeta, songKey, 'SIZE_EQUAL');
dups++;
} else if (sourceMeta.Encoded_Date === dupMeta.Encoded_Date && sourceMeta.Encoded_Date) {
log(LOG_LEVEL.DEBUG,">>>Same encode date sizeDiff:",sizeDiff);
deleteItem(dupMeta, sourceMeta, songKey, 'SIZE_SAME_ENCODE_DATE');
dups++;
} else {
log(LOG_LEVEL.DEBUG,"SIZE TOO DIFFERENT", sourceMeta.FileSize - dupMeta.FileSize, sizeDiff, "SIZEDIFF:", `<${songKey}>`, dupMeta.filename, dupMeta.FileSize, dupMeta.File_Modified_Date, "SOURCE->", sourceMeta.filename, sourceMeta.FileSize, sourceMeta.File_Modified_Date);
}
}
});
if (!dups) {
log(LOG_LEVEL.INFO,"unique size: <", songKey, ">",getTitle(dupMeta), dupMeta.filename);
}
});
} else {
const m = dupIndex.bySize[songKey][0];
log(LOG_LEVEL.INFO,"NOSIZESOURCE size:<", songKey, ">", dupIndex.bySize[songKey].length, m.filename, m.OverallBitRate);
}
});
}
const encodeDateCompare = () => {
log(LOG_LEVEL.INFO,"----- Comparing encode date -----");
Object.keys(dupIndex.byEncodedDate).forEach(songKey => {
if (sourceIndex.byEncodedDate[songKey]) {
dupIndex.byEncodedDate[songKey].forEach(dupMeta => {
let dups = 0;
const sourceSongs = sourceIndex.byEncodedDate[songKey];
sourceSongs.forEach(sourceMeta => {
const sizeDiff = sourceMeta.FileSize - dupMeta.FileSize;
const durationDiff = sourceMeta.Duration - dupMeta.Duration;
if (!dups) {
if (sourceMeta.FileSize === dupMeta.FileSize) {
log(LOG_LEVEL.DEBUG,">>>EQUAL FILES");
deleteItem(dupMeta, sourceMeta, songKey, 'ENCODE_DATE_EQUAL');
dups++;
} else if ((sizeDiff < 12500) && (sizeDiff > 0)) {
log(LOG_LEVEL.DEBUG,">>>less than 12500 more", 'ENCODE_DATE_SRC_BIGGER');
deleteItem(dupMeta, sourceMeta, songKey);
dups++;
} else if ((sizeDiff > -3000) && (sizeDiff < 0)) {
log(LOG_LEVEL.DEBUG,">>>-3000 to 0");
deleteItem(dupMeta, sourceMeta, songKey, 'ENCODE_DATE_DUP_BIGGER');
dups++;
} else {
log(LOG_LEVEL.DEBUG,"SIZE TOO DIFFERENT", sourceMeta.FileSize - dupMeta.FileSize, sizeDiff, "SIZEDIFF:", `<${songKey}>`, dupMeta.filename, dupMeta.FileSize, dupMeta.File_Modified_Date, "SOURCE->", sourceMeta.filename, sourceMeta.FileSize, sourceMeta.File_Modified_Date);
}
}
});
if (!dups) {
log(LOG_LEVEL.INFO,"unique encode date: <", songKey, ">",getTitle(dupSong), dupMeta.filename);
}
});
} else {
const m = dupIndex.byEncodedDate[songKey][0];
log(LOG_LEVEL.INFO,"NOENCODESOURCE size:<", songKey, ">", dupIndex.byEncodedDate[songKey].length, m.filename, m.OverallBitRate);
}
});
}
const filterByKey = (filterKeys) => {
log(LOG_LEVEL.INFO,"Deleting items that match key");
const goodKeys = filterKeys.filter(filterKey => {
if (filterKey.length >=8) {
return true;
}
log(LOG_LEVEL.ERROR,`filter key: ${filterKey} is too small. Not using`);
return false;
});
if (!goodKeys.length) {
log(LOG_LEVEL.WARN,"No keys, not filtering");
return;
}
Object.keys(dupIndex.byTitle).forEach(k => {
goodKeys.forEach(filterKey=> {
if (k.indexOf(filterKey) !== -1) {
dupIndex.byTitle[k].forEach(m => {
deleteItem(m, {}, k, 'SPECIFIED_KEY:'+k);
});
}
});
});
}
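// e.g. `-match BEATLESABBEYROAD -delete -force` removes every dup whose ARTIST-ALBUM-TITLE
// key contains that substring (filter strings shorter than 8 characters are ignored).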
// Different duplicate finding methods
// remove explicit keys
if (options['-match'] && options['-match'].length) {
filterByKey(options['-match']);
}
// match by album, title, artist
if (!options['-notitle']) {
findDupByTitle();
}
else {
log(LOG_LEVEL.INFO,"-notitle flag, not doing title search")
}
// match by artist and title (ignoring album)
if (options['-album']) {
log(LOG_LEVEL.INFO,"-album: Looking for duplicates without album");
findDupByTitle('noAlbum');
}
// match by artist, album and track number
if (options['-track']) {
log(LOG_LEVEL.INFO,"-track: match by artist, album and track number");
findDupByTitle('noTrack');
}
// find dups without regard for title where dup title is "VARIOUSARTISTS"
if (options['-various']) {
log(LOG_LEVEL.INFO,'-various: finding duplicates where "Various Artists" in the dup matches any artist in the source');
findDupByTitle('noArtist','VARIOUSARTISTS|VARIOUS', 'byTitle');
}
// match by same size and bitrate
if (options['-size']) {
log(LOG_LEVEL.INFO,'-size: compare songs with same size');
sizeCompare();
}
// match by same encode date
if (options['-encode']) {
log(LOG_LEVEL.INFO,"-encode: compare songs with same encode date");
encodeDateCompare();
}
if (options['-clean']) {
log(LOG_LEVEL.INFO,"Cleaning out songs");
cleanBadSongs();
}
log(LOG_LEVEL.FATAL,`${deleteCount} files deleted (${attemptCount} attempted)`);