Skip to content

Instantly share code, notes, and snippets.

@ryjen
Last active June 29, 2020 07:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ryjen/309b1459e3b515a36eea03c6c69cef95 to your computer and use it in GitHub Desktop.
Save ryjen/309b1459e3b515a36eea03c6c69cef95 to your computer and use it in GitHub Desktop.
Finds duplicate songs in a selection in itunes
-- Author: Ryan Jennings <robert@ryanjennin.gs>
-- Date: Aug 2, 2019
-- Description:
-- Finds duplicate songs in the current iTunes selection.
-- Song metadata is compared in this order of precedence:
-- 1. song name
-- 2. artist
-- 3. bitrate (ex 320 over 256)
-- 4. duration (ex keep 12:00 as too different from 4:40, but discard 3:50)
use AppleScript
use scripting additions
use application "iTunes"
-- General-purpose helpers: in-place sorting, text splitting and text normalisation.
script util
    -- Quick sort of items Le thru Ri (1-based, inclusive) of aList, in place.
    --   aList      : the list to reorder
    --   Le, Ri     : left and right bounds of the range to sort
    --   comparator : script object with a compare(x, y) handler returning a
    --                negative number, zero or a positive number
    on sort(aList as list, Le as integer, Ri as integer, comparator)
        -- nothing to do for an empty or single-item range; this also keeps the
        -- pivot lookup below from indexing outside the list
        if Le ≥ Ri then return
        set [i, j] to [Le, Ri]
        set v to item ((Le + Ri) div 2) in aList --> pivot in middle (as C.A.R. Hoare's algorithm)
        repeat while j > i
            -- advance from the left past items that sort before the pivot
            repeat while comparator's compare(item i in aList, v) < 0
                set i to i + 1
            end repeat
            -- retreat from the right past items that sort after the pivot
            repeat while comparator's compare(item j in aList, v) > 0
                set j to j - 1
            end repeat
            if not i > j then
                set temp to item i in aList
                set item i in aList to item j in aList
                set item j in aList to temp
                set [i, j] to [i + 1, j - 1]
            end if
        end repeat
        -- recurse into the two partitions
        if Le < j then sort(aList, Le, j, comparator)
        if Ri > i then sort(aList, i, Ri, comparator)
    end sort

    -- Splits text into a list of text items at every occurrence of delimiter.
    -- The caller's text item delimiters are restored afterwards, also on error.
    on split(input as text, delimiter as text)
        set saved_delimiters to AppleScript's text item delimiters
        try
            set AppleScript's text item delimiters to delimiter
            set value to every text item of input
        on error errMsg number errNum
            set AppleScript's text item delimiters to saved_delimiters
            error errMsg number errNum
        end try
        set AppleScript's text item delimiters to saved_delimiters
        return value
    end split

    -- Reduces text to a more comparable form: characters with id 32 or lower
    -- (control characters and the space) are dropped and ASCII uppercase
    -- letters are converted to lowercase.
    on sanitize(input as text)
        set value to ""
        repeat with ch in input
            set c to id of ch
            -- skip control characters and spaces
            if c > 32 then
                -- ids 65..90 are "A".."Z"; adding 32 gives "a".."z"
                if c ≥ 65 and c ≤ 90 then
                    set c to c + 32
                end if
                set value to value & ((character id c) as string)
            end if
        end repeat
        return value
    end sanitize
end script
-- Level-gated logging. Each handler logs its message only when the configured
-- level is at or below that handler's own numeric level.
script logger
    -- level names, listed in the order of the numeric thresholds used below
    -- (trace = 1 … info = 5); this list is not read anywhere in this script
    property levels : {"trace", "debug", "warn", "fatal", "info"}
    -- the configured level; handlers with a lower numeric level stay silent
    property level : 2

    -- shared gate: logs message unless the configured level exceeds threshold
    on emit(threshold, message)
        if not (level > threshold) then log (message)
    end emit

    on trace(message)
        my emit(1, message)
    end trace

    on debug(message)
        my emit(2, message)
    end debug

    on warn(message)
        my emit(3, message)
    end warn

    on fatal(message)
        my emit(4, message)
    end fatal

    on info(message)
        my emit(5, message)
    end info

    -- logs a section header at debug level: a blank line, the message, a rule
    on dheader(message)
        my debug("")
        my debug(message)
        my debug("=========================================================================")
    end dheader
end script
-- Handlers for song metadata records. The handlers below read the fields
-- n (song name), a (artist), b (bitrate) and t (duration text) of a record.
script metadata
    -- Returns a one-line description of a metadata record:
    -- "<name> by <artist> (<bitrate>,<time>)".
    on desc(value)
        return (n of value) & " by " & (a of value) & " (" & (b of value) & "," & (t of value) & ")"
    end desc

    -- Comparison handlers for metadata records. All of them return a number:
    -- negative when m1 ranks before m2, zero when equal, positive when after.
    script comparator
        -- Converts colon-separated duration text into a total number of seconds.
        -- Every field is folded in with base 60, so "4:40" gives 280 and a value
        -- with an hours field such as "1:02:03" gives 3723.
        -- NOTE(review): assumes t is always colon-separated numeric text — confirm.
        on seconds_of(time_text)
            set total to 0
            repeat with part in my util's split(time_text, ":")
                set total to (total * 60) + ((contents of part) as number)
            end repeat
            return total
        end seconds_of

        -- Compares song durations; returns the difference in seconds (m1 - m2).
        on len(m1, m2)
            return seconds_of(t of m1) - seconds_of(t of m2)
        end len

        -- Compares song bitrates; returns the difference (m1 - m2).
        on bitrate(m1, m2)
            return (b of m1) - (b of m2)
        end bitrate

        -- Default comparison: song name, then artist, then bitrate, then duration.
        -- Different names or artists yield -1 or 1 so the result is a usable
        -- ordering for sorting; the magnitude is deliberately small so that
        -- callers applying a threshold to the bitrate/duration difference do not
        -- mistake "different song" for a large difference.
        on compare(m1, m2)
            -- 1. song name
            set n1 to n of m1
            set n2 to n of m2
            if n1 < n2 then return -1
            if n1 > n2 then return 1
            -- 2. artist
            set a1 to a of m1
            set a2 to a of m2
            if a1 < a2 then return -1
            if a1 > a2 then return 1
            -- 3. bitrate
            set diff to bitrate(m1, m2)
            if not diff is equal to 0 then return diff
            -- 4. duration
            return len(m1, m2)
        end compare
    end script
end script
-- Handlers that operate on a list of song metadata records.
script metalist
    -- Walks input in order and returns the records selected for the output
    -- list; the remaining records are counted as pruned. Each record is judged
    -- against the most recently selected record, so input is assumed to be
    -- ordered with related songs adjacent.
    on prune(input as list)
        my logger's dheader("Pruning song list")
        set output to {}
        set pruned to 0
        set remainder to 0
        -- keeps track of the output list and of the record last added to it
        script outputer
            -- the record most recently added to output; missing value until the
            -- first record has been added, so it is never handed to the
            -- comparator as an incomplete record
            property previous : missing value
            -- adds dup to the output and remembers it as the new reference
            on inc(dup)
                set output to output & {dup}
                set previous to dup
                set remainder to remainder + 1
            end inc
            -- counts a record that is left out of the output
            on dec()
                set pruned to pruned + 1
            end dec
            -- true when dup starts a new artist (or is the very first record)
            on is_next(dup)
                if previous is missing value then return true
                return not ((a in previous) is equal to (a in dup))
            end is_next
            -- compares the reference record with dup using the default comparator
            on diff(dup)
                return metadata's comparator's compare(previous, dup)
            end diff
        end script
        -- for each candidate song...
        repeat with dup in input
            if outputer's is_next(dup) then
                -- first record of an artist always goes to the output
                outputer's inc(dup)
            else
                -- same artist as the reference record: compare the two
                set difference to outputer's diff(dup)
                -- only a large negative difference (below -60) selects the record
                -- NOTE(review): -60 is applied to whichever of the bitrate or
                -- duration difference the comparator returns — confirm intent
                if difference is less than -60 then
                    outputer's inc(dup)
                else
                    -- otherwise it is pruned from the output
                    outputer's dec()
                    my logger's trace("Pruned " & metadata's desc(dup))
                end if
            end if
        end repeat
        my logger's debug("Pruned " & pruned & " and kept " & remainder)
        return output
    end prune
end script
-- Handlers that turn a list of songs into a list of metadata records.
script songlist
    -- the list of songs handed to build
    property values : missing value
    -- the metadata records produced by the most recent build
    property meta_data : missing value

    -- Sorts the metadata records from the last build in place with the default
    -- metadata comparator and returns them. Returns an empty list when build
    -- has not been run yet.
    on sorted()
        if meta_data is missing value then return {}
        set item_count to count of meta_data
        if item_count is less than or equal to 1 then return meta_data
        my util's sort(meta_data, 1, item_count, metadata's comparator)
        return meta_data
    end sorted

    -- Builds one metadata record per song in input and returns the list.
    -- Record fields: i = position of the song in input, a = sanitized artist,
    -- n = sanitized name, b = bit rate, t = time.
    on build(input)
        set values to input
        set output to {}
        my logger's dheader("Building song list")
        repeat with curr_index from 1 to (count of values)
            set current_track to item curr_index in values
            -- name and artist are sanitized so they compare reliably
            set curr_name to my util's sanitize(name of current_track)
            set curr_artist to my util's sanitize(artist of current_track)
            set curr_bitrate to bit rate of current_track
            set curr_time to time of current_track
            -- create the metadata record and append it without copying the list
            set curr_info to {i:curr_index, a:curr_artist, n:curr_name, b:curr_bitrate, t:curr_time}
            set end of output to curr_info
            my logger's trace("Found " & my metadata's desc(curr_info))
        end repeat
        my logger's debug("Found " & (count of output) & " selected songs")
        -- remember the records so sorted() has something to work on
        set meta_data to output
        return output
    end build
end script
-- The main script: builds metadata for a list of songs, prunes it and copies
-- the resulting songs to a playlist.
script deduper
    -- name of the playlist to copy duplicates to
    property playlist_name : missing value
    -- the list of songs being processed
    property song_list : missing value

    -- Copies every song referenced by the metadata records in duplicates to the
    -- playlist. The i field of a record is the song's position in song_list.
    on to_playlist(duplicates as list)
        my logger's dheader("Processing duplicates and setting playlist")
        -- first create or clear the playlist
        my create_playlist()
        repeat with dup in duplicates
            set dup_index to i of dup
            set current_song to item dup_index of song_list
            duplicate current_song to user playlist playlist_name
            my logger's trace("Duplicating " & metadata's desc(dup) & " to playlist")
        end repeat
    end to_playlist

    -- Ensures an empty user playlist named playlist_name exists: an existing
    -- one is cleared, otherwise a new one is created.
    on create_playlist()
        -- test the same object class that is cleared and filled elsewhere
        if (exists user playlist playlist_name) then
            my logger's trace("Clearing tracks in playlist")
            delete every track of user playlist playlist_name
        else
            my logger's trace("Creating playlist \"" & playlist_name & "\"")
            make new user playlist with properties {name:playlist_name}
        end if
    end create_playlist

    -- Runs the whole process for the songs in s_list, using pl_name as the
    -- name of the target playlist.
    on dedupe(s_list, pl_name)
        set song_list to s_list
        set playlist_name to pl_name
        my logger's dheader("Starting dedupe")
        my logger's warn("It is recommended to run on selected items with \"Show Duplicate Items\" enabled. Then remove the songs from the library from the generated playlist")
        -- first build a list of song meta data
        set output to my songlist's build(song_list)
        -- now prune any duplicates we want to keep
        set output to my metalist's prune(output)
        -- finally output to the playlist; to_playlist takes the records only
        -- and reads the songs from the song_list property set above
        my to_playlist(output)
        my logger's debug("Done, identified " & (count of output) & " duplicates for removal")
    end dedupe
end script
-- Entry point: runs the deduper on the current iTunes selection, or shows an
-- alert when iTunes is not running.
on run
    tell application "iTunes"
        if running then
            -- hand the selected items and the target playlist name to the deduper
            set picked_items to selection
            set target_playlist to "Found Duplicates"
            my deduper's dedupe(picked_items, target_playlist)
        else
            display alert "iTunes is not running" message "Please activate iTunes and select items."
            return
        end if
    end tell
end run
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment