Skip to content

Instantly share code, notes, and snippets.

@masato9000
Created June 18, 2017 20:44
Show Gist options
  • Save masato9000/676fe0f3eb28d4a5b205672cb9512d08 to your computer and use it in GitHub Desktop.
Save masato9000/676fe0f3eb28d4a5b205672cb9512d08 to your computer and use it in GitHub Desktop.
Single-purpose shell script for backing up CDs using cdrtools
#!/bin/sh
_bsdl='
Copyright (c) 2017, masato9000@users.noreply.github.com
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'
cleanup() {
  # Remove the temporary working directory, at most once per run.
  #
  # Globals read:    _clean, workdir, noclean
  # Globals written: _clean
  #
  # The _clean guard lets this run from a signal trap and again from the
  # EXIT trap without doing the work twice. Setting $noclean to any
  # non-empty value keeps the temporary files for inspection.
  if [ "${_clean:-}" = 1 ]; then
    return 0
  fi
  if [ -n "${workdir:-}" ]; then
    if [ -z "${noclean:-}" ]; then
      # printf, not echo: whether echo expands '\n' differs between shells
      printf '\ncleaning up...\n'
      rm -rf -- "$workdir"
      printf '...done\n'
    else
      printf '%s\n' '$noclean variable set'
      printf 'not deleting temporary files in %s\n' "$workdir"
    fi
  fi
  _clean=1
}
check_depends() {
  # Verify that every external program this script drives can be found.
  # On the first missing one, print a hint and terminate with status 1.
  for _dep in readcd cdda2wav flac; do
    if command -v "$_dep" >/dev/null; then
      continue
    fi
    echo Unable to locate $_dep
    case $_dep in
      flac)
        echo Make sure you have the flac encoder installed
        ;;
      *)
        echo Make sure you have cdrtools installed.
        ;;
    esac
    exit 1
  done
  unset _dep
}
write_config() {
  # Write a fully commented-out default configuration to $conf_file, report
  # where it went, then terminate the script with status 255.
  #
  # Globals read: conf_file, HOME
  #
  # The heredoc delimiter is unquoted, so $HOME is expanded into the file.
  # \$ripdest is escaped on purpose: that sentence names the variable the
  # user may redefine in this very file, not its current value.
  cat <<_EOT >"$conf_file"
# Some eacsh variables (and even functions) can be redefined.
# Note that it's possible to break this script with bad values here.
# If that happens, just remove the config file and re-run eacsh
# base working directory (temp)
#riptemp="$HOME/EACsh/tmp"
# base directory for final results
#ripdest="$HOME/EACsh"
# default paraopts (passed to paranoia library; see man 1 cdda2wav)
#para_opts='paraopts=proof'
# cddb level (whether to do cddb lookups; see man 1 cdda2wav)
#cddb_level=0
# set "no" to not be prompted to hand-edit cddb info
#cddb_edit=
# optional cddb options (option=<value>; see man 1 cdda2wav)
#cddb_opts=
# text editor to use for editing cddb
#txt_editor=/usr/bin/vi
# File format (path and filename relative to \$ripdest)
# Ignored if cddb lookups are disabled
# Special values understood by this script:
# <ARTIST>, <ALBUM>, <TRACK>, <TITLE>, <DATE>, <GENRE>, <CDDB>
#file_spec="<ARTIST> - <ALBUM>/<TRACK> <TITLE>"
#file_spec_va="Various - <ALBUM>/<TRACK> <ARTIST> - <TITLE>"
_EOT
  printf 'Default config file written to %s\n' "$conf_file"
  exit 255
}
get_rawtoc() {
  # Print the body of the `readcd -fulltoc` dump: every line after one that
  # starts with "TOC len", up to but not including the next line that starts
  # with "Lead out". readcd's stderr is merged in so the dump is captured
  # whichever stream it is written to.
  #
  # The regexes use a plain space; "\ " is an escape sequence POSIX leaves
  # undefined for awk regular expressions.
  readcd -fulltoc 2>&1 |
    awk '
      /^TOC len/  { in_toc = 1; next }
      /^Lead out/ { in_toc = 0 }
      in_toc
    '
}
# TOC - block of 11-byte entries
# 1 session ID
# 2 "bflags" (hints on this field found in cdda2wav file: toc.c)
# - - first nibble (b 7-4) information about subchannels
# - - - for audio/data track entries, this is <= 3
# - - second nibble (b 3-0) track type is bit 2
# - - - (bflags&4==0) audio; (bflags&4==4) data
# 4 type of entry
# - - 0x01-0x63 - regular track; start sector in MSF at bytes 9-11
# - - A0 - list session first track (byte 9)
# - - A1 - list session last track (byte 9)
# - - A2 - list session leadout in MSF at bytes 9-11
# - - B0 - describe next session
# - - - leadin in MSF at bytes 5-7
# - - - session number at byte 8
# - - - leadout in MSF at bytes 9-11
# NOTE: hex TOC dumped by readcd uses real physical frame addresses, but
# standards name physical sector 150 (2-sec pregap) as 0.
# All of our tools will need the adjusted value, so always subtract 150.
_toc_lba() {
  # Convert a hex minute/second/frame triple ($1 $2 $3) taken from the raw
  # TOC into a logical sector number: the physical frame count minus the
  # 150-frame (2 second) pregap offset.
  echo $(( $(to_frames $((0x$1)) $((0x$2)) $((0x$3))) - 150 ))
}
parse_rawtoc() {
  # Walk the raw TOC held in $toc (one entry of 11 hex bytes per line) and
  # work out what kind of disc this is and which tracks can be ripped.
  #
  # Globals read:    toc
  # Globals written: a_first, a_last     first / last audio track number
  #                  hidden=yes          extra pregap before audio track 1
  #                  mixedmode=yes       session 1 starts with a data track
  #                  multisession=yes    data track found in session 2
  #                  datastart, dataend  sector range of that data track
  #                  _lout               most recent session lead-out
  # Exits with status 1 for layouts this script does not handle, or when
  # no audio track is found at all.
  while read -r _01 _02 _03 _04 _05 _06 _07 _08 _09 _10 _11; do
    # Blank or truncated lines cannot be TOC entries; the hex arithmetic
    # below would abort on them.
    [ -n "$_11" ] || continue
    # Note session leadout; we'll use it if there's a data session
    if [ "$_04" = A2 ]; then
      _lout=$(_toc_lba "$_09" "$_10" "$_11")
      continue
    fi
    # Ignore entries with bflags > 0x3f (usually between sessions)
    if [ $((0x$_02)) -gt 63 ]; then
      continue
    fi
    # We're not presently interested in any other non-track values
    if [ $((0x$_04)) -gt 99 ]; then
      continue
    fi
    if [ "$_01" = 01 ]; then
      # Session 1 is all audio, except track 1 in cases of mixed mode
      if [ $((0x$_02 & 4)) -eq 4 ]; then
        # Data track. Mixed mode discs (really yellowbook CD-ROMs) have
        # a single data track followed by audio tracks.
        if [ "$_04" = 01 ]; then
          mixedmode=yes
          datastart=$(_toc_lba "$_09" "$_10" "$_11")
          continue
        fi
        # printf, not echo: echo's handling of '\t' differs between shells
        printf 'ERROR:\t Data track detected in session 1 (but not track1)\n'
        printf "\tWe're not meant to handle this. Exiting...\n"
        exit 1
      elif [ "$_04" = 01 ]; then
        # Audio track 1 starting after sector 0 means extra pregap
        # (a possible hidden track)
        if [ "$(_toc_lba "$_09" "$_10" "$_11")" -gt 0 ]; then
          hidden=yes
        fi
      fi
      # On mixed-mode CDs the data track ends where track 2 starts
      if [ "$_04" = 02 ] && [ "${mixedmode:-}" = yes ]; then
        dataend=$(_toc_lba "$_09" "$_10" "$_11")
      fi
      # Only audio tracks left in session 1. Note the latest
      a_last=$((0x$_04))
    elif [ "$_01" = 02 ]; then
      # Multisession E-CD/CD-PLUS/CD-EXTRA
      if [ -z "${a_last:-}" ]; then
        printf 'ERROR:\t No audio tracks detected in session 1\n'
        printf "\tWe're not meant to handle this. Exiting...\n"
        exit 1
      elif [ "${mixedmode:-}" = yes ]; then
        # Mixed mode AND multisession: wrong but maybe safe to ignore
        printf 'WARNING: Found second session on mixed-mode CD\n'
        printf '\tAssuming standard mixed-mode and ignoring\n'
        printf '\tsubsequent sessions\n'
        break
      elif [ $((0x$_02 & 4)) -eq 0 ]; then
        # Audio in session 2: nonstandard, keep session 1 only
        printf 'WARNING: Session 2 has audio, but should be one data track\n'
        printf '\tOnly processing audio from session 1 of nonstandard disc\n'
        break
      fi
      multisession=yes
      datastart=$(_toc_lba "$_09" "$_10" "$_11")
      dataend=${_lout:-}
      # Stop processing after one data track (there can be only one!)
      break
    fi
  done <<_EOT
$toc
_EOT
  # Without a single audio track nothing that follows can work
  if [ -z "${a_last:-}" ]; then
    printf 'ERROR:\tNo audio tracks found in the table of contents\n'
    printf '\tMake sure the drive contains a readable audio disc\n'
    exit 1
  fi
  a_first=1
  if [ "${mixedmode:-}" = yes ]; then
    a_first=2
    printf 'Mixed mode CD-ROM detected.\n'
    printf 'Will attempt to save data track from sectors %s to %s\n' \
      "$datastart" "$dataend"
  elif [ "${multisession:-}" = yes ]; then
    if [ -z "${datastart:-}" ] || [ -z "${dataend:-}" ]; then
      printf 'WARNING: Multisession CD detected but no data track detected\n'
      printf '\tWill not attempt to rip ECD data track.\n'
      multisession=no
    else
      printf 'Multisession CD found; likely an Enhanced-CD\n'
      printf 'Will attempt to save data track from sectors %s to %s\n' \
        "$datastart" "$dataend"
    fi
  fi
  printf 'Audio tracks available to rip:\t%s - %s\n' "$a_first" "$a_last"
}
cd_info() {
  # Gather per-disc information before ripping.
  #
  # Globals read:    hidden, a_first, a_last, cddb_level
  # Globals written: hidden (may be unset), _trange, cddb_id
  #
  # With cddb lookups enabled (cddb_level other than -1) the work is handed
  # to get_cddb. Otherwise cdda2wav is run once for indices/MCN/ISRC and the
  # disc id is picked out of its output.

  # Confirm that we have an actual hidden track - sometimes cdda2wav
  # doesn't register extra pregap as track0; trying to do anything with
  # that track will fail.
  if [ "${hidden:-}" = yes ]; then
    echo 'checking for hidden track #0...'
    if cdda2wav -J -vtoc -g --no-infofile -out-fd=1 2>/dev/null |
      grep '^T00' >/dev/null; then
      echo ...track 0 detected and can be extracted
    else
      echo '...no extractable audio found in track #0'
      unset hidden
    fi
  fi
  # We can check for indices/mcn/isrc in one shot and save all the info
  # files, but for multisession discs it doesn't want to do the .inf's
  # correctly unless we specify only the audio tracks.
  if [ "${hidden:-}" = yes ]; then
    _trange=0+$a_last
  else
    _trange=$a_first+$a_last
  fi
  echo getting info on selected tracks...
  # Quoted so that an empty cddb_level is a plain mismatch rather than a
  # syntax error inside [ ]
  if [ "${cddb_level:-}" != -1 ]; then
    get_cddb
    return
  fi
  echo 'Skipping cddb lookup; audio files will not be auto-tagged'
  echo and will be named by CDDB disc ID and track number
  # tee shows cdda2wav's report on stderr while this loop scans it for the
  # disc id (the text after ": 0x" on a line starting with CDDB)
  while read -r _line; do
    case $_line in
      CDDB*)
        cddb_id=${_line#*: 0x}
        ;;
    esac
  done <<_EOT
$(cdda2wav -J -vindices,mcn,isrc -out-fd=1 -t"$_trange" -B track 2>/dev/null | tee /dev/stderr)
_EOT
}
get_cddb() {
  # Run the cddb lookup through cdda2wav, optionally let the user edit the
  # result, then load the disc-level fields of track.cddb into variables.
  #
  # Globals read:    cddb_level, cddb_opts, _trange, cddb_edit (edit_cddb is
  #                  accepted as an older spelling), txt_editor
  # Globals written: cddb_id, disc_artist, disc_title, disc_year, disc_genre,
  #                  track_names (all TTITLE lines, each preceded by a
  #                  newline), various
  # Files:           track.cddb in the current directory; the untouched
  #                  copy is kept as track.cddb.LATIN1 when iconv exists.

  # $cddb_opts is deliberately unquoted so it can carry several options
  cdda2wav -J -L "$cddb_level" $cddb_opts -vindices,mcn,isrc \
    -t"$_trange" -B track 2>/dev/null
  # Convert CDDB from ISO-8859-1 to native locale before editing
  # Helpful for some terminal-based editors in UTF-8 locales
  if command -v iconv >/dev/null; then
    mv track.cddb track.cddb.LATIN1
    iconv -f ISO-8859-1 track.cddb.LATIN1 >track.cddb
  fi
  # The config file documents this switch as cddb_edit
  if [ "${cddb_edit:-}" != no ] && [ "${edit_cddb:-}" != no ]; then
    # `read -p` is not part of POSIX sh; print the prompt separately.
    # EOF on stdin counts as the default answer (no).
    printf 'Do you want to review/edit CDDB info? (y/N): ' >&2
    read -r edit_response || edit_response=
    case $edit_response in
      y* | Y*)
        # unquoted: the editor setting may include arguments
        $txt_editor track.cddb
        ;;
    esac
    printf 'Does this CD feature Various Artists? (y/N): ' >&2
    read -r various || various=
    case $various in
      y* | Y*)
        various=y
        ;;
    esac
  fi
  # read global info into variables.
  _dtitle= disc_title= disc_artist=
  disc_id= disc_year= disc_genre= track_names=
  while read -r _line; do
    # Control chars are not legal for tags or paths, and backslashes and
    # double quotes would have to be escaped in later commands: turn "
    # into ' and drop the rest.
    _line=$(printf -- '%s' "$_line" |
      tr '"' "'" | tr -d '\\[:cntrl:]')
    case $_line in
      DISCID*)
        cddb_id=${_line#*=}
        ;;
      DTITLE*)
        # long titles continue on further DTITLE lines
        _dtitle=$_dtitle${_line#*=}
        ;;
      DYEAR*)
        disc_year=${_line#*=}
        ;;
      DGENRE*)
        disc_genre=${_line#*=}
        ;;
      TTITLE*) # Read all TTITLE* lines for later parsing
        track_names=$track_names"
"$_line
        ;;
    esac
  done <<_EOT
$(cat track.cddb)
_EOT
  # In CDDB, the string " / " is a reserved separator for artist/title
  disc_artist=${_dtitle% / *}
  disc_title=${_dtitle#* / }
}
destprep() {
  # Work out and create the destination directory, then store the raw TOC
  # dump (toc.dat in the current directory) inside it.
  #
  # Globals read:    cddb_level, cddb_id, various, file_spec, file_spec_va,
  #                  ripdest, disc_artist, disc_title, disc_year, disc_genre,
  #                  multisession, mixedmode
  # Globals written: dest_spec, dest_dir
  if [ "${cddb_level:-}" = -1 ]; then
    dest_spec="$cddb_id/"
  else
    if [ "${various:-}" = y ]; then
      _spec=$file_spec_va
    else
      _spec=$file_spec
    fi
    # Only the part before the last "/" names directories. A spec with
    # no "/" at all is a bare file name and adds no directory.
    case $_spec in
      */*) dest_spec=${_spec%/*} ;;
      *) dest_spec= ;;
    esac
  fi
  # Every value goes through sanitize because it lands in a sed
  # replacement string
  dest_dir=$(printf '%s\n' "$ripdest/$dest_spec" |
    sed -e "s|<ARTIST>|$(sanitize "$disc_artist")|g" \
      -e "s|<ALBUM>|$(sanitize "$disc_title")|g" \
      -e "s|<DATE>|$(sanitize "$disc_year")|g" \
      -e "s|<GENRE>|$(sanitize "$disc_genre")|g" \
      -e "s|<CDDB>|$(sanitize "$cddb_id")|g")
  mkdir -p -- "$dest_dir"
  # The file name of the saved TOC records the disc layout
  if [ "${multisession:-}" = yes ]; then
    cp toc.dat "$dest_dir/ECD_multisession_toc.dat"
  elif [ "${mixedmode:-}" = yes ]; then
    cp toc.dat "$dest_dir/CD-ROM_mixed-mode_toc.dat"
  else
    cp toc.dat "$dest_dir/"
  fi
}
audiorip() {
  # Extract one audio track with cdda2wav in paranoia mode.
  #   $1 - track number
  # Sets _trange and _basename for the cdda2wav call. When a hidden track
  # was confirmed, asking for track 1 rips tracks 0 and 1 together under
  # the common base name "track".
  _trange="$1+$1"
  _basename=track_$(printf "%.2d" "$1")
  if [ "$1" -eq 1 ] && [ "${hidden}X" = yesX ]; then
    _trange="0+1"
    _basename="track"
  fi
  printf '\nRipping track %s...\t' "$1"
  cdda2wav -t $_trange -vmcn -paranoia $para_opts \
    -no-infofile -B $_basename 2>/dev/null
}
audiotags() {
  # Print the flac tag options for one track on a single line.
  #   $1 - track number (numeric, used to find the TTITLE entry)
  #   $2 - track number as it should appear in the TRACKNUMBER tag
  #
  # Globals read: a_last, cddb_id, cddb_level, disc_title, disc_year,
  #               disc_genre, disc_artist, track_names, various
  #
  # Free-text values are wrapped in literal double quotes because the
  # caller feeds this line to eval and also cuts ARTIST/TITLE back out of
  # it by searching for "ARTIST= and "TITLE=.
  _tracknum=$1
  _tracknum_str=$2
  # Start clean: the title is assembled by appending below, so a value
  # left from an earlier call in the same shell would be duplicated.
  _trackname=
  printf -- '-T TRACKTOTAL=%s ' "$a_last"
  printf -- '-T TRACKNUMBER=%s ' "$_tracknum_str"
  printf -- '-T CDDB=%s ' "$cddb_id"
  # Without cddb, that's the most we can tag
  if [ "${cddb_level:-}" = -1 ]; then
    return 0
  fi
  printf -- '-T "ALBUM=%s" ' "$disc_title"
  printf -- '-T "DATE=%s" ' "$disc_year"
  printf -- '-T "GENRE=%s" ' "$disc_genre"
  # TTITLE entries count from 0; a long title spans several lines that
  # share the same key
  while read -r _track; do
    if [ "${_track%%=*}" = "TTITLE$((_tracknum - 1))" ]; then
      _trackname=${_trackname}${_track#*=}
    fi
  done <<_EOT
$track_names
_EOT
  # " / " artists/title separator also used for Various Artists tracks
  if [ "${various:-}" = y ]; then
    _trackartist=${_trackname% / *}
    _trackname=${_trackname#* / }
    printf -- '-T "ARTIST=%s" ' "$_trackartist"
  else
    printf -- '-T "ARTIST=%s" ' "$disc_artist"
  fi
  printf -- '-T "TITLE=%s"' "$_trackname"
}
audioencode() {
  # Encode one ripped track to FLAC in $dest_dir, tagged and named from
  # the cddb data, and copy its .inf file alongside.
  #   $1 - track number
  #
  # Globals read: a_last, cddb_level, various, file_spec, file_spec_va,
  #               dest_dir, disc_artist, disc_title, disc_year, disc_genre,
  #               cddb_id, hidden
  # Files read:   track_NN.wav / track_NN.inf in the current directory
  #               (plus track_00.* when a hidden track goes with track 1)
  _tracknum=$1
  # Pad track numbers only when the disc has ten or more tracks
  if [ "$a_last" -lt 10 ]; then
    _tracknum_str=$_tracknum
  else
    _tracknum_str=$(printf '%.2d' "$_tracknum")
  fi
  _tagopts=$(audiotags "$_tracknum" "$_tracknum_str")
  # Recover the per-track artist and title from the quoted tag options
  trackartist=${_tagopts#*\"ARTIST=}
  trackartist=${trackartist%%\"*}
  trackname=${_tagopts#*\"TITLE=}
  trackname=${trackname%%\"*}
  infile=track_$(printf '%.2d' "$1").wav
  infofile=track_$(printf '%.2d' "$1").inf
  if [ "${cddb_level:-}" = -1 ]; then
    out_spec="<CDDB>_<TRACK>"
  elif [ "${various:-}" = y ]; then
    # In the various-artists spec <ARTIST> means the track's own artist
    out_spec=$(printf '%s\n' "${file_spec_va##*/}" |
      sed -e "s|<ARTIST>|<TARTIST>|g")
  else
    out_spec=${file_spec##*/}
  fi
  outfile=$dest_dir/$(printf '%s\n' "$out_spec" |
    sed -e "s|<ARTIST>|$(sanitize "$disc_artist")|g" \
      -e "s|<TARTIST>|$(sanitize "$trackartist")|g" \
      -e "s|<ALBUM>|$(sanitize "$disc_title")|g" \
      -e "s|<TRACK>|$_tracknum_str|g" \
      -e "s|<TITLE>|$(sanitize "$trackname")|g" \
      -e "s|<DATE>|$(sanitize "$disc_year")|g" \
      -e "s|<GENRE>|$(sanitize "$disc_genre")|g" \
      -e "s|<CDDB>|$(sanitize "$cddb_id")|g").flac
  # SECURITY: the tag line comes from cddb data and is passed through
  # eval so its embedded double quotes group words. Backslash-escape $
  # and ` first so a title can never run a command or expand a variable.
  # (Double quotes and backslashes are expected to have been stripped
  # when the cddb file was read.) Paths are handed to eval as variable
  # references, never as text.
  _tagopts_q=$(printf '%s' "$_tagopts" | sed 's/[$`]/\\&/g')
  if [ "$_tracknum" -eq 1 ] && [ "${hidden:-}" = yes ]; then
    _htname="00 hidden track.flac"
    _htout=$dest_dir/$_htname
    eval "flac -5 -s $_tagopts_q -o \"\$_htout\" track_00.wav"
    cp track_00.inf "$dest_dir/"
  fi
  eval "flac -5 -s $_tagopts_q -o \"\$outfile\" \"\$infile\""
  printf -- '===FLAC===\n Finished encoding %s =>\n %s\n\n' "$infile" "$outfile"
  cp "$infofile" "$dest_dir/"
}
datarip() {
  # Dump the disc's data track with readcd and copy the image to $dest_dir.
  # A no-op (status 0) unless the disc was flagged multisession or mixed
  # mode. Reads datastart, dataend, a_last and dest_dir.
  if [ "$multisession"X != yesX ] && [ "$mixedmode"X != yesX ]; then
    return 0
  fi
  echo Saving data track \(Sectors $datastart - $dataend\)
  readcd sectors=$datastart-$dataend f=data.bin
  # The copy's name records session, track number and start sector
  case "$multisession"X in
    yesX)
      cp data.bin "$dest_dir/ECD_sess:2_trk:$((a_last+1))_sector:$datastart.bin"
      ;;
    *)
      cp data.bin "$dest_dir/CDROM_sess:1_trk:1_sector:$datastart.bin"
      ;;
  esac
}
sanitize() {
  # Make a tag value safe to use both as a path component and as the
  # replacement half of a sed "s|..|..|" command. All arguments are joined
  # without separators.
  #   - leading dots are removed from each line
  #   - "&" is kept, but backslash-escaped so sed treats it literally
  #   - the characters > < | * / are deleted
  # Callers are expected to have removed double quotes and control
  # characters already.
  printf -- '%s' "$@" |
    sed -e 's/^[.]*//' -e 's/&/\\&/g' |
    tr -d '*/<>|'
}
to_frames() {
  # Turn a CD address given as minutes ($1), seconds ($2) and frames ($3)
  # into a plain frame count and print it. One second holds 75 frames.
  _f=$(( ($1 * 60 + $2) * 75 + $3 ))
  printf '%s\n' "$_f"
}
set -e
# Clean up when leaving. An interrupt must end the script: a handler that
# merely returns would let execution carry on after the temporary files are
# gone. `exit` inside the INT handler fires the EXIT trap, so cleanup still
# runs (and it guards itself against running twice).
trap cleanup EXIT
trap 'exit 130' INT
check_depends
# Built-in defaults; any of them can be overridden from the config file
conf_file=$HOME/.eacsh.conf
riptemp="$HOME/EACsh/tmp"
ripdest="$HOME/EACsh"
para_opts='paraopts=proof'
cddb_level=0
cddb_edit=
cddb_opts=
txt_editor=/usr/bin/vi
file_spec="<ARTIST> - <ALBUM>/<TRACK> <TITLE>"
file_spec_va="Various - <ALBUM>/<TRACK> <ARTIST> - <TITLE>"
if [ -f "$conf_file" ]; then
  . "$conf_file"
else
  printf 'Config file %s does not exist: creating...\n' "$conf_file"
  # write_config does not return; it ends the script after writing the file
  write_config
fi
# Don't bother setting anything up if we can't read a disc
# (printf rather than echo below: echo's '\t' handling differs per shell)
readcd sectors=0-0 -f=- >/dev/null 2>&1 || {
  printf 'ERROR:\tDrive not ready\n'
  printf '\tMake sure the drive contains a readable disc\n'
  exit 1
}
# Make sure we can create riptemp and ripdest, or if they already exist
# that we can write to them. Writability tests may fail on some systems
# if directories on read-only filesystems are falsely reported writable
mkdir -p "$riptemp" || {
  printf 'ERROR:\tFailed to create temporary directory %s\n' "$riptemp"
  printf '\tMake sure that $riptemp is set to a writable location\n'
  exit 1
}
if [ ! -d "$riptemp" ] || [ ! -w "$riptemp" ]; then
  printf 'ERROR:\tTemporary directory is not writable\n'
  printf '\tMake sure $riptemp is set to a writable location on a\n'
  printf '\tread-write-mounted filesystem.\n'
  exit 1
fi
mkdir -p "$ripdest" || {
  printf 'ERROR:\tFailed to create output directory %s\n' "$ripdest"
  printf '\tMake sure that $ripdest is set to a writable location\n'
  exit 1
}
if [ ! -d "$ripdest" ] || [ ! -w "$ripdest" ]; then
  printf 'ERROR:\tOutput directory is not writable\n'
  printf '\tMake sure $ripdest is set to a writable location on a\n'
  printf '\tread-write-mounted filesystem.\n'
  exit 1
fi
# Command-line arguments are the tracks to rip: numbers and/or "data"
trklist="$*"
# Template form of mktemp: accepted by both GNU and BSD implementations
workdir=$(mktemp -d "$riptemp/EACsh.XXXXXX")
cd "$workdir"
toc=$(get_rawtoc)
parse_rawtoc
if [ -n "$trklist" ]; then
  do_data=no
  _audio_list=
  for _trk in $trklist; do
    case $_trk in
      [0-9] | [1-9][0-9])
        # the pattern already rules out negatives; only the upper
        # bound needs checking
        if [ "$_trk" -gt "$a_last" ]; then
          badtrack=yes
        fi
        ;;
      data)
        if [ "${multisession:-}" = yes ] || [ "${mixedmode:-}" = yes ]; then
          do_data=yes
        else
          printf 'WARNING: data track specified but no data track found\n'
          printf '\tContinuing without selecting a data track\n'
        fi
        continue
        ;;
      *)
        badtrack=yes
        ;;
    esac
    if [ -n "${badtrack:-}" ]; then
      printf 'ERROR: Track %s not in range of audio tracks on disc\n' "$_trk"
      exit 1
    fi
    _audio_list="$_audio_list $_trk"
    _last=$_trk
  done
  # A data track is only saved when explicitly asked for
  if [ "$do_data" != yes ]; then
    unset multisession mixedmode
  fi
  # From here on trklist holds audio track numbers only; "data" is
  # handled through the multisession/mixedmode flags instead
  trklist=$_audio_list
  rip_last=${_last:-}
else
  trklist=$(seq "$a_first" "$a_last")
  rip_last=$a_last
fi
cd_info
destprep
datarip
for _trk in $trklist; do
  # "data" is not an audio track; the data track was handled by datarip
  case $_trk in
    data) continue ;;
  esac
  audiorip "$_trk"
  # Let audioencode run in bg so the next rip overlaps with encoding
  if [ "$_trk" -ne "$rip_last" ]; then
    audioencode "$_trk" &
  else
    audioencode "$_trk"
  fi
done
# The last encode finishing says nothing about earlier background ones;
# wait for all of them before the EXIT trap removes the working directory.
wait
if ls -a *.cdtext >/dev/null 2>&1; then
  echo Found CD-Text
  echo 'copying *.cdtext files'
  cp -- *.cdtext "$dest_dir"
fi
# ISRC-shaped strings (CC-XXX-YY-NNNNN) repeated on consecutive lines
dupISRC=$(grep -h -E '[[:upper:]]{2}-[[:alnum:]]{3}-[[:digit:]]{2}-[[:digit:]]{5}' *.inf |
  uniq -d)
# An if statement, not `[ ... ] && { ... }`: as the last command of the
# script, a false test would otherwise become the exit status of a
# successful run.
if [ -n "$dupISRC" ]; then
  printf 'WARNING: Duplicate ISRC numbers found in some tracks\n'
  printf '\tThis is probably due to a bug in the way your CD-ROM drive\n'
  printf '\treads ISRC codes. If accuracy is important, you should\n'
  printf "\tmanually check/correct your track_xx.inf files' ISRC codes.\n"
fi
@masato9000
Copy link
Author

Just another script for ripping CDs with enough necessary information to reconstruct the original CD.

cdrdao is frequently used for this sort of thing (and it works well despite being unmaintained for many years) but this one uses cdrtools (not cdrkit) which is still actively maintained.

This probably has bugs or missing functions, but I'm not planning to work on it again as I've finished backing up my CD collection. Just stashing it here in case I find use for it again.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment