Skip to content

Instantly share code, notes, and snippets.

@phiresky
Last active August 29, 2015 14:13
Show Gist options
  • Save phiresky/dd8b794a8016adf59649 to your computer and use it in GitHub Desktop.
Save phiresky/dd8b794a8016adf59649 to your computer and use it in GitHub Desktop.
Download / Parse MrSuicideSheep YouTube Channel
#!/bin/bash
# downloadChannel
#
# Downloads a whole YouTube channel, including descriptions and thumbnails.
# Should work on any channel or playlist.
# youtube-dl is awesome
# depends: youtube-dl
#
# Usage: downloadChannel <youtube-user>
#   <youtube-user>  user name inserted into
#                   http://www.youtube.com/user/<youtube-user>/videos
#
# Everything is written to ./rawdownload. The exit status is non-zero when no
# channel is given or the output directory cannot be created; otherwise it is
# whatever youtube-dl returns.

main() {
  # A usage error must be visible to callers: report on stderr, fail non-zero.
  if [[ -z "${1:-}" ]]; then
    echo "No channel specified" >&2
    return 1
  fi

  local user=$1
  local channel="http://www.youtube.com/user/$user/videos"
  local playlistStart=1 # raise this to resume an interrupted download
  local outdir="rawdownload"

  mkdir -p -- "$outdir" || return 1

  # --extract-audio together with --keep-video keeps both the video and the
  # extracted audio; --ignore-errors lets youtube-dl continue past videos it
  # cannot download.
  youtube-dl \
    -o "$outdir/%(upload_date)s-%(id)s-%(title)s.%(ext)s" \
    "$channel" \
    --write-description --write-info-json --write-thumbnail \
    --extract-audio --keep-video \
    --playlist-start "$playlistStart" \
    --ignore-errors
}

main "$@"
#!/bin/bash
# setMetadata
#
# Extracts metadata from MrSuicideSheep's videos, combines it with the audio
# files and sorts them into the output folder.
# This works because his videos always have a title of the form
# "Interpret - Title"; the downloaded files are therefore named
# "<YYYYMMDD>-<11-char video id>-<Interpret> - <Title>.m4a".
# Install atomicparsley-largefile from AUR on Arch Linux.
# depends: bash, AtomicParsley, sed, pv
# problems: AtomicParsley limits comment length to 256 characters, removing
# part of the description; so write it into lyrics also
INDIR="rawdownload"
OUTDIR="finalOutput"
nomixes=true # ignore sheepy mixes (set to false to tag them as well)

# Print $1 with every character outside a conservative whitelist replaced by
# "_", so the result is a single file name that Windows file systems accept.
# Inside a bracket expression a backslash is literal, so "]" has to come
# first and "-" last to be taken literally.
sanitize_filename() {
  sed -e 's/[^]A-Za-z 0-9._&()[-]/_/g' <<<"$1"
}

# Tag one downloaded audio file and move the result into $OUTDIR.
# Arguments: $1 - path of the .m4a file inside $INDIR
# Globals:   INDIR, OUTDIR, nomixes (read)
# Outputs:   exactly one newline-terminated log line on stdout
process_file() {
  local src=$1
  local stem upload id artist title year comment dest
  local -a temp_files
  local song_re='^([0-9]{8})-(.{11})-(.+) - (.+)$'
  local mix_re='^([0-9]{8})-(.{11})-(.+)$'

  printf "Processing '%s'; " "$src"
  stem=${src##*/}
  stem=${stem%.m4a}

  if [[ $stem =~ $song_re ]]; then
    printf 'song; '
    upload=${BASH_REMATCH[1]}
    id=${BASH_REMATCH[2]}
    artist=${BASH_REMATCH[3]}
    title=${BASH_REMATCH[4]}
  elif [[ $stem =~ $mix_re ]]; then
    # no "Interpret - Title" part, probably a sheepy mix
    printf 'mix; '
    if [[ $nomixes == true ]]; then
      echo # terminate the log line so it still counts as one progress step
      return 0
    fi
    upload=${BASH_REMATCH[1]}
    id=${BASH_REMATCH[2]}
    artist="MrSuicideSheep"
    title=${BASH_REMATCH[3]}
  else
    printf 'unrecognized file name, skipping\n'
    return 0
  fi

  year="${upload:0:4}-${upload:4:2}-${upload:6:2}T00:00:00Z"

  # insert source url before video description
  comment=$(
    printf 'Source: http://youtu.be/%s\n' "$id"
    cat -- "$INDIR/$stem.mp4.description"
  )

  # Output filename, can be changed. Only the name component is sanitized;
  # when adding sub-directories, sanitize each path component separately.
  dest="$OUTDIR/$(sanitize_filename "$artist - $title.m4a")"
  mkdir -p -- "$(dirname -- "$dest")"

  if [[ -f $dest ]]; then
    printf "exists,skipping: '%s'\n" "$dest"
    return 0
  fi

  # AtomicParsley's own output is folded onto the current log line.
  AtomicParsley "$src" --artist "$artist" --title "$title" \
    --genre "MrSuicideSheep" --year "$year" \
    --artwork "$INDIR/$stem.jpg" --comment "$comment" \
    --lyrics "$comment" --longdesc "$comment" --encodingTool "phiresky" |
    tr '\n' '\t'

  # The tagged copy is expected next to the input as "<name>-temp-<n>.m4a";
  # only report success if it exists and could be moved.
  temp_files=("${src%.m4a}-temp"*)
  if [[ -e ${temp_files[0]} ]] && mv -- "${temp_files[0]}" "$dest"; then
    printf "written to: '%s'\n" "$dest"
  else
    printf "FAILED, no tagged file produced for: '%s'\n" "$dest"
  fi
}

# Entry point: tag every .m4a in $INDIR, logging to log/metadataset<time>.log
# and using the number of log lines as a progress indicator for pv.
main() {
  local tool stamp src
  local -a files

  for tool in AtomicParsley pv sed; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      printf 'setMetadata: required tool not found: %s\n' "$tool" >&2
      return 1
    fi
  done

  mkdir -p -- "$INDIR" log || return 1

  # Without nullglob an empty input directory would yield the literal pattern.
  shopt -s nullglob
  # Remove stale temp files first; they would otherwise match *.m4a below.
  rm -rf -- "${INDIR:?}"/*-temp-*
  files=("$INDIR"/*.m4a)

  stamp=$(date -u +%Y-%m-%d_%H-%M-%S) # current time for log files

  for src in "${files[@]}"; do
    process_file "$src"
  done | pv -s "${#files[@]}" -l >>"log/metadataset${stamp}.log"
  echo
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment