Last active
August 29, 2015 14:13
-
-
Save phiresky/dd8b794a8016adf59649 to your computer and use it in GitHub Desktop.
Download / Parse MrSuicideSheep YouTube Channel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# downloadChannel
#
# Downloads every video of a YouTube user channel into ./rawdownload,
# together with the description, the info JSON and the thumbnail, and
# extracts the audio track while keeping the original video file.
# youtube-dl is awesome
#
# Usage:   downloadChannel <youtube-user-name>
# depends: youtube-dl

# A missing channel name is a usage error: report it on stderr and exit
# non-zero so callers can detect the failure.
if [[ -z "${1:-}" ]]; then
  echo "No channel specified" >&2
  exit 1
fi

user="$1"
channel="http://www.youtube.com/user/$user/videos"
playlistStart=1 # raise this to resume a partially downloaded channel
outdir="rawdownload"

mkdir -p "$outdir" || exit 1

# Output files are named "<upload date>-<video id>-<title>.<ext>".
# --ignore-errors keeps going when a single video fails to download.
youtube-dl -o "$outdir/%(upload_date)s-%(id)s-%(title)s.%(ext)s" "$channel" \
  --write-description --write-info-json --write-thumbnail \
  --extract-audio --keep-video \
  --playlist-start "$playlistStart" --ignore-errors
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# setMetadata
#
# Extracts metadata from MrSuicideSheep's videos, combines it with the audio
# files and sorts the tagged files into the output folder.
# This works because his videos always have a title of the form
# "Interpret - Title".
# Install atomicparsley-largefile from AUR on Arch Linux.
# depends:  bash, AtomicParsley, sed; pv is optional (progress display)
# problems: AtomicParsley limits comment length to 256 characters, removing
#           part of the description; so write it into lyrics also

INDIR="rawdownload"
OUTDIR="finalOutput"
nomixes=true # ignore sheepy mixes

# Unmatched globs expand to nothing, so an empty input directory is simply
# a no-op instead of processing the literal pattern "*.m4a".
shopt -s nullglob

# sanitize_filename PATH
# Prints PATH with every character Windows file systems do not like replaced
# by "_", and with a trailing dot removed from each directory component.
# "/" is kept so the directory part of the path survives.
# Note: "]" has to be the first character of the bracket expression to be
# taken literally; a backslash does not escape it there.
sanitize_filename() {
  printf '%s\n' "$1" | sed -e 's|[^]A-Za-z 0-9._&()[/-]|_|g' -e 's|\./|/|g'
}

# parse_basename NAME
# NAME is "<YYYYMMDD>-<11 character video id>-<video title>." (the trailing
# dot is what remains after stripping the "m4a" extension).
# Sets the globals Date (YYYY-MM-DD), ID, Artist and Title.
# Returns 0 when the video title has the form "Artist - Title" (a song) and
# 1 otherwise (probably a sheepy mix; the artist is then "MrSuicideSheep").
parse_basename() {
  local name=$1
  local song_re='^([0-9]{8})-(.{11})-(.+) - (.+)\.$'

  Date="${name:0:4}-${name:4:2}-${name:6:2}" # split date into year-month-day
  if [[ $name =~ $song_re ]]; then
    ID=${BASH_REMATCH[2]}
    Artist=${BASH_REMATCH[3]}
    Title=${BASH_REMATCH[4]}
    return 0
  fi
  ID=${name:9:11}
  Artist="MrSuicideSheep"
  Title=${name:21}
  Title=${Title%.}
  return 1
}

# process_file FILE
# Tags one .m4a file and moves the result into $OUTDIR. Writes exactly one
# log line to stdout, which the caller also uses as a progress tick.
process_file() {
  local in=$1
  local base comment outfilename
  local -a tempfiles

  base=$(basename "${in%m4a}") # keeps the trailing "."
  printf "Processing '%s'; " "$in"

  if parse_basename "$base"; then
    printf 'song; '
  else
    printf 'mix; '
    if [[ $nomixes == true ]]; then
      echo # still terminate the log line so line counting stays correct
      return 0
    fi
  fi

  # insert source url before video description
  comment=$(
    echo "Source: http://youtu.be/$ID"
    cat "$INDIR/${base}mp4.description"
  )

  # output filename, can be changed
  outfilename=$(sanitize_filename "$OUTDIR/$Artist - $Title.m4a")
  mkdir -p "$(dirname "$outfilename")"

  if [[ -f "$outfilename" ]]; then
    printf "exists,skipping: '%s'" "$outfilename"
  else
    # AtomicParsley's own output is folded onto the current log line.
    AtomicParsley "$in" --artist "$Artist" --title "$Title" --genre "MrSuicideSheep" \
      --year "$Date"T00:00:00Z --artwork "$INDIR/${base}jpg" --comment "$comment" \
      --lyrics "$comment" --longdesc "$comment" --encodingTool "phiresky" | tr '\n' '\t'
    # The tagged copy is expected next to the input as "<name>-temp*".
    # Only report success when that file exists and could be moved.
    tempfiles=("${in%.m4a}-temp"*)
    if ((${#tempfiles[@]} == 1)) && mv -- "${tempfiles[0]}" "$outfilename"; then
      printf "written to: '%s'" "$outfilename"
    else
      printf "FAILED: '%s'" "$outfilename"
    fi
  fi
  echo
}

# progress TOTAL
# Copies stdin to stdout; shows a line-based progress bar when pv is installed.
progress() {
  if command -v pv >/dev/null 2>&1; then
    pv -s "$1" -l
  else
    cat
  fi
}

main() {
  local logfile file
  local -a files

  mkdir -p "$INDIR" log
  rm -rf -- "$INDIR"/*-temp-* # leftovers of an interrupted earlier run

  files=("$INDIR"/*.m4a)
  logfile="log/metadataset$(date -u +%Y-%m-%d_%H-%M-%S).log" # current time for log files

  # use output lines as a progress indicator
  for file in "${files[@]}"; do
    process_file "$file"
  done | progress "${#files[@]}" >>"$logfile"
  echo
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment