Skip to content

Instantly share code, notes, and snippets.

@OrigamiTech
Created February 19, 2011 16:52
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save OrigamiTech/835178 to your computer and use it in GitHub Desktop.
Save OrigamiTech/835178 to your computer and use it in GitHub Desktop.
Bandcamp Scraper.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using HundredMilesSoftware.UltraID3Lib;
namespace MeinBandCamp
{
class Program
{
const string
rgxIsArtistPath = "^http://[a-z0-9\\-]+?\\.bandcamp\\.com/?$",
rgxIsAlbumPath = "^http://[a-z0-9\\-]+?\\.bandcamp\\.com/album/[a-z0-9\\-]+?/?$",
rgxIsTrackPath = "^http://[a-z0-9\\-]+?\\.bandcamp\\.com/track/[a-z0-9\\-]+?/?$",
rgxJsEscapedString = "\"([^\"]|(?<=\\\\)\")+?(?<!\\\\)\"",
albumArtFilename = "Folder.jpg";
static string
downloadsFolder = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), "MeinBandCamp");
static WebClient wc = new WebClient();
static void Main(string[] args)
{
try
{
List<string> urls = args.ToList();
if (urls.Count == 0)
{
Console.Write("No URLs detected. Enter one here: ");
urls.Add(Console.ReadLine());
}
if (!Directory.Exists(downloadsFolder))
Directory.CreateDirectory(downloadsFolder);
foreach (string path in urls)
{
string[] explode = path.Split(new char[] { '/', '.' });
if (Regex.Match(path, rgxIsArtistPath).Success)
GetArtist(explode[2]);
else if (Regex.Match(path, rgxIsAlbumPath).Success)
GetAlbum(explode[2], explode[6], -1);
else if (Regex.Match(path, rgxIsTrackPath).Success)
GetTrack(explode[2], explode[6]);
else
Console.WriteLine("Invalid bandcamp URL");
}
}
catch (Exception ex) { Console.Write(ex.ToString()); }
Console.WriteLine("All done! Press any key to finish.");
Console.ReadKey(true);
}
static void GetArtist(string paramArtist)
{
string rgxAlbumAndArt = "\\<a href=\"/album/[a-z0-9\\-]+?\"\\>\\<img class=\"resizableArt\" src=\"http://bandcamp\\.com/files/\\d+?/\\d+?/\\d+?-\\d+?.jpg\"/\\>\\</a\\>";
string pageUrl = "http://" + paramArtist + ".bandcamp.com/";
string pageData = wc.DownloadString(pageUrl);
MatchCollection mcAlbumAndArt = Regex.Matches(pageData, rgxAlbumAndArt);
foreach (Match m in mcAlbumAndArt)
{
string[] explode = m.Value.Split('"');
string paramAlbum = explode[1].Replace("/album/", "");
GetAlbum(paramArtist, paramAlbum, -1);
}
}
static void GetAlbum(string paramArtist, string paramAlbum, int track)
{
string
rgxAlbumArt = "\\<img src=\"http://bandcamp\\.com/files/\\d+?/\\d+?/\\d+?-\\d+?.jpg\" alt=\".+?Cover Art\"/\\>",
rgxAlbumTitle = "album_title : " + rgxJsEscapedString,
rgxArtistName = "\"artist\":" + rgxJsEscapedString,
rgxArtistName2 = " name : " + rgxJsEscapedString,
rgxStream = "\"http:\\\\/\\\\/[a-z0-9\\-]+?\\.bandcamp\\.com\\\\/download\\\\/track\\?enc=mp3-128&id=\\d+?&stream=\\d+?&ts=\\d+?\\.\\d+?&tsig=[0-9a-f]+?\"",
rgxTrackTitle = "\"title\":" + rgxJsEscapedString;
string pageUrl = "http://" + paramArtist + ".bandcamp.com/album/" + paramAlbum;
string pageData = wc.DownloadString(pageUrl);
Match mAlbumArt = Regex.Match(pageData, rgxAlbumArt);
Match mAlbumTitle = Regex.Match(pageData, rgxAlbumTitle);
MatchCollection mcStream = Regex.Matches(pageData, rgxStream);
MatchCollection mcTrackTitle = Regex.Matches(pageData, rgxTrackTitle);
string albumArtRemoteUrl = mAlbumArt.Value.Split('"')[1];
string artistName = "";
Match mArtistName = Regex.Match(pageData, rgxArtistName);
if (mArtistName.Success == true)
artistName = Regex.Unescape(mArtistName.Value.Substring(10, mArtistName.Value.Length - 11));
else
{
Match mArtistName2 = Regex.Match(pageData, rgxArtistName2);
artistName = Regex.Unescape(mArtistName2.Value.Substring(12, mArtistName2.Value.Length - 13));
}
string albumTitle = Regex.Unescape(mAlbumTitle.Value.Substring(15, mAlbumTitle.Value.Length - 16));
List<Song> songs = new List<Song>();
for (int i = 0; i < mcStream.Count; i++)
{
if (track == -1 || track == i + 1)
{
Song song = new Song();
song.Album = albumTitle;
song.Artist = artistName;
song.RemoteUrl = Regex.Unescape(mcStream[i].Value.Substring(1, mcStream[i].Value.Length - 2));
song.Title = Regex.Unescape(mcTrackTitle[i + 1].Value.Substring(9, mcTrackTitle[i + 1].Value.Length - 10));
song.TrackNum = (short)(i + 1);
songs.Add(song);
}
}
Console.WriteLine("Downloading " + albumTitle + " by " + artistName);
int[] startPoint = new int[] { Console.CursorLeft, Console.CursorTop };
foreach (Song song in songs)
Console.WriteLine(" " + song.TrackNum.ToString("00") + " " + song.Title);
Console.WriteLine(" " + albumArtFilename);
Console.SetCursorPosition(startPoint[0], startPoint[1]);
foreach (Song song in songs)
{
Console.ForegroundColor = song.Download() ? ConsoleColor.Green : ConsoleColor.Red;
Console.WriteLine(" " + song.TrackNum.ToString("00") + " " + song.Title);
}
string albumArtLocalUrl = Path.Combine(downloadsFolder, PathSanitise(artistName), PathSanitise(albumTitle), albumArtFilename);
Console.ForegroundColor = ConsoleColor.Green;
if (!File.Exists(albumArtLocalUrl))
{
try { wc.DownloadFile(albumArtRemoteUrl, albumArtLocalUrl); }
catch { Console.ForegroundColor = ConsoleColor.Red; }
}
Console.WriteLine(" " + albumArtFilename);
Console.ForegroundColor = ConsoleColor.Gray;
}
static void GetTrack(string paramArtist, string paramTrack)
{
string
rgxAlbumUrl = "album_url: " + rgxJsEscapedString,
rgxTrackNum = "\"track_number\":\\d+?,";
string pageUrl = "http://" + paramArtist + ".bandcamp.com/track/" + paramTrack;
string pageData = wc.DownloadString(pageUrl);
Match mAlbumUrl = Regex.Match(pageData, rgxAlbumUrl);
Match mTrackNum = Regex.Match(pageData, rgxTrackNum);
string albumUrl = Regex.Unescape(mAlbumUrl.Value.Substring(12, mAlbumUrl.Value.Length - 13));
int trackNum = int.Parse(mTrackNum.Value.Substring(15, mTrackNum.Value.Length - 16));
GetAlbum(paramArtist, albumUrl.Replace("/album/", ""), trackNum);
}
private class Song
{
private short _TrackNum = 1;
private string _Title = "Unknown Title";
private string _Artist = "Unknown Artist";
private string _Album = "Unknown Album";
private string _RemoteUrl;
private string _LocalDir
{ get { return Path.Combine(downloadsFolder, PathSanitise(_Artist), PathSanitise(_Album)); } }
private string _LocalUrl
{ get { return Path.Combine(_LocalDir, _TrackNum.ToString("00") + " " + PathSanitise(_Title) + ".mp3"); } }
public short TrackNum
{
get { return _TrackNum; }
set { _TrackNum = value; }
}
public string Title
{
get { return _Title; }
set { _Title = value; }
}
public string Artist
{
get { return _Artist; }
set { _Artist = value; }
}
public string Album
{
get { return _Album; }
set { _Album = value; }
}
public string RemoteUrl
{
get { return _RemoteUrl; }
set { _RemoteUrl = value; }
}
public string LocalDir
{ get { return _LocalDir; } }
public string LocalUrl
{ get { return _LocalUrl; } }
public bool Download()
{
bool success = false;
UltraID3 u = new UltraID3();
if (!Directory.Exists(_LocalDir))
Directory.CreateDirectory(_LocalDir);
if (!File.Exists(_LocalUrl))
{
try { wc.DownloadFile(_RemoteUrl, _LocalUrl); success = true; }
catch { Console.ForegroundColor = ConsoleColor.Red; }
}
else
success = true;
u.Read(_LocalUrl);
u.Album = _Album;
u.Artist = _Artist;
u.Title = _Title;
u.TrackNum = _TrackNum;
u.Write();
return success;
}
}
static string PathSanitise(string s)
{
string comparison = "\\/:*?\"<>|";
string output = "";
for (int i = 0; i < s.Length; i++)
if (!comparison.Contains(s[i].ToString()))
output += s[i];
return output;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment