Created
December 28, 2015 19:01
-
-
Save scien/a1dd2656043ede6c383c to your computer and use it in GitHub Desktop.
ballet songs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# dependencies
fs = require 'fs'
http = require 'http'
https = require 'https'
path = require 'path'
url = require 'url'
Crawler = require 'crawler'

# script
#
# Crawls balletclassmusicforfree.com starting at /new, follows every
# root-relative link it finds, and saves each linked mp3 into ./songs using a
# file name assembled from the composer / title / exercise shown on the page.

SITE_ROOT = 'http://balletclassmusicforfree.com'
SONGS_DIR = path.join __dirname, 'songs'

crawled = {}     # absolute URLs of pages whose links were already processed
queued = {}      # absolute URLs already handed to the crawler
downloaded = {}  # absolute mp3 URLs already requested

# createWriteStream does not create missing directories; make sure the target
# folder exists before the first download starts.
fs.mkdirSync SONGS_DIR unless fs.existsSync SONGS_DIR

# Queue a page for crawling at most once.
enqueue = (pageUrl) ->
  return if queued[pageUrl]
  queued[pageUrl] = true
  c.queue pageUrl
  return

# Build the local file name for the mp3 linked by anchor `a`, using the
# metadata fields found around the anchor on the page.
#   $      - selector function the crawler passes for the fetched page
#   a      - the anchor element pointing at the mp3
#   mp3Url - absolute URL of the mp3 (fallback name when no metadata is found)
# Returns a file name (no directory part) ending in '.mp3'.
buildFilename = ($, a, mp3Url) ->
  $content = $(a).closest '.content'
  $region = $content.closest '.region-inner'
  composer = $content.find('.field-name-field-composer a').text()
  exercise = $content.find('.field-name-field-exercise a').text()
  title = $region.find('h1.title').text()
  filename = ''
  filename += "#{composer} - " if composer
  filename += "#{title} " if title
  filename += "(#{exercise})" if exercise
  filename = filename.trim()
  # Without any metadata every song would be written to the same '.mp3' file;
  # fall back to the name used in the URL instead.
  unless filename
    filename = path.basename(url.parse(mp3Url).pathname or '', '.mp3') or 'untitled'
  filename += '.mp3'
  # replace forward slash with division symbol (a '/' would be read as a
  # directory separator by path.join)
  filename.replace /\//g, '∕'

# Stream `mp3Url` into SONGS_DIR/`filename`. Failures are logged and skipped
# so that one bad link does not stop the crawl.
download = (mp3Url, filename) ->
  client = if url.parse(mp3Url).protocol is 'https:' then https else http
  req = client.get mp3Url, (resp) ->
    if resp.statusCode isnt 200
      console.error "SKIP #{mp3Url}: HTTP #{resp.statusCode}"
      resp.resume() # discard the body so the connection is released
      return
    # Open the file only once a good response arrives, so failed requests do
    # not leave empty files behind.
    file = fs.createWriteStream path.join(SONGS_DIR, filename)
    file.on 'error', (err) ->
      console.error "WRITE FAILED #{filename}: #{err.message}"
    resp.pipe file
    return
  req.on 'error', (err) ->
    console.error "DOWNLOAD FAILED #{mp3Url}: #{err.message}"
  return

c = new Crawler {
  maxConnections: 25
  callback: (error, result, $) ->
    if error
      console.error "CRAWL FAILED: #{error.message or error}"
      return
    # NOTE(review): presumably `$` is missing when the response is not HTML
    # (e.g. a queued mp3) - confirm against the crawler version in use.
    return unless result? and $?
    {request} = result
    pageUrl = request.href
    return if crawled[pageUrl]
    crawled[pageUrl] = true
    console.log pageUrl
    # crawl
    $('a').each (index, a) ->
      href = $(a).attr 'href'
      return if not href
      # `crawled` is keyed by absolute URL, so resolve relative links first
      target = url.resolve pageUrl, href
      return if crawled[target]
      # if page is on this website, crawl it
      if href[0] is '/'
        enqueue "#{SITE_ROOT}#{href}"
      if href.indexOf('mp3') > -1
        protocol = url.parse(target).protocol
        # skip links that cannot be fetched over http(s) and repeats of the
        # same file linked from several pages
        if protocol in ['http:', 'https:'] and not downloaded[target]
          downloaded[target] = true
          console.log 'DOWNLOAD MP3!', target
          download target, buildFilename($, a, target)
      # explicit return: a `false` result would stop the .each iteration
      return
    return
}

enqueue "#{SITE_ROOT}/new"
Author
scien
commented
Dec 28, 2015
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment