adamburmister/hypem-crawler.rb

## hypem-crawler.rb
#!/usr/bin/ruby

require "rubygems"
require "hpricot"
require "open-uri"
require "builder"
require "httpclient"
require "yaml"
require "cgi"

module Flog
  # Crawls a web page for recently posted tracks, downloads the linked MP3s
  # with wget and uploads every MP3 found in the download directory to a
  # WebDAV server. All settings are read from flog_radio_config.yml, which is
  # expected to sit next to this script.
  class Radio

    CRAWL_CONFIG = YAML.load_file(File.join(File.dirname(__FILE__), '/flog_radio_config.yml'))

    # Matches anchors whose onmousedown handler rewrites the href to a
    # /go/track/<id> link.
    TRACK_LINK_PATTERN = /onmousedown=\"this.href='\/go\/track\/\d*'; return false;\"/

    # Runs the whole pipeline: fetch the page, collect track URLs, download
    # them, then upload the download directory.
    def initialize
      puts "Downloading MP3s from #{CRAWL_CONFIG['website_url']}"
      page = grab_page(CRAWL_CONFIG['website_url'])
      download_mp3s(scrape_mp3_urls(page))
      upload_to_webdav
    end

    # Fetches +url+ with the configured User-Agent and parses it with Hpricot.
    #
    # @param url [String] address of the page to fetch
    # @return [Hpricot::Doc] the parsed document
    def grab_page(url)
      # Kernel#open stopped fetching URLs in Ruby 3.0; the #open that open-uri
      # adds to URI objects works on old and new Rubies alike. The block form
      # closes the response handle once the document has been parsed.
      URI.parse(url).open('User-Agent' => CRAWL_CONFIG['user_agent']) { |page| Hpricot(page) }
    end

    # Collects the href of every track link inside "#recently-posted".
    #
    # @param html [Hpricot::Doc] parsed page as returned by #grab_page
    # @return [Array<String>] unique hrefs; empty when nothing matched
    def scrape_mp3_urls(html)
      track_links = (html / '#recently-posted a').select { |el| el.to_html =~ TRACK_LINK_PATTERN }
      # uniq (not uniq!): the bang variant returns nil when there is nothing
      # to remove, which used to make the caller crash on nil.each.
      track_links.map { |el| el.attributes['href'] }.compact.uniq
    end

    # Downloads the MP3s reachable from each URL into the download directory.
    #
    # @param urls [Array<String>, nil] pages to hand to wget
    # @return [Array<String>] the URLs that were processed
    # @raise [Errno::ENOENT] when wget cannot be executed
    def download_mp3s(urls = [])
      Array(urls).each do |url|
        # Argument-list form of system: no shell is involved, so characters
        # such as "&", ";" or spaces in a scraped URL cannot break or inject
        # into the command line.
        launched = system('wget', '-r', '-l1', '-H', '-t1', '-nd', '-N', '-np', '-A.mp3', '-erobots=off',
                          '-U', CRAWL_CONFIG['user_agent'].to_s, '-P', download_path, url.to_s)
        # nil means wget itself could not be started; a non-zero wget exit
        # status is still ignored, as before.
        raise Errno::ENOENT, 'wget' if launched.nil?
      end
    end

    # Uploads every MP3 in the download directory, oldest first, via HTTP PUT.
    # The target is webdav_url concatenated directly with the escaped file
    # name, so webdav_url is expected to end with "/".
    #
    # @return [Array<String>] paths of the files that were sent
    def upload_to_webdav
      url = URI.parse(CRAWL_CONFIG['webdav_url'])

      client = HTTPClient.new
      # Credentials do not change between files, so register them once.
      client.set_auth(url, CRAWL_CONFIG['webdav_username'], CRAWL_CONFIG['webdav_password'])

      # Skip anything that is not a regular file (e.g. a directory whose name
      # ends in .mp3), which could not be read anyway.
      mp3s = Dir.glob(File.join(download_path, '*.mp3')).select { |f| File.file?(f) }
      mp3s.sort_by { |f| File.mtime(f) }.each do |mp3|
        dst = "#{url}#{CGI.escape(File.basename(mp3)).tr('+', '_')}"
        puts "Uploading to #{dst}"
        # binread opens in binary mode and closes the handle when done.
        response = client.put(dst, File.binread(mp3))
        unless (200..299).cover?(response.status)
          warn "Upload of #{dst} failed with HTTP status #{response.status}"
        end
      end
    end

    private

    # Directory wget writes to and the uploader reads from. A relative
    # download_dir is resolved against the script's directory rather than the
    # current working directory, so both steps always use the same location.
    #
    # @return [String] absolute path of the download directory
    def download_path
      File.expand_path(CRAWL_CONFIG['download_dir'], File.dirname(__FILE__))
    end

  end
end

# Script entry point: constructing a Radio runs the crawl, because
# Radio#initialize performs the download and upload itself.
Flog::Radio.new
	#!/usr/bin/ruby

	require "rubygems"
	require "hpricot"
	require "open-uri"
	require "builder"
	require "httpclient"
	require "yaml"
	require "cgi"

	module Flog
	  # Crawls a web page for recently posted tracks, downloads the linked MP3s
	  # with wget and uploads every MP3 found in the download directory to a
	  # WebDAV server. All settings are read from flog_radio_config.yml, which is
	  # expected to sit next to this script.
	  class Radio

	    CRAWL_CONFIG = YAML.load_file(File.join(File.dirname(__FILE__), '/flog_radio_config.yml'))

	    # Matches anchors whose onmousedown handler rewrites the href to a
	    # /go/track/<id> link.
	    TRACK_LINK_PATTERN = /onmousedown=\"this.href='\/go\/track\/\d*'; return false;\"/

	    # Runs the whole pipeline: fetch the page, collect track URLs, download
	    # them, then upload the download directory.
	    def initialize
	      puts "Downloading MP3s from #{CRAWL_CONFIG['website_url']}"
	      page = grab_page(CRAWL_CONFIG['website_url'])
	      download_mp3s(scrape_mp3_urls(page))
	      upload_to_webdav
	    end

	    # Fetches +url+ with the configured User-Agent and parses it with Hpricot.
	    #
	    # @param url [String] address of the page to fetch
	    # @return [Hpricot::Doc] the parsed document
	    def grab_page(url)
	      # Kernel#open stopped fetching URLs in Ruby 3.0; the #open that open-uri
	      # adds to URI objects works on old and new Rubies alike. The block form
	      # closes the response handle once the document has been parsed.
	      URI.parse(url).open('User-Agent' => CRAWL_CONFIG['user_agent']) { |page| Hpricot(page) }
	    end

	    # Collects the href of every track link inside "#recently-posted".
	    #
	    # @param html [Hpricot::Doc] parsed page as returned by #grab_page
	    # @return [Array<String>] unique hrefs; empty when nothing matched
	    def scrape_mp3_urls(html)
	      track_links = (html / '#recently-posted a').select { |el| el.to_html =~ TRACK_LINK_PATTERN }
	      # uniq (not uniq!): the bang variant returns nil when there is nothing
	      # to remove, which used to make the caller crash on nil.each.
	      track_links.map { |el| el.attributes['href'] }.compact.uniq
	    end

	    # Downloads the MP3s reachable from each URL into the download directory.
	    #
	    # @param urls [Array<String>, nil] pages to hand to wget
	    # @return [Array<String>] the URLs that were processed
	    # @raise [Errno::ENOENT] when wget cannot be executed
	    def download_mp3s(urls = [])
	      Array(urls).each do |url|
	        # Argument-list form of system: no shell is involved, so characters
	        # such as "&", ";" or spaces in a scraped URL cannot break or inject
	        # into the command line.
	        launched = system('wget', '-r', '-l1', '-H', '-t1', '-nd', '-N', '-np', '-A.mp3', '-erobots=off',
	                          '-U', CRAWL_CONFIG['user_agent'].to_s, '-P', download_path, url.to_s)
	        # nil means wget itself could not be started; a non-zero wget exit
	        # status is still ignored, as before.
	        raise Errno::ENOENT, 'wget' if launched.nil?
	      end
	    end

	    # Uploads every MP3 in the download directory, oldest first, via HTTP PUT.
	    # The target is webdav_url concatenated directly with the escaped file
	    # name, so webdav_url is expected to end with "/".
	    #
	    # @return [Array<String>] paths of the files that were sent
	    def upload_to_webdav
	      url = URI.parse(CRAWL_CONFIG['webdav_url'])

	      client = HTTPClient.new
	      # Credentials do not change between files, so register them once.
	      client.set_auth(url, CRAWL_CONFIG['webdav_username'], CRAWL_CONFIG['webdav_password'])

	      # Skip anything that is not a regular file (e.g. a directory whose name
	      # ends in .mp3), which could not be read anyway.
	      mp3s = Dir.glob(File.join(download_path, '*.mp3')).select { |f| File.file?(f) }
	      mp3s.sort_by { |f| File.mtime(f) }.each do |mp3|
	        dst = "#{url}#{CGI.escape(File.basename(mp3)).tr('+', '_')}"
	        puts "Uploading to #{dst}"
	        # binread opens in binary mode and closes the handle when done.
	        response = client.put(dst, File.binread(mp3))
	        unless (200..299).cover?(response.status)
	          warn "Upload of #{dst} failed with HTTP status #{response.status}"
	        end
	      end
	    end

	    private

	    # Directory wget writes to and the uploader reads from. A relative
	    # download_dir is resolved against the script's directory rather than the
	    # current working directory, so both steps always use the same location.
	    #
	    # @return [String] absolute path of the download directory
	    def download_path
	      File.expand_path(CRAWL_CONFIG['download_dir'], File.dirname(__FILE__))
	    end

	  end
	end

	# Script entry point: constructing a Radio runs the crawl, because
	# Radio#initialize performs the download and upload itself.
	Flog::Radio.new