Skip to content

Instantly share code, notes, and snippets.

@stefansundin
Last active August 29, 2015 14:07
Show Gist options
  • Save stefansundin/22259d6309f34084cb8e to your computer and use it in GitHub Desktop.
Save stefansundin/22259d6309f34084cb8e to your computer and use it in GitHub Desktop.
Download all remixes from an indabamusic campaign.
# https://gist.github.com/stefansundin/22259d6309f34084cb8e
require 'net/http'
require 'json'
require 'time'
require 'iso_country_codes'
# Campaign to mirror. The slug is what the script puts after /opportunities/ in
# the API URL. Pass another slug as the first command-line argument to mirror a
# different campaign; with no argument the original contest is used.
slug = ARGV.fetch(0, 'infected-mushroom-kipod-remix-contest')
# Root output folder, created relative to the current working directory.
dest = "indabamusic-#{slug}"
# Streams the resource at +url+ into the local file +dest+ (binary mode),
# following HTTP redirects.
#
# url   - absolute http:// or https:// URL (String).
# dest  - path of the file to write; only created on a 2xx response.
# limit - how many more requests may be made before giving up on a redirect chain.
#
# Returns the Net::HTTPResponse of the request made for +url+.
# Raises ArgumentError when the redirect chain is longer than +limit+.
# Any other non-2xx/3xx response is reported on stderr and leaves +dest+ untouched,
# so callers stay best-effort.
def download(url, dest, limit = 10)
  raise ArgumentError, 'HTTP redirect too deep' if limit == 0

  uri = URI.parse(url)
  http = Net::HTTP.new(uri.host, uri.port)
  # Pick TLS from the scheme so https on a non-standard port works; the port
  # check is kept so URLs that relied on the old behaviour still use TLS.
  http.use_ssl = (uri.scheme == 'https' || uri.port == 443)

  redirect_to = nil
  response = http.request(Net::HTTP::Get.new(uri.request_uri)) do |res|
    case res
    when Net::HTTPSuccess
      # File.open (not Kernel#open) so a path is never treated as a command.
      File.open(dest, 'wb') do |io|
        res.read_body { |chunk| io.write(chunk) }
      end
    when Net::HTTPRedirection
      location = res['location']
      # Location may be relative; resolve it against the URL we just requested.
      redirect_to = URI.join(url, location).to_s if location
    else
      warn "download: unexpected response #{res.code} for #{url}"
    end
  end

  # Follow the redirect only after this request has finished, so connections
  # are not stacked up along the redirect chain.
  download(redirect_to, dest, limit - 1) if redirect_to
  response
end
# Creates the directory +path+ unless it is already there.
# Only the last path component is created; parents must already exist.
# Returns nil when the directory existed, otherwise the result of Dir.mkdir.
def mkdir(path)
  return if File.directory?(path)

  Dir.mkdir(path)
end
# Turns arbitrary text into a conservative file-name fragment: everything that
# is not an ASCII letter, digit, underscore or space is dropped, then leading
# and trailing spaces are trimmed.
def filename_filter(fn)
  disallowed = /[^\w ]+/
  cleaned = fn.gsub(disallowed, '')
  cleaned.strip
end
# Build the output tree: the campaign folder, a folder for the raw API
# responses, and one for the raw comment pages.
[dest, "#{dest}/data", "#{dest}/data/comments"].each { |folder| mkdir(folder) }

# Fetch the campaign record once and keep a copy on disk; its id is what the
# submissions endpoint is queried with.
path = "#{dest}/data/#{slug}.json"
download("https://lydian.indabamusic.com/opportunities/#{slug}", path)
opp = JSON.parse(File.read(path))
id = opp['data']['id']

# Number of submissions processed so far; doubles as the API paging offset.
offset = 0
# Mirror every submission of the campaign, one API page at a time.
# Uses +dest+, +id+, +opp+ and +offset+ from the top of the script.
# For each submission it saves the audio file, the user's avatar, the raw
# comment pages, and a human-readable info.txt.
loop do
  puts "offset: #{offset}"
  page_path = "#{dest}/data/submissions-#{format('%03d', offset)}.json"
  download("https://lydian.indabamusic.com/opportunities/#{id}/submissions?offset=#{offset}&sort_by=created_at", page_path)
  submissions = JSON.parse(File.read(page_path))['data']
  # An empty page means there is nothing more to fetch. Without this guard the
  # offset never advances and the loop spins forever when submission_count is
  # larger than what the API actually returns.
  break if submissions.empty?

  submissions.each_with_index do |item, i|
    user = item['user']
    created_at = Time.parse(item['created_at'])
    dir = "#{created_at.strftime('%F')} - #{filename_filter(user['name'])}"
    mkdir("#{dest}/#{dir}")

    # NOTE(review): the file is named after mp4_url but fetched from
    # preview_url - confirm this is intentional.
    mp4_url = item['mp4_url']
    fn = mp4_url[(mp4_url.rindex('/') + 1)..-1]
    # Pass dir/fn as arguments: a '%' in the file name must not be parsed as a
    # format directive.
    puts format('Downloading %3d: %s/%s', offset + i + 1, dir, fn)

    # Files of 1000 bytes or less are treated as failed downloads and retried.
    audio_path = "#{dest}/#{dir}/#{fn}"
    unless File.exist?(audio_path) && File.size(audio_path) > 1000
      download(item['preview_url'], audio_path)
      # File.size? is nil for a missing file, so a failed download is reported
      # instead of raising.
      puts "filesize < 1 kb" if File.size?(audio_path).to_i < 1000
    end

    avatar_path = "#{dest}/#{dir}/user.png"
    unless File.exist?(avatar_path) && File.size(avatar_path) > 1000
      # Square brackets are not valid in a URI path; percent-encode just those
      # (what URI.escape(url, '[]') did before it was removed in Ruby 3.0).
      avatar_url = user['image_urls']['detail'].gsub('[', '%5B').gsub(']', '%5D')
      download(avatar_url, avatar_path)
    end

    # Page through the comments, keeping each raw page on disk.
    comment_offset = 0
    comments = []
    loop do
      comments_path = "#{dest}/data/comments/#{item['id']}-#{format('%03d', comment_offset)}.json"
      download("https://lydian.indabamusic.com/submissions/#{item['id']}/comments?offset=#{comment_offset}", comments_path)
      batch = JSON.parse(File.read(comments_path))['data']
      batch.each do |comment|
        comments.push(
          timestamp: comment['time'],
          name: comment['user']['name'],
          text: comment['body']
        )
      end
      comment_offset += batch.count
      # Stop on an empty page as well, otherwise an overstated comment_count
      # would loop forever.
      break if batch.empty? || comment_offset >= item['comment_count']
    end
    comments = comments.uniq.sort_by { |comment| comment[:timestamp] }

    File.open("#{dest}/#{dir}/info.txt", 'wb') do |file|
      file << "#{fn}\n"
      file << "By #{user['name']}\n"

      # Join only the location parts that are present so a missing city does
      # not leave a dangling ", ".
      country_code = user['location_country_code']
      place = [user['location_city'], user['location_state']]
      place << IsoCountryCodes.find(country_code).name if country_code && !country_code.empty?
      place = place.map(&:to_s).reject(&:empty?)
      file << "From #{place.join(', ')}" unless place.empty?
      file << "\n"

      # duration is divided by 1000 before the minutes/seconds split -
      # presumably milliseconds; verify against the API.
      mm, ss = (item['duration'] / 1000).divmod(60)
      file << format("Duration: %d:%02d\n", mm, ss)
      file << "\n"
      file << "Description:\n"
      file << "#{item['description']}\n"
      file << "\n"
      file << "Votes: #{item['voter_count']} (#{item['member_voter_count']} member votes)\n"
      file << "Plays: #{item['listen_count']}\n"
      file << "\n"
      file << "\n"
      file << "#{comments.count} comments:\n"
      file << "\n"
      comments.each do |comment|
        # Comment time is handled the same way as duration (value / 1000 -> m:ss).
        mm, ss = (comment[:timestamp] / 1000).divmod(60)
        file << format("%s said at %d:%02d:\n", comment[:name], mm, ss)
        file << "#{comment[:text]}\n"
        file << "\n"
      end
    end
  end

  offset += submissions.count
  break if offset >= opp['data']['submission_count']
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment