pjb3 (owner)

Revisions

gist: 228545 Download_button fork
public
Public Clone URL: git://gist.github.com/228545.git
Embed All Files: show embed
download_gutenberg_top_100.rb #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/usr/bin/env ruby
require 'nokogiri'
require 'open-uri'
 
# Download the plain-text file for every book linked from the first ordered
# list on Project Gutenberg's "top" scores page, saving each one into the
# current working directory as "<id>.txt".
#
# URI.open is used instead of the bare Kernel#open: opening URLs through
# Kernel#open was deprecated in Ruby 2.7 and removed in Ruby 3.0.
doc = Nokogiri::HTML(URI.open("http://www.gutenberg.org/browse/scores/top"))

doc.search('ol:first li a').each do |a|
  # Only links whose href contains "etext/<digits>" are downloaded; the
  # captured digits become the book id. `to_s` guards against anchors that
  # have no href attribute at all.
  m = a["href"].to_s.match(/etext\/(\d+)/)
  next unless m

  file_name = "#{m[1]}.txt"
  url = "http://www.gutenberg.org/files/#{m[1]}/#{file_name}"
  puts "Saving #{url} to #{file_name}..."

  begin
    # Fetch the whole body first so that a failed download neither leaves an
    # empty file behind nor truncates a copy saved by an earlier run. The
    # block form closes the remote handle once the body has been read.
    body = URI.open(url, &:read)
    File.open(file_name, "w") { |f| f << body }
  rescue StandardError => ex
    # Best effort: report the failure and move on to the next book.
    # StandardError (rather than Exception) lets Ctrl-C and `exit` still
    # terminate the script.
    $stderr << "ERROR: #{ex.message}\n"
  end
end