@bshyong
Last active October 17, 2015 03:05
Quick scraper to fetch first paragraph of Wikipedia for a list of topics
require 'open-uri'
require 'nokogiri'
require 'csv'

# Scrape article pages directly; the commented-out URL is the MediaWiki API
# alternative (see the sketch after the script).
# BASE_URL = 'http://en.wikipedia.org/w/api.php?format=json&action=query&prop=revisions&rvprop=content&titles='
BASE_URL = 'http://en.wikipedia.org/wiki/'

# Prompt until filenames are given. `chomp` is used rather than `chomp!`,
# which returns nil when there is no trailing newline to remove.
while $input_filename.nil?
  puts "Enter input filename (should be a CSV)"
  $input_filename = gets.chomp
end

while $filename.nil?
  puts "Enter a filename (output will be stored as a CSV)"
  $filename = gets.chomp
end

output = File.new("#{$filename}.csv", "a+")

CSV.foreach($input_filename) do |row|
  puts "processing #{row[0]}"
  # Wikipedia article URLs use underscores in place of spaces.
  puts url = BASE_URL + row[0].gsub(' ', '_')
  begin
    # Ruby 3 removed open-uri's Kernel#open patch, so call URI.open directly.
    doc = Nokogiri::HTML(URI.open(url))
    # First paragraph of the article body, trimmed to its first two sentences.
    description = doc.css('#mw-content-text').css('p')[0].content.split('.')[0, 2].join('. ')
    output.puts(CSV.generate_line([row[0], description]))
  rescue => e
    puts "something went wrong! skipping"
    puts e.message
  end
end

output.close
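To try it out, a small driver like the one below can generate a sample input CSV. The filenames, the script name wiki_scraper.rb, and the topics are all illustrative, and the transcript in the comments shows roughly what a run looks like (exact summaries depend on the live articles):

require 'csv'

# Build a one-column input file; each row holds a topic name.
CSV.open('topics.csv', 'w') do |csv|
  csv << ['Ruby (programming language)']
  csv << ['Nokogiri']
end

# A run would then look roughly like:
#   $ ruby wiki_scraper.rb
#   Enter input filename (should be a CSV)
#   topics.csv
#   Enter a filename (output will be stored as a CSV)
#   summaries
#   processing Ruby (programming language)
#   http://en.wikipedia.org/wiki/Ruby_(programming_language)
#   ...
# leaving summaries.csv with one "topic,first two sentences" row per topic.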
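The commented-out BASE_URL hints at a cleaner alternative: asking the MediaWiki API for article text instead of scraping HTML. Below is a minimal sketch of that approach; it swaps in the TextExtracts "extracts" prop (plain-text intro) for the "revisions" prop in the original comment, and omits error handling:

require 'open-uri'
require 'json'
require 'cgi'

API_URL = 'https://en.wikipedia.org/w/api.php' \
          '?action=query&prop=extracts&exintro&explaintext&format=json&titles='

def intro_for(title)
  json = JSON.parse(URI.open(API_URL + CGI.escape(title)).read)
  # The API keys pages by internal page id, so take the first page's extract.
  json.dig('query', 'pages')&.values&.first&.fetch('extract', nil)
end

puts intro_for('Ruby (programming language)')

CGI.escape handles characters the simple space-to-underscore substitution does not, which is presumably why the original required 'cgi'.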