Created
July 28, 2009 02:25
-
-
Save lwu/156885 to your computer and use it in GitHub Desktop.
Experiment with Wikipedia opensearch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
#!/usr/bin/env ruby # MacPorts 1.87 | |
# Airport-code fixture: each search term in :input is paired, index by index,
# with the article title in :expected that the lookup should return first.
air = {
  :input    => ["JFK", "LGA", "EWR"],
  :expected => ["JFK Airport", "LaGuardia Airport", "Newark Liberty International Airport"]
}
# Note that opensearch "Mount Whitney NGS" returns empty, but the Wikipedia on-site search tries to return something! | |
# Mountain-peak fixture: each search term in :input is paired, index by index,
# with the article title in :expected that the lookup should return first.
fourteeners = {
  :input => [
    "Mount Whitney NGS", "Mount Williamson", "White Mountain Peak NGS", "North Palisade",
    "Mount Shasta NGS", "Mount Sill", "Mount Russell", "Split Mountain NGS",
    "Mount Langley", "Mount Tyndall", "Mount Muir", "Middle Palisade"
  ],
  :expected => [
    "Mount Whitney", "Mount Williamson", "White Mountain (California)", "North Palisade",
    "Mount Shasta", "Mount Sill", "Mount Russell (California)", "Split Mountain (Sierra Nevada)",
    "Mount Langley", "Mount Tyndall", "Mount Muir", "Middle Palisade"
  ]
}
require 'rubygems'
require 'opensearch'
require 'hpricot'
require 'open-uri'
require 'json'
require 'cgi'
# wikipedia = OpenSearch::OpenSearch.new "INSERT_WEB_ADDRESS_HERE/opensearch/wikipedia.xml" | |
# Directory used by the commented-out crawler below (`cd #$data_dir; curl -O ...`);
# no live code in this file reads it. Ruby does not expand the leading "~" itself.
$data_dir = "~/data"
# def search(engine, query) | |
# feed = engine.search(query) | |
# puts feed | |
# doc = Hpricot(feed) | |
# res = doc/".mw-search-results li a" | |
# href = res[0]['href'] # what if nil? | |
# # if -t file exist blah | |
# # `cd #$data_dir; curl -O http://en.wikipedia.org#{href}` | |
# # don't crawl for now, just get the top hit. we're going to use Wikipedia as the canonical answer | |
# puts href | |
# end | |
# Sends +query+ to the English Wikipedia OpenSearch endpoint and returns the
# parsed JSON response.
#
# query - search term (String); it is CGI-escaped before being placed in the URL.
#
# Returns whatever JSON.parse yields for the response body. The caller in this
# file destructures it as [query, results, ...] -- presumably the second
# element is the list of matching article titles; confirm against the API docs.
# Raises any error open-uri or JSON.parse raises (network failure, bad JSON).
def search_wikipedia(query)
  escaped = CGI.escape(query)
  sleep 0.05 # short pause before every request (presumably to go easy on the API)
  wuri = "http://en.wikipedia.org/w/api.php?action=opensearch&search=#{escaped}"

  # Kernel#open stopped opening URLs in Ruby 3.0; URI.open is the open-uri
  # entry point. Fall back to Kernel#open on old versions that lack URI.open.
  body = URI.respond_to?(:open) ? URI.open(wuri, &:read) : open(wuri, &:read)
  JSON.parse(body)
end
# Runs every term in it[:input] through search_wikipedia and checks the top hit
# against the entry at the same index in it[:expected].
#
# it - Hash with two parallel arrays: :input (search terms) and :expected
#      (the title each term should return first).
#
# Prints the full result list for each term, then one Pass/Expected line per
# term, then a failure tally. Returns nil (the value of the final puts).
def test(it)
  top_hits = it[:input].map do |term|
    _query, results = search_wikipedia(term)
    results = Array(results) # tolerate a missing result list
    puts results.join('; ')
    results.first # nil when the search came back empty
  end

  failures = 0
  top_hits.zip(it[:expected]).each do |found, expected|
    if found == expected
      puts "Pass '#{found}'"
    else
      # An empty search yields nil here; calling empty? on it directly would raise.
      shown = found.nil? || found.empty? ? '(empty set)' : found
      puts "Expected '#{expected}', found '#{shown}'"
      failures += 1
    end
  end

  puts "(Wikipedia) # Fail = #{failures}/#{it[:input].size}! \n"
end
# Script entry point: only the fourteeners fixture is run; the airport fixture
# is left disabled.
# test(air)
test(fourteeners)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment