lwu (owner)

Revisions

gist: 156885 Download_button fork
public
Description:
Experiment with Wikipedia opensearch
Public Clone URL: git://gist.github.com/156885.git
Embed All Files: show embed
geo_test.rb #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/ruby
#!/usr/bin/env ruby # MacPorts 1.87
 
 
# Fixture: New York-area airport codes to search for, paired index-by-index
# with the Wikipedia article title expected as the top hit.
air = {
  :input    => %w[JFK LGA EWR],
  :expected => [
    "JFK Airport",
    "LaGuardia Airport",
    "Newark Liberty International Airport"
  ]
}
 
# Fixture: California fourteeners. Each row is [search term, expected article].
# Note: the opensearch API returns an empty result list for "Mount Whitney NGS",
# whereas Wikipedia's on-site search still tries to return something.
fourteener_pairs = [
  ["Mount Whitney NGS",       "Mount Whitney"],
  ["Mount Williamson",        "Mount Williamson"],
  ["White Mountain Peak NGS", "White Mountain (California)"],
  ["North Palisade",          "North Palisade"],
  ["Mount Shasta NGS",        "Mount Shasta"],
  ["Mount Sill",              "Mount Sill"],
  ["Mount Russell",           "Mount Russell (California)"],
  ["Split Mountain NGS",      "Split Mountain (Sierra Nevada)"],
  ["Mount Langley",           "Mount Langley"],
  ["Mount Tyndall",           "Mount Tyndall"],
  ["Mount Muir",              "Mount Muir"],
  ["Middle Palisade",         "Middle Palisade"]
]
# Split the rows back into the parallel :input / :expected arrays.
fourteener_terms, fourteener_titles = fourteener_pairs.transpose
fourteeners = {
  :input    => fourteener_terms,
  :expected => fourteener_titles
}
 
require 'rubygems'
require 'opensearch'
require 'hpricot'
require 'open-uri'
require 'json'
require 'cgi'
 
# wikipedia = OpenSearch::OpenSearch.new "INSERT_WEB_ADDRESS_HERE/opensearch/wikipedia.xml"
 
# Download directory for the disabled crawler sketch below. The "~" is stored
# literally (Ruby does not expand it here); in the visible part of this file
# the only reference is the commented-out curl command.
$data_dir = "~/data"
 
# def search(engine, query)
# feed = engine.search(query)
# puts feed
 
# doc = Hpricot(feed)
# res = doc/".mw-search-results li a"
# href = res[0]['href'] # what if nil?
  
# # if -t file exist blah
# # `cd #$data_dir; curl -O http://en.wikipedia.org#{href}`
 
# # don't crawl for now, just get the top hit. we're going to use Wikipedia as the canonical answer
# puts href
# end
 
# Queries the English Wikipedia opensearch API for +query+.
#
# The term is CGI-escaped before being placed in the URL, and the call pauses
# for 50 ms before each request.
#
# @param query [String] free-text search term
# @return [Object] the parsed JSON response body; the caller in this file
#   destructures it as [query, results, ...]
# @raise network and HTTP errors from open-uri, and JSON parse errors,
#   propagate to the caller
def search_wikipedia(query)
  escaped = CGI.escape(query)
  sleep 0.05
  wuri = "http://en.wikipedia.org/w/api.php?action=opensearch&search=#{escaped}"
  # Kernel#open stopped accepting URLs in Ruby 3.0; opening through the URI
  # object goes via open-uri on old and new Ruby versions alike, and the block
  # form closes the stream once the body has been parsed.
  URI.parse(wuri).open { |f| JSON.parse(f.read) }
end
 
# Runs every search term of a fixture through search_wikipedia and reports
# whether the top hit equals the expected article title.
#
# Output order: one line per term listing all returned titles, then one
# Pass/Expected line per term, then a failure tally.
#
# @param it [Hash] fixture with :input (search terms) and :expected (article
#   titles), paired by index
# @return [nil]
def test(it)
  top_hits = it[:input].map do |term|
    # Only the result list is needed; the echoed query is ignored.
    _query, results = *search_wikipedia(term)
    puts results.join('; ')
    results.first
  end

  failures = 0
  top_hits.each_with_index do |found, index|
    expected = it[:expected][index]
    if found == expected
      puts "Pass '#{found}'"
    else
      # An empty result list leaves the top hit nil; check for nil before
      # calling empty? so the miss is reported instead of raising NoMethodError.
      found = '(empty set)' if found.nil? || found.empty?
      puts "Expected '#{expected}', found '#{found}'"
      failures += 1
    end
  end

  puts "(Wikipedia) # Fail = #{failures}/#{it[:input].size}! \n"
end
 
# Script entry point. The airport fixture run is left disabled:
# test(air)
# Only the fourteeners fixture is exercised; this runs on load and calls
# search_wikipedia once per search term.
test(fourteeners)