Skip to content

Instantly share code, notes, and snippets.

@zloyrusskiy
Created June 22, 2015 20:17
Show Gist options
  • Save zloyrusskiy/99cc8661c8b5d59f9199 to your computer and use it in GitHub Desktop.
Save zloyrusskiy/99cc8661c8b5d59f9199 to your computer and use it in GitHub Desktop.
wiki_path
require 'nokogumbo'
require 'uri'
# Prints the chain of pages through which +path+ was reached, nearest
# parent first, one "parent: ..." line per ancestor.
#
# @param visited [Hash{String => String, nil}] maps a discovered path to the
#   path it was first reached from; a path that maps to nil (or is absent)
#   ends the chain.
# @param path [String] percent-encoded path whose ancestors are printed.
# @return [nil]
def print_parents(visited, path)
  parent = visited[path]
  while parent
    # URI.unescape was removed in Ruby 3.0; the default parser still offers
    # the same percent-decoding.
    puts "parent: #{URI::DEFAULT_PARSER.unescape(parent)}"
    parent = visited[parent]
  end
end
# Downloads the page at +url+ and returns the article-style links on it.
#
# The decoded URL is written to stdout before the request is made, which
# acts as a progress trace while crawling.
#
# @param url [String] absolute URL of the page to fetch.
# @return [Array<String>] href values, in document order, that start with
#   "/wiki/" and contain neither ":" nor "#".
def find_links(url)
  # URI.unescape was removed in Ruby 3.0; the default parser still offers
  # the same percent-decoding.
  puts URI::DEFAULT_PARSER.unescape(url)

  doc = Nokogiri::HTML5.get(url)
  doc.css('a')
     .map { |link| link['href'] }
     .compact # anchors without an href yield nil
     .select { |href| href.start_with?('/wiki/') }
     # ":" presumably marks namespaced pages (File:, Category:, ...) and "#"
     # an in-page anchor; both are skipped.
     .reject { |href| href.include?(':') || href.include?('#') }
end
# Breadth-first search over Wikipedia article links, starting at the page
# for +from_term+ and stopping at the first link to the page for +to_term+.
#
# On success prints "\n>> Found: <target>" followed by the chain of parent
# pages (via print_parents); when the queue runs dry prints "not found".
#
# @param from_term [String] title of the starting article.
# @param to_term [String] title of the article to reach.
# @param lang [String] Wikipedia language subdomain, e.g. "ru".
# @return [nil]
def search_path(from_term, to_term, lang)
  # URI.escape / URI.unescape were removed in Ruby 3.0; the default parser
  # still provides the same percent-encoding and decoding.
  parser = URI::DEFAULT_PARSER

  # Wikipedia hrefs spell spaces as underscores, so a "%20"-encoded title
  # would never compare equal to a link found on a page.
  from_path = parser.escape("/wiki/#{from_term.to_s.tr(' ', '_')}")
  to_path = parser.escape("/wiki/#{to_term.to_s.tr(' ', '_')}")

  # visited maps each discovered path to the path it was first reached from.
  visited = { from_path => nil }
  queue = [from_path]

  until queue.empty?
    path = queue.shift

    find_links(get_url(path, lang)).each do |link|
      unless visited.key?(link)
        visited[link] = path
        queue << link
      end
      next unless link == to_path

      puts "\n>> Found: #{parser.unescape(link)}"
      print_parents(visited, link)
      return
    end
  end

  puts 'not found'
end
# Builds the absolute HTTPS URL of a page on a language-specific Wikipedia.
#
# @param path [String] site-relative path, e.g. "/wiki/Sort".
# @param lang [String] language subdomain, e.g. "ru".
# @return [String] "https://<lang>.wikipedia.org<path>"
def get_url(path, lang)
  "https://#{lang}.wikipedia.org#{path}"
end
# Script entry point. Guarded so that loading this file from other code does
# not start a crawl. Usage: wiki_path [FROM_TERM [TO_TERM [LANG]]]; arguments
# that are omitted fall back to the original hard-coded example.
if __FILE__ == $PROGRAM_NAME
  from_term = ARGV.fetch(0, 'Sort')
  to_term = ARGV.fetch(1, 'SAP')
  lang = ARGV.fetch(2, 'ru')
  search_path(from_term, to_term, lang)
end
@forsaken1
Copy link

На Crystal еще быстрее будет, наверное (пришел отсюда http://habrahabr.ru/post/260883/#comment_8472729)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment