Last active
August 29, 2015 14:16
-
-
Save mikker/26ad0a071e9482021f17 to your computer and use it in GitHub Desktop.
numnumnum is a stupid Krak.dk scraper API - http://numnumnum.herokuapp.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require 'bundler/setup' | |
| Bundler.require :default | |
| require 'json' | |
| require 'open-uri' | |
| require 'cgi' | |
# One search hit scraped from a Krak.dk result page.
#
# Members: +type+ (:person or :company), +name+, +phone+, +address+,
# +postal_code+ and +city+. Everything except +type+ is the
# whitespace-stripped text of the matching element, or "" when the
# selector matched nothing that carries text.
class Info < Struct.new(:type, :name, :phone, :address, :postal_code, :city)
  class << self
    # Builds a :person record from one person hit.
    #
    # handler - any object answering +css(selector)+ with something that
    #           responds to +text+ (the scraper passes the nodes it gets
    #           from Nokogiri).
    #
    # Returns an Info whose address is "street, village"; a part that is
    # empty is left out, so no dangling or leading ", " is produced.
    def from_person(handler)
      street  = text_at(handler, '.hit-street-address')
      village = text_at(handler, '.hit-postal-place-name')
      address = [street, village].reject(&:empty?).join(', ')

      new(
        :person,
        text_at(handler, '.hit-name-ellipsis'),
        text_at(handler, '.hit-phone-number'),
        address,
        text_at(handler, '.hit-postal-code'),
        text_at(handler, '.hit-address-locality')
      )
    end

    # Builds a :company record from one company hit.
    #
    # handler - same contract as for +from_person+; note that company hits
    #           use different selectors for the address parts.
    #
    # Returns an Info.
    def from_company(handler)
      new(
        :company,
        text_at(handler, '.hit-company-name-ellipsis'),
        text_at(handler, '.hit-phone-number'),
        text_at(handler, '.street-address'),
        text_at(handler, '.postal-code'),
        text_at(handler, '.locality')
      )
    end

    private

    # Stripped text content of whatever +selector+ matches below +node+.
    def text_at(node, selector)
      node.css(selector).text.strip
    end
  end

  # Serializes the record as a JSON object keyed by member name.
  # Any arguments are forwarded to Hash#to_json.
  def to_json(*args)
    to_h.to_json(*args)
  end
end
# Looks a phone number up on Krak.dk and converts the hits on the result
# pages into Info records.
class Scraper
  # Runs both the person search and the company search for +str+.
  #
  # str - the phone number; anything responding to +to_s+. A leading "+45"
  #       prefix is removed.
  #
  # Returns an Array of Info: person hits first, then company hits.
  def self.get_all(str)
    scraper = new(str)
    scraper.get_people + scraper.get_companies
  end

  # str - the phone number to search for (see +get_all+).
  def initialize(str)
    @number = parse(str)
  end

  # Fetches the person result page and returns one Info per hit.
  def get_people
    scrape("http://www.krak.dk/person/resultat/#{CGI.escape(@number)}") do |hit|
      Info.from_person(hit)
    end
  end

  # Fetches the company result page and returns one Info per hit.
  def get_companies
    scrape("http://www.krak.dk/#{CGI.escape(@number)}/s%C3%B8g.cs") do |hit|
      Info.from_company(hit)
    end
  end

  private

  # Downloads +url+ and maps every 'article.hit' element through the block.
  # Returns [] when the page could not be fetched.
  def scrape(url, &block)
    body = get(url)
    return [] unless body

    # NOTE(review): the page is parsed with the XML parser, as before;
    # Nokogiri::HTML may be the better fit for an HTML page - confirm.
    Nokogiri::XML(body).css('article.hit').map(&block)
  end

  # Returns the response body of +url+, or nil on an HTTP error status.
  #
  # Uses URI#read (provided by open-uri) instead of Kernel#open: opening
  # URLs through Kernel#open was deprecated in Ruby 2.7 and removed in 3.0.
  def get(url)
    URI.parse(url).read
  rescue OpenURI::HTTPError
    nil
  end

  # Normalizes the raw input into the string that is searched for:
  # surrounding whitespace and a leading "+45" prefix (plus any whitespace
  # right after it) are removed. \A anchors at the very start of the
  # string, whereas ^ would also match after every newline.
  def parse(str)
    str.to_s.strip.sub(/\A\+45\s*/, '')
  end
end
# Sinatra front end: GET /<number> answers with the scraped hits as JSON.
class Num < Sinatra::Base
  # Plain-text usage hint.
  get '/' do
    content_type "text/plain"
    # Sinatra's url helper builds the absolute URL of this app's root.
    # env["REQUEST_URI"] is not part of the Rack specification, so it can be
    # missing or hold only a path, depending on the server.
    host = url('/')
    <<-TEXT
usage: $ curl --silent #{host}12345678
https://gist.github.com/mikker/26ad0a071e9482021f17
    TEXT
  end

  # Keeps browser favicon requests from reaching the '/:number' route.
  get '/favicon.ico' do
    status 404
  end

  # Looks up params[:number] and renders the hits as a JSON array.
  # Responds with 404 and an error object when nothing was found.
  get '/:number' do
    content_type 'application/json; charset=utf-8'
    result = Scraper.get_all(params[:number])

    # Scraper.get_all always returns an Array, which is truthy even when
    # empty, so "nothing found" has to be detected with empty?.
    if result.empty?
      status 404
      { error: "not found" }.to_json
    else
      result.to_json
    end
  end
end
# Rackup entry point: hands an instance of the Num app to the Rack server.
run Num.new
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A wild Krak API appears