Created
February 10, 2015 23:10
-
-
Save devton/c4b06c3a727220821c2a to your computer and use it in GitHub Desktop.
Crawler::UrlParser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'uri'

module Crawler
  # Breaks a URL string into its scheme, host, path, fragment and query string.
  #
  # A URL given without an http:// or https:// prefix is treated as an HTTP
  # URL. A query string embedded in the fragment (as in "#!/page?x=1") is
  # split off the fragment and takes precedence over the regular query.
  class UrlParser
    # Builds a parser for +url+ and immediately returns its component hash.
    #
    # @param url [String] absolute URL, with or without an http(s) scheme
    # @return [Hash] see {#parse!}
    # @raise [ArgumentError] when +url+ is not a String
    # @raise [URI::InvalidURIError] when the URL cannot be parsed
    def self.parse(url)
      new(url).parse!
    end

    # @param url [String] absolute URL, with or without an http(s) scheme
    # @raise [ArgumentError] when +url+ is not a String
    # @raise [URI::InvalidURIError] when the URL cannot be parsed
    def initialize(url)
      raise ArgumentError, 'url must be a String' unless url.is_a?(String)

      # Add the default scheme *before* parsing: scheme-less input such as
      # "127.0.0.1:3000/foo" is not a valid URI on its own. The check is
      # case-insensitive because URI schemes are.
      with_scheme = url.downcase.start_with?('http://', 'https://') ? url : "http://#{url}"
      @url = URI(with_scheme)
    end

    # Assembles the hash describing the URL.
    #
    # @return [Hash] with keys :url_scheme, :host, :path, :fragment and
    #   :query_strings; :fragment and :query_strings are nil when absent
    def parse!
      {
        url_scheme: @url.scheme,
        host: @url.host,
        path: url_path,
        fragment: url_fragment,
        query_strings: url_queries
      }
    end

    protected

    # Path of the URL, defaulting to "/" when the URL has none.
    #   'http://foo.bar/lorem' => '/lorem'
    #
    # @return [String]
    def url_path
      path = @url.path.to_s
      path.strip.empty? ? '/' : path
    end

    # Fragment of the URL including the leading "#", without any query
    # string that was embedded in it.
    #   'http://foo.bar/lorem#foo-bar' => '#foo-bar'
    #
    # @return [String, nil] nil when the URL has no fragment
    def url_fragment
      name, = fragment_parts
      "##{name}" if name
    end

    # Query string of the URL. A non-empty query embedded in the fragment
    # wins over the regular one.
    #   'http://foo.bar/lorem?foo=bar&bar=foo' => 'foo=bar&bar=foo'
    #
    # @return [String, nil] nil when the URL carries no query string
    def url_queries
      _, embedded = fragment_parts
      return embedded unless embedded.nil? || embedded.empty?

      @url.query
    end

    # Splits the raw fragment at its first "?" into [fragment, query].
    # Memoized so url_fragment and url_queries can be called in any order.
    #
    # @return [Array<(String, String)>, Array<(nil, nil)>]
    def fragment_parts
      @fragment_parts ||= begin
        raw = @url.fragment.to_s
        # Limit of 2 keeps any later "?" inside the query and guarantees a
        # first element even when the fragment is just "?".
        raw.strip.empty? ? [nil, nil] : raw.split('?', 2)
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment