Skip to content

Instantly share code, notes, and snippets.

@jmarsh24
Created March 8, 2021 22:04
Show Gist options
  • Save jmarsh24/09bba91d88717737f845dbc2d83b3535 to your computer and use it in GitHub Desktop.
Save jmarsh24/09bba91d88717737f845dbc2d83b3535 to your computer and use it in GitHub Desktop.
(byebug) Faraday.get(RECODO_URL_PREFIX + @id.to_s + RECODO_URL_SUFFIX)
#<Faraday::Response:0x00007f8c30283ea8 @on_complete_callbacks=[], @env=#<Faraday::Env @method=:get @url=#<URI::HTTPS https://www.el-recodo.com/music?id=%7B%3Aid%3D%3E1%7D&lang=en> @request=#<Faraday::RequestOptions (empty)> @request_headers={"User-Agent"=>"Faraday v1.1.0"} @ssl=#<Faraday::SSLOptions verify=true> @response=#<Faraday::Response:0x00007f8c30283ea8 ...> @response_headers={"date"=>"Mon, 08 Mar 2021 22:00:49 GMT", "content-type"=>"text/html; charset=UTF-8", "transfer-encoding"=>"chunked", "connection"=>"keep-alive", "server"=>"Apache", "content-location"=>"music.php", "vary"=>"negotiate", "tcn"=>"choice", "x-powered-by"=>"PHP/7.3", "set-cookie"=>"PHPSESSID=42423b2d38b4be296c7c9b162e837e37; path=/", "expires"=>"Thu, 19 Nov 1981 08:52:00 GMT", "cache-control"=>"no-store, no-cache, must-revalidate", "pragma"=>"no-cache", "location"=>"404", "content-security-policy"=>"upgrade-insecure-requests"} @status=302 @reason_phrase="Found" @response_body="">>
(byebug) ap Faraday.get(RECODO_URL_PREFIX + @id.to_s + RECODO_URL_SUFFIX)
{
:status => 302,
:body => "",
:response_headers => {
"date" => "Mon, 08 Mar 2021 22:00:55 GMT",
"content-type" => "text/html; charset=UTF-8",
"transfer-encoding" => "chunked",
"connection" => "keep-alive",
"server" => "Apache",
"content-location" => "music.php",
"vary" => "negotiate",
"tcn" => "choice",
"x-powered-by" => "PHP/7.3",
"set-cookie" => "PHPSESSID=6f0708f31c27068c2dda0b24fe81985f; path=/",
"expires" => "Thu, 19 Nov 1981 08:52:00 GMT",
"cache-control" => "no-store, no-cache, must-revalidate",
"pragma" => "no-cache",
"location" => "404",
"content-security-policy" => "upgrade-insecure-requests"
}
}
nil
(byebug)
# Fetches one el-recodo.com song page and, when the page carries lyrics and a
# local Song with the same title and artist exists, stores the lyrics and the
# el-recodo id on that Song.
class Song::RecodoLyrics::Page
  RECODO_URL_PREFIX = "https://www.el-recodo.com/music?id=".freeze
  RECODO_URL_SUFFIX = "&lang=en".freeze
  RECODO_LYRICS_HTML_MARKER = "p#geniusText".freeze
  RECODO_METADATA_HTML_MARKER = "div.list-group.lead a".freeze

  # Accepts the id either positionally (Page.new(42)) or as a keyword
  # (Page.new(id: 42)). With a positional-only signature Ruby turns the
  # keyword call into a Hash argument, which then ends up in the URL as
  # "{:id=>42}" and makes the site answer with a redirect to its 404 page.
  #
  # @param positional_id [Integer, String, nil] el-recodo song id
  # @param id [Integer, String] same id, keyword form
  # @raise [ArgumentError] when no id is supplied in either form
  def initialize(positional_id = nil, id: positional_id)
    raise ArgumentError, "id is required" if id.nil?

    @id = id
  end

  # Copies the scraped lyrics onto the first Song whose title and artist match
  # the page metadata (accent- and case-insensitive comparison done in SQL).
  #
  # @return [Boolean, nil] the result of Song#update, or nil when the page has
  #   no lyrics, no usable title/artist, or no matching Song exists
  def update_song_from_lyrics
    return if lyrics.blank?
    # Without both values there is nothing meaningful to match against, so
    # skip the database round trip entirely.
    return if title.blank? || artist.blank?

    song = ::Song.where(
      "unaccent(title) ILIKE unaccent(?) AND unaccent(artist) ILIKE unaccent(?)", title, artist
    ).first
    return unless song

    song.update(el_recodo_song_id: @id, lyrics: lyrics)
  end

  private

  # Artist name taken from the fourth metadata link on the page.
  def artist
    @artist ||= metadata_value(3)
  end

  # Song title taken from the second metadata link on the page.
  def title
    @title ||= metadata_value(1)
  end

  # Text of the metadata link at +index+ with its first whitespace-separated
  # word removed (presumably a label preceding the value; verify against the
  # page markup). Returns nil when the link is missing.
  def metadata_value(index)
    node = metadata[index]
    return if node.blank?

    # drop(1) yields [] for an empty list, whereas [1..] would yield nil and
    # make the following join raise NoMethodError.
    node.text.strip.split.drop(1).join(" ")
  end

  # All anchor nodes matched by the metadata selector.
  def metadata
    @metadata ||= parsed_body.css(RECODO_METADATA_HTML_MARKER)
  end

  # Concatenated text of the lyrics paragraph; "" when the page has none.
  def lyrics
    @lyrics ||= parsed_body.css(RECODO_LYRICS_HTML_MARKER).text
  end

  # Parsed HTML document; memoized so the page is requested only once.
  def parsed_body
    @parsed_body ||= ::Nokogiri::HTML(raw.body)
  end

  # Performs the HTTP GET for this page.
  def raw
    ::Faraday.get("#{RECODO_URL_PREFIX}#{@id}#{RECODO_URL_SUFFIX}")
  end
end
# Iterates over every candidate el-recodo song id (1..SONG_ID_MAX_NUMBER) and
# asks the corresponding Page to copy its lyrics onto a matching Song.
class Song::RecodoLyrics
  SONG_ID_MAX_NUMBER = 20_000

  class << self
    # Song::RecodoLyrics.fetch forwards to Song::RecodoLyrics.new.fetch.
    delegate :fetch, to: :new
  end

  # Processes each id in ascending order, one page request per id.
  #
  # @return [Range] the id range that was iterated
  def fetch
    (1..SONG_ID_MAX_NUMBER).each { |id| fetch_page(id) }
  end

  private

  # Logs the id being processed and runs the lyrics update for that page.
  def fetch_page(id)
    Rails.logger.info "Page Number: #{id}"
    # The id must be passed positionally: Page#initialize declares a plain
    # positional parameter, so `Page.new(id: id)` would hand it the Hash
    # {id: id} and the request URL would contain "{:id=>1}" instead of "1".
    Page.new(id).update_song_from_lyrics
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment