Skip to content

Instantly share code, notes, and snippets.

@faucct
Created August 12, 2014 10:49
Show Gist options
  • Save faucct/67e5c05fdecb118dbcdf to your computer and use it in GitHub Desktop.
Save faucct/67e5c05fdecb118dbcdf to your computer and use it in GitHub Desktop.
Parse example
# Builds (unsaved) FragranceNote objects for every note image listed in the
# "Top Notes" / "Middle Notes" / "Base Notes" paragraphs of a fragrance page.
#
# Notes are looked up by the image's +alt+ text and created on first sight;
# a logo is attached only when the note is newly created.
#
# @param fragrance_doc [#xpath] parsed fragrance page (presumably a
#   Nokogiri document - confirm against the caller)
# @return [Array<FragranceNote>] one new FragranceNote per note image, in
#   document order; +fragrance_note_type_id+ is left unset when the
#   paragraph has no recognised heading or the type record is missing
def parse_fragrance_notes(fragrance_doc)
  notes_xpath = '//div[@id="col1"]//div[@style="width: 230px; float: left; text-align: center; clear: left;"]/p'

  fragrance_doc.xpath(notes_xpath).flat_map do |notes_node|
    note_type = fragrance_note_type_for(notes_node)

    notes_node.xpath('span/img').map do |note_img|
      note = find_or_create_fragrance_note(note_img)
      fragrance_note = FragranceNote.new(note_id: note.id)
      fragrance_note.fragrance_note_type_id = note_type.id if note_type
      fragrance_note
    end
  end
end

# Resolves the FragranceNoteType matching the heading found in a notes paragraph.
#
# @param notes_node [#content] paragraph node holding the heading text
# @return [FragranceNoteType, nil] nil when no known heading is present or
#   when +find_by_name+ finds nothing
def fragrance_note_type_for(notes_node)
  text = notes_node.content
  # Checked in this order, first match wins.
  headings = { 'Top Notes' => 'top', 'Middle Notes' => 'middle', 'Base Notes' => 'base' }
  _heading, type_name = headings.find { |heading, _name| text.include?(heading) }
  type_name && FragranceNoteType.find_by_name(type_name)
end

# Finds the Note named after the image's +alt+ text, creating it if needed.
#
# @param note_img [#[]] image node exposing 'alt' and 'src' attributes
# @return [Note] the existing or newly created note
def find_or_create_fragrance_note(note_img)
  # first_or_create yields only when a new record is being built, so the
  # logo is fetched once per note rather than on every page that mentions it.
  Note.where(name: note_img['alt'].to_s).first_or_create do |new_note|
    puts "parsing note '#{new_note.name}'"
    attach_fragrance_note_logo(new_note, note_img['src'])
  end
end

# Points a note's logo at the image derived from the page thumbnail.
#
# The logo URL is built by replacing the first character of the thumbnail's
# file name with "o" (presumably the full-size variant - confirm).
#
# @param note [Note] note being created
# @param thumbnail_src [String, nil] value of the image's +src+ attribute
# @return [void]
def attach_fragrance_note_logo(note, thumbnail_src)
  file_name = thumbnail_src.to_s.split('/').last
  # An <img> without a usable src has no logo to derive; keep the note anyway
  # instead of failing the whole parse with a NoMethodError.
  return if file_name.nil? || file_name.empty?

  logo_url = "http://www.fragrantica.com/images/sastojci/o#{file_name[1..-1]}"
  note.logo = URI.parse(logo_url)
  adapter = Paperclip.io_adapters.for(note.logo)
  # Drop the attachment when its content does not match what its file name claims.
  note.logo = nil if Paperclip::MediaTypeSpoofDetector.using(adapter, note.logo.original_filename).spoofed?
rescue OpenURI::HTTPError
  # NOTE(review): presumably raised while Paperclip fetches the remote file;
  # a broken logo must not prevent the note itself from being created.
  puts "Broken logo URL: #{logo_url}"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment