Created
October 2, 2020 21:23
-
-
Save Nkemjiks/46e2dd0d575c965012272ce2802712a7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'uri'

# NOTE(review): the httparty gem is normally loaded as 'httparty'; this
# capitalised spelling presumably only resolves on case-insensitive
# filesystems - confirm before running on Linux.
require "HTTParty"
require 'nokogiri'
# User-facing console messages for the dev.to scraper.
#
# Every method only writes fixed text to standard output with `puts`.
# Scraper mixes this module in with `extend`, so there the methods are
# available at class level (e.g. `Scraper.introductions`).
module Instructions
  # Prints the welcome text, the hint on how to quit and the first prompt,
  # followed by one blank line.
  def introductions
    puts 'Welcome to dev.to webscraper. This CLi tool gathered articles based on the hashtag provided'
    puts 'If you want to quit, simple type (q) the next time you are prompted to enter a value'
    puts 'Please provide a hashtag to continue..'
    puts ''
  end

  # Prints the confirmation shown once the user has chosen to quit.
  def quit_message
    puts 'You have quit the scraper'
  end

  # Prints the notice shown when the entered value cannot be used.
  def invalid_entry
    puts 'Invalid entry, try again'
  end
end
# Interactive command-line scraper that lists dev.to articles for a hashtag.
#
# All methods are class methods. The user-facing messages (quit_message,
# invalid_entry) come from Instructions, which is mixed in with `extend`.
# The prompt loop is driven by mutual calls: get_input -> get_hashtag ->
# scrape_data -> get_input, and it ends when the user quits or input ends.
class Scraper
  extend Instructions

  # CSS selectors, relative to one '.crayons-story__body' node.
  TITLE_SELECTOR = 'h2.crayons-story__title a'.freeze
  AUTHOR_SELECTOR = 'div.crayons-story__top p'.freeze

  # Reads one line from standard input and hands it to get_hashtag.
  #
  # `gets` returns nil at end of input (Ctrl-D, closed pipe). That case is
  # treated like an explicit quit instead of crashing on `nil.chomp`.
  def self.get_input
    line = gets
    return quit_message if line.nil?

    get_hashtag(line.chomp)
  end

  # Decides what to do with one line of user input.
  #
  # @param user_input [String] the raw line typed by the user
  #
  # 'q' quits; a blank entry (or only '#') reports an invalid entry and
  # prompts again; anything else is scraped as a tag. Surrounding whitespace
  # and a leading '#' are dropped so "  #ruby " is looked up as "ruby".
  def self.get_hashtag(user_input)
    entry = user_input.to_s.strip
    return quit_message if entry == 'q'

    tag = entry.sub(/\A#+/, '')
    if tag.empty?
      invalid_entry
      get_input
    else
      scrape_data(tag)
    end
  end

  # Fetches the dev.to page for +hashtag+, prints one hash per article
  # (title and author) and then prompts for the next hashtag.
  #
  # @param hashtag [String] tag name to look up
  def self.scrape_data(hashtag)
    puts 'getting data ....'
    page = HTTParty.get(tag_url(hashtag))
    # Parse the body string explicitly rather than the response wrapper.
    document = Nokogiri::HTML(page.body)
    info = document.css('.crayons-story__body').map { |section| extract_article(section) }

    puts "No article for for hashtag: #{hashtag}" if info.empty?
    puts info
    get_input
  end

  # Builds the tag page URL, percent-encoding the tag so that spaces, '#',
  # '?' or '/' typed by the user cannot produce an invalid or different URL.
  def self.tag_url(hashtag)
    # encode_www_form_component writes spaces as '+'; inside a path segment
    # a space has to be '%20' (a literal '+' is already encoded as '%2B').
    segment = URI.encode_www_form_component(hashtag.to_s).gsub('+', '%20')
    "https://dev.to/t/#{segment}"
  end

  # Extracts title and author from one article node. Each field is looked up
  # on its own, so a missing element yields an empty string for that field
  # instead of shifting the other value into the wrong key or raising.
  def self.extract_article(section)
    {
      title: clean_text(section.at_css(TITLE_SELECTOR)),
      author: clean_text(section.at_css(AUTHOR_SELECTOR))
    }
  end

  # Returns the node's text with every whitespace run collapsed to a single
  # space and the ends trimmed; returns '' when the node is nil.
  def self.clean_text(node)
    node ? node.text.split.join(' ') : ''
  end

  private_class_method :tag_url, :extract_article, :clean_text
end
# Script entry point: show the instructions, then start the prompt loop.
Scraper.introductions
Scraper.get_input
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment