This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer'); | |
// starting Puppeteer | |
let retry = 0; | |
let maxRetries = 5; | |
(async function scrape() { | |
retry++; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer'); | |
(async function scrape() { | |
const browser = await puppeteer.launch({ headless: false }); | |
const page = await browser.newPage(); | |
await page.goto('https://quotes.toscrape.com/search.aspx'); | |
await page.waitForSelector('#author'); | |
await page.select('#author', 'Albert Einstein'); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'kimurai' | |
class Job_Scraper < Kimurai::Base | |
@name= 'acc_job_scraper' | |
@start_urls = ["https://www.indeed.com/jobs?q=accountant&l=Washington%2C+DC"] | |
@engine = :mechanize | |
@@jobs = [] | |
def scrape_job_details |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def scrape_job_details | |
web_page = browser.current_response | |
job_list = web_page.css('td#resultsCol') | |
job_list.css('div.jobsearch-SerpJobCard').each do |char_element| | |
title = char_element.css('h2 a')[0].attributes["title"].value.gsub(/\n/, "") | |
company = description = char_element.css('span.company').text.gsub(/\n/, "") | |
salary = char_element.css('div.salarySnippet').text.gsub(/\n/, "") | |
job_details = [title, company, salary] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "Nokogiri" | |
require "httparty" | |
class Scraper | |
@@shoes = [] | |
page = HTTParty.get("https://www.nike.com/w/mens-nike-by-you-lifestyle-shoes-13jrmz6ealhznik1zy7ok") | |
@parse_page ||= Nokogiri::HTML(page) | |
@parse_page.css('div.product-card__info').each do |char_element| | |
title = char_element.css("div.product-card__title").text.gsub(/\n/, "") | |
subtitle = char_element.css("div.product-card__subtitle").text.gsub(/\n/, "") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require 'vendor/autoload.php'; | |
$client = new \Goutte\Client(); | |
$crawler = $client->request('GET', 'https://www.imdb.com/title/tt2015381/reviews?ref_=tt_urv'); | |
$results = []; | |
$results = $crawler->filter('.title')->each(function ($node) use ($results) { | |
array_push($results, $node->text()); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Write the generated XML string to MovieReview.xml and report the outcome.
// NOTE(review): $xml_content is not defined in this snippet; it is presumably
// the serialized XML built earlier in the script — confirm against the caller.
$xmlFile = 'MovieReview.xml';
// Plain ASCII quotes are required here: typographic quotes (‘ ’) are not string
// delimiters in PHP, so the previous literals were not parsed as strings.
$handle = fopen($xmlFile, 'w') or die('Unable to open the file: '.$xmlFile);
if(fwrite($handle, $xml_content)) {
    echo 'Successfully written to an XML file.';
}
else{
    echo 'Error in file generating';
}
// Release the file handle once writing is finished.
fclose($handle);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function convertToXML($results, &$xml_user_info){ | |
foreach($results as $key => $value){ | |
if(is_array($results)){ | |
$subnode = $xml_user_info->addChild($key); | |
foreach ($value as $k=>$v) { | |
$xml_user_info->addChild("$k", $v); | |
} | |
}else{ | |
$xml_user_info->addChild("$key",htmlspecialchars("$value")); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$results = array(); | |
if (!empty($html)) { | |
$div_class = $title = ""; | |
$i = 0; | |
foreach ($html->find(".review-container") as $div_class) { | |
//Extract the review title |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
from random import choice | |
def proxy_generator():
    """Pick one random proxy address scraped from https://sslproxies.org/.

    Returns:
        dict: a single-entry mapping ``{'https': '<host>:<port>'}``.

    Raises:
        requests.RequestException: if the request times out, fails, or the
            server answers with an HTTP error status.
        IndexError: if the page yields no address pairs (``choice`` on an
            empty list).
    """
    # A timeout keeps the caller from hanging forever on an unresponsive host.
    response = requests.get("https://sslproxies.org/", timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html5lib')

    # Collect the table cells once instead of scanning the document twice.
    cells = soup.find_all('td')
    # Every 8th cell from offset 0 is used as the host and from offset 1 as the
    # port. NOTE(review): this assumes each table row has exactly 8 <td> cells
    # and that no other <td> precedes them — confirm against the live page.
    hosts = cells[::8]
    ports = cells[1::8]
    addresses = [host.text + ':' + port.text for host, port in zip(hosts, ports)]

    proxy = {'https': choice(addresses)}
    return proxy