Skip to content

Instantly share code, notes, and snippets.

View saasindustries's full-sized avatar

SaaS Industries saasindustries

View GitHub Profile
const puppeteer = require('puppeteer');
// starting Puppeteer
let retry = 0;
let maxRetries = 5;
(async function scrape() {
retry++;
const puppeteer = require('puppeteer');
(async function scrape() {
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.goto('https://quotes.toscrape.com/search.aspx');
await page.waitForSelector('#author');
await page.select('#author', 'Albert Einstein');
require 'kimurai'
class Job_Scraper < Kimurai::Base
@name= 'acc_job_scraper'
@start_urls = ["https://www.indeed.com/jobs?q=accountant&l=Washington%2C+DC"]
@engine = :mechanize
@@jobs = []
def scrape_job_details
def scrape_job_details
web_page = browser.current_response
job_list = web_page.css('td#resultsCol')
job_list.css('div.jobsearch-SerpJobCard').each do |char_element|
title = char_element.css('h2 a')[0].attributes["title"].value.gsub(/\n/, "")
company = description = char_element.css('span.company').text.gsub(/\n/, "")
salary = char_element.css('div.salarySnippet').text.gsub(/\n/, "")
job_details = [title, company, salary]
require "Nokogiri"
require "httparty"
class Scraper
@@shoes = []
page = HTTParty.get("https://www.nike.com/w/mens-nike-by-you-lifestyle-shoes-13jrmz6ealhznik1zy7ok")
@parse_page ||= Nokogiri::HTML(page)
@parse_page.css('div.product-card__info').each do |char_element|
title = char_element.css("div.product-card__title").text.gsub(/\n/, "")
subtitle = char_element.css("div.product-card__subtitle").text.gsub(/\n/, "")
<?php
require 'vendor/autoload.php';
$client = new \Goutte\Client();
$crawler = $client->request('GET', 'https://www.imdb.com/title/tt2015381/reviews?ref_=tt_urv');
$results = [];
$results = $crawler->filter('.title')->each(function ($node) use ($results) {
array_push($results, $node->text());
// Write the generated XML to disk. The file name and the fopen mode must use
// plain ASCII quotes: typographic quotes are not string delimiters in PHP and
// would be parsed as (undefined) constant names.
$xmlFile = 'MovieReview.xml';
$handle = fopen($xmlFile, 'w') or die('Unable to open the file: '.$xmlFile);
// NOTE(review): $xml_content is not defined in this fragment; presumably it is
// built earlier in the script — confirm.
if (fwrite($handle, $xml_content)) {
    echo 'Successfully written to an XML file.';
}
else {
    echo 'Error in file generating';
}
function convertToXML($results, &$xml_user_info){
foreach($results as $key => $value){
if(is_array($results)){
$subnode = $xml_user_info->addChild($key);
foreach ($value as $k=>$v) {
$xml_user_info->addChild("$k", $v);
}
}else{
$xml_user_info->addChild("$key",htmlspecialchars("$value"));
}
$results = array();
if (!empty($html)) {
$div_class = $title = "";
$i = 0;
foreach ($html->find(".review-container") as $div_class) {
//Extract the review title
import requests
from bs4 import BeautifulSoup
from random import choice
def proxy_generator():
    """Pick a random proxy from the table listed on https://sslproxies.org/.

    Returns:
        dict: a single-entry mapping ``{'https': '<ip>:<port>'}``.

    Raises:
        requests.exceptions.RequestException: if the page cannot be fetched,
            including when the request times out.
        IndexError: if no ``<td>`` cells are found, so there is nothing to
            choose from.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive host.
    response = requests.get("https://sslproxies.org/", timeout=10)
    soup = BeautifulSoup(response.content, 'html5lib')

    cells = [cell.text for cell in soup.find_all('td')]
    # Every 8th cell starting at 0 is paired with every 8th cell starting at 1.
    # Assumes an 8-column table whose first two columns are IP and port — TODO confirm.
    addresses = [ip + ':' + port for ip, port in zip(cells[::8], cells[1::8])]

    return {'https': choice(addresses)}