Skip to content

Instantly share code, notes, and snippets.

View saasindustries's full-sized avatar

SaaS Industries saasindustries

View GitHub Profile
def data_scraper(request_method, url, **kwargs):
    """Perform an HTTP request through a rotating proxy, retrying on failure.

    Keeps picking a fresh proxy from proxy_generator() until a request
    completes without raising, then returns that response.

    Args:
        request_method: HTTP verb understood by requests.request (e.g. "GET").
        url: Target URL to fetch.
        **kwargs: Extra keyword arguments forwarded to requests.request.

    Returns:
        requests.Response: the first successful response.
    """
    while True:  # fixed: Python's boolean keyword is 'True', not 'true'
        try:
            proxy = proxy_generator()
            print("Proxy currently being used: {}".format(proxy))
            response = requests.request(
                request_method, url, proxies=proxy, timeout=7, **kwargs
            )
            # If the request is successful, no exception is raised --
            # return the response instead of silently discarding it.
            return response
        except requests.exceptions.RequestException:
            # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
            # still propagate; only network-level failures trigger a retry.
            print("Connection error, looking for another proxy")
def proxy_generator():
    """Scrape sslproxies.org and return one random HTTPS proxy.

    Returns:
        dict: ``{'https': 'host:port'}`` suitable for the ``proxies=``
        argument of ``requests``.
    """
    response = requests.get("https://sslproxies.org/")
    soup = BeautifulSoup(response.content, 'html5lib')
    # The proxy table lays out 8 <td> cells per row: cell 0 is the IP
    # address and cell 1 is the port, so stride-8 slices pair them up.
    ips = [td.text for td in soup.findAll('td')[::8]]
    ports = [td.text for td in soup.findAll('td')[1::8]]
    # Fixed: the original line had two unbalanced extra closing
    # parentheses (SyntaxError), and requests expects a 'host:port'
    # string rather than an (ip, port) tuple as the proxy value.
    proxy = {'https': ':'.join(choice(list(zip(ips, ports))))}
    return proxy
// Third-party dependencies -- moved above first use.  In the original
// paste these `const` declarations appeared *after* the request() call
// that uses them, which throws a temporal-dead-zone ReferenceError.
const request = require("request");
const cheerio = require("cheerio");

// Route the GET through a proxy supplied by proxyGenerator().
const options = {
  url: "https://www.forextradingbig.com/10-facts-you-must-know-on-online-forex-trading/",
  method: "GET",
  proxy: proxyGenerator()
};

request(options, function(error, response, html) {
  if (!error && response.statusCode == 200) {
    const $ = cheerio.load(html);
    // Concatenated text of every <h2> heading in the article.
    let article_headings = $("h2").text();
    console.log(article_headings);
  } else {
    // NOTE(review): the original snippet was truncated here; report
    // failures rather than silently dropping them.
    console.error("Request failed:", error || response.statusCode);
  }
});
// Fetches the live proxy list from sslproxies.org and picks one entry.
//
// NOTE(review): the original paste was truncated and contained a
// duplicated, nested copy of the same request() call; this version keeps
// a single request and closes every block.  Because request() is
// asynchronous, `proxy` is still undefined at the moment this function
// returns -- callers needing the scraped value should be migrated to a
// callback or Promise-based API.
function proxyGenerator() {
  let ip_addresses = [];
  let port_numbers = [];
  let proxy;

  request("https://sslproxies.org/", function(error, response, html) {
    if (!error && response.statusCode == 200) {
      const $ = cheerio.load(html);
      // Column 1 of the proxy table holds the IP addresses.
      $("td:nth-child(1)").each(function(index, value) {
        ip_addresses[index] = $(this).text();
      });
      // Column 2 holds the matching port numbers.
      $("td:nth-child(2)").each(function(index, value) {
        port_numbers[index] = $(this).text();
      });
      // Use one random index for both arrays so IP and port stay paired.
      const i = Math.floor(Math.random() * ip_addresses.length);
      proxy = "http://" + ip_addresses[i] + ":" + port_numbers[i];
    }
  });

  return proxy; // undefined until the async request completes -- see note above
}
// Third-party dependency for headless-browser scraping.
const puppeteer = require('puppeteer');

// Starting Puppeteer: open r/scraping and collect the post titles.
// NOTE(review): the original paste was truncated mid-callback; this
// version closes every block and shuts the browser down afterwards.
puppeteer.launch().then(async browser => {
  try {
    // Opening a new page and navigating to Reddit.
    const page = await browser.newPage();
    await page.goto('https://www.reddit.com/r/scraping/');
    await page.waitForSelector('body');

    // Runs inside the page context: one entry per '.Post' element.
    const grabPosts = await page.evaluate(() => {
      const allPosts = document.body.querySelectorAll('.Post');
      // 'scrapeItems' was an implicit global in the original; declare it.
      const scrapeItems = [];
      allPosts.forEach(item => {
        const postTitle = item.querySelector('h3');
        if (postTitle) {
          scrapeItems.push({ title: postTitle.innerText });
        }
      });
      return scrapeItems;
    });
    console.log(grabPosts);
  } finally {
    // Always release the browser so the Node process can exit.
    await browser.close();
  }
});
// Third-party dependencies.
const axios = require('axios');
const cheerio = require('cheerio');

// Performing a GET request.  The original paste issued the identical
// request twice (a truncated duplicate of the same snippet); a single
// request is kept here.
axios
  .get('https://www.forextradingbig.com/instaforex-broker-review/')
  .then(response => {
    // Handling the success case.
    const html = response.data;
    // NOTE(review): the original snippet was cut off at this point, so
    // what it did with `html` (presumably cheerio.load + selection) is
    // not visible here -- confirm against the full gist before extending.
    console.log(html.length);
  })
  .catch(error => {
    // The original chain had no rejection handler, leaving the Promise
    // floating; never swallow request failures silently.
    console.error('Request failed:', error.message);
  });
import java.io.IOException;
import java.net.MalformedURLException;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomNode;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlPage;