This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def data_scraper(request_method, url, **kwargs):
    """Send an HTTP request through a freshly picked proxy, retrying until one works.

    Args:
        request_method: HTTP verb forwarded to ``requests.request`` (e.g. "GET").
        url: Address to request.
        **kwargs: Extra keyword arguments forwarded unchanged to ``requests.request``.

    Returns:
        The response of the first request that completes without raising.
    """
    # `True` must be capitalised: lowercase `true` is an undefined name in
    # Python and raises NameError before the first attempt is made.
    while True:
        try:
            proxy = proxy_generator()
            print("Proxy currently being used: {}".format(proxy))
            # No exception means the request went through, so hand the
            # response back to the caller instead of discarding it.
            return requests.request(request_method, url, proxies=proxy, timeout=7, **kwargs)
        except Exception:
            # Catch Exception rather than using a bare `except:` so that
            # KeyboardInterrupt / SystemExit can still stop the retry loop.
            print("Connection error, looking for another proxy")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def proxy_generator():
    """Pick a random proxy from the table published at https://sslproxies.org/.

    Returns:
        dict: ``{'https': '<ip>:<port>'}``, intended for use as the ``proxies``
        argument of a ``requests`` call.
    """
    response = requests.get("https://sslproxies.org/")
    soup = BeautifulSoup(response.content, 'html5lib')
    cell_texts = [cell.text for cell in soup.findAll('td')]
    # Every 8th cell starting at index 0 is paired with every 8th cell starting
    # at index 1 — assumes 8-column rows with ip first and port second; TODO
    # confirm against the live page layout.
    candidates = [
        '{}:{}'.format(ip, port)
        for ip, port in zip(cell_texts[::8], cell_texts[1::8])
    ]
    # Join ip and port into a single string: the original expression stored a
    # raw (ip, port) tuple and had unbalanced parentheses.
    proxy = {'https': choice(candidates)}
    return proxy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const options = { | |
url: "https://www.forextradingbig.com/10-facts-you-must-know-on-online-forex-trading/", | |
method: "GET", | |
proxy: proxyGenerator() | |
}; | |
request(options, function(error, response, html) { | |
if (!error && response.statusCode == 200) { | |
const $ = cheerio.load(html); | |
let article_headings = $("h2").text(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const request = require("request"); | |
const cheerio = require("cheerio"); | |
function proxyGenerator() { | |
let ip_addresses = []; | |
let port_numbers = []; | |
let proxy; | |
request("https://sslproxies.org/", function(error, response, html) { | |
if (!error && response.statusCode == 200) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
let ip_addresses = []; | |
let port_numbers = []; | |
request("https://sslproxies.org/", function(error, response, html) { | |
if (!error && response.statusCode == 200) { | |
const $ = cheerio.load(html); | |
$("td:nth-child(1)").each(function(index, value) { | |
ip_addresses[index] = $(this).text(); | |
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer'); | |
// starting Puppeteer | |
puppeteer.launch().then(async browser => { | |
// opening a new page and navigating to Reddit | |
const page = await browser.newPage(); | |
await page.goto('https://www.reddit.com/r/scraping/'); | |
await page.waitForSelector('body'); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
let grabPosts = await page.evaluate(() => { | |
let allPosts = document.body.querySelectorAll('.Post'); | |
// storing the post items in an array then selecting for retrieving content | |
scrapeItems = []; | |
allPosts.forEach(item => { | |
let postTitle = item.querySelector('h3'); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const axios = require('axios'); | |
const cheerio = require('cheerio'); | |
//performing a GET request | |
axios.get('https://www.forextradingbig.com/instaforex-broker-review/') | |
.then(response => { | |
//handling the success | |
const html = response.data; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Fetch the review page; the HTML markup is read from the response body.
axios.get('https://www.forextradingbig.com/instaforex-broker-review/')
  .then(response => {
    const html = response.data;
  })
  .catch(error => {
    // Report the failure instead of leaving the promise rejection unhandled.
    console.error(error);
  });
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException; | |
import java.net.MalformedURLException; | |
import com.gargoylesoftware.htmlunit.BrowserVersion; | |
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException; | |
import com.gargoylesoftware.htmlunit.WebClient; | |
import com.gargoylesoftware.htmlunit.html.DomNode; | |
import com.gargoylesoftware.htmlunit.html.DomNodeList; | |
import com.gargoylesoftware.htmlunit.html.HtmlPage; |