lobstrio’s gists

## twitter_scraper.py
# =============================================================================
# Title: Twitter Users Tweets Scraper
# Language: Python
# Description: This script scrapes the first 100 tweets
#   of any Twitter user.
# Author: Sasha Bouloudnine
# Date: 2023-08-08
#
# Usage:
# - Make sure you have the required libraries installed by running:

## linkedin_extract.py
# python package
import csv
import time
import random
import sys
import os

# selenium package
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

## captcha-solver.py
#!/usr/bin/python3
# coding: utf-8

import pytesseract
import os
import argparse
try:
    import Image, ImageOps, ImageEnhance, imread
except ImportError:
    from PIL import Image, ImageOps, ImageEnhance

## tripadvisor_mail.py
#!/usr/bin/python3
# coding: utf-8

import requests
from lxml import html
import datetime
import re
import argparse


## bypass_simple_captcha_pytesseract.py
import cv2
from pytesseract import image_to_string

# pip3 install opencv-python
# pip3 install pytesseract
# brew install tesseract

filename = 'lobstr.jpeg'
img = cv2.imread(filename)
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

## coinmarketcap_extract.py
#!/usr/bin/python3
# coding: utf-8

import requests
from bs4 import BeautifulSoup
from scrapy import Selector
import csv
import datetime


## cdiscount_scraper.py
import requests
import re
import json
from lxml import html
import time
from retry import retry
import csv

URL = 'https://www.cdiscount.com/search/10/barbecue.html'

## growthhackingfr_scraper.py
"""
GrowthHacking.fr Forum Scraper

This script is used to scrape data from the GrowthHacking.fr forum, specifically from the "Scraping" category.
It retrieves information about forum topics and saves it as CSV data.

Usage:
1. Install the required library using the following command:
   $ pip install requests

## yelp_scraper_2023.py
import requests
import csv
from lxml import html
import argparse
import time

class YelpSearchScraper:
    def iter_listings(self, url):
        response = requests.get(url)
        if response.status_code != 200:

## 👨‍⚕️ Scrape doctors from Doctolib from any search URL, up to any page! (072023 version)
from curl_cffi import requests
from lxml import html
import json
import csv
import time
import argparse

HEADERS = {
    'authority': 'www.doctolib.fr',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
	# =============================================================================
	# Title: Twitter Users Tweets Scraper
	# Language: Python
	# Description: This script scrapes the first 100 tweets
	# of any Twitter user.
	# Author: Sasha Bouloudnine
	# Date: 2023-08-08
	#
	# Usage:
	# - Make sure you have the required libraries installed by running:
	# python package
	import csv
	import time
	import random
	import sys
	import os

	# selenium package
	from selenium import webdriver
	from selenium.webdriver.common.keys import Keys
	#!/usr/bin/python3
	# coding: utf-8

	import pytesseract
	import os
	import argparse
	try:
	    import Image, ImageOps, ImageEnhance, imread
	except ImportError:
	    from PIL import Image, ImageOps, ImageEnhance
	#!/usr/bin/python3
	# coding: utf-8

	import requests
	from lxml import html
	import datetime
	import re
	import argparse
	import cv2
	from pytesseract import image_to_string

	# pip3 install opencv-python
	# pip3 install pytesseract
	# brew install tesseract

	filename = 'lobstr.jpeg'
	img = cv2.imread(filename)
	gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	import requests
	import re
	import json
	from lxml import html
	import time
	from retry import retry
	import csv

	URL = 'https://www.cdiscount.com/search/10/barbecue.html'
	"""
	GrowthHacking.fr Forum Scraper

	This script is used to scrape data from the GrowthHacking.fr forum, specifically from the "Scraping" category.
	It retrieves information about forum topics and saves it as CSV data.

	Usage:
	1. Install the required library using the following command:
	$ pip install requests
	from curl_cffi import requests
	from lxml import html
	import json
	import csv
	import time
	import argparse

	HEADERS = {
	'authority': 'www.doctolib.fr',
	'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',