Skip to content

Instantly share code, notes, and snippets.

@lobstrio
lobstrio / twitter_scraper.py
Last active March 23, 2024 16:21
Collect the first 100 tweets from any user at any time — and export them to a .csv file 🐦
# =============================================================================
# Title: Twitter Users Tweets Scraper
# Language: Python
# Description: This script scrapes the first 100 tweets
# of any Twitter user.
# Author: Sasha Bouloudnine
# Date: 2023-08-08
#
# Usage:
# - Make sure you have the required libraries installed by running:
@lobstrio
lobstrio / linkedin_extract.py
Created April 24, 2018 11:05
Scraping LinkedIn profile information with Selenium and Python
# python package
import csv
import time
import random
import sys
import os
# selenium package
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
@lobstrio
lobstrio / captcha-solver.py
Last active November 28, 2023 07:41
Solving (simple) Captcha, using PyTesseract, PIL, and Python 3
#!/usr/bin/python3
# coding: utf-8
import pytesseract
import os
import argparse
try:
import Image, ImageOps, ImageEnhance, imread
except ImportError:
from PIL import Image, ImageOps, ImageEnhance
@lobstrio
lobstrio / tripadvisor_mail.py
Last active November 23, 2023 13:21
Dynamically extract email addresses from Tripadvisor.com, using Python 3, Requests, and lxml
#!/usr/bin/python3
# coding: utf-8
import requests
from lxml import html
import datetime
import re
import argparse
@lobstrio
lobstrio / bypass_simple_captcha_pytesseract.py
Created April 14, 2023 19:42
Bypass a (simple) CAPTCHA with Python3 and pytesseract 🤖
import cv2
from pytesseract import image_to_string
# pip3 install opencv-python
# pip3 install pytesseract
# brew install tesseract
filename = 'lobstr.jpeg'
img = cv2.imread(filename)
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
@lobstrio
lobstrio / coinmarketcap_extract.py
Last active October 24, 2023 13:26
Extract all cryptocurrency data from coinmarketcap.com using Python 3 and Requests
#!/usr/bin/python3
# coding: utf-8
import requests
from bs4 import BeautifulSoup
from scrapy import Selector
import csv
import datetime
@lobstrio
lobstrio / cdiscount_scraper.py
Last active August 11, 2023 21:57
Scrape all products from a cDiscount hot barbecue category URL 🍖
import requests
import re
import json
from lxml import html
import time
from retry import retry
import csv
URL = 'https://www.cdiscount.com/search/10/barbecue.html'
@lobstrio
lobstrio / growthhackingfr_scraper.py
Created August 11, 2023 17:43
🧙 Scrape all topics from the famous French GrowthHacking.fr forum — 'scraping' category only!
"""
GrowthHacking.fr Forum Scraper
This script is used to scrape data from the GrowthHacking.fr forum, specifically from the "Scraping" category.
It retrieves information about forum topics and saves it as CSV data.
Usage:
1. Install the required library using the following command:
$ pip install requests
@lobstrio
lobstrio / yelp_scraper_2023.py
Created July 17, 2023 10:47
🍝 Collect all Yelp listings from a search URL, e.g. https://www.yelp.fr/search?find_desc=Pizza&find_loc=marseille — phone numbers included!
import requests
import csv
from lxml import html
import argparse
import time
class YelpSearchScraper:
def iter_listings(self, url):
response = requests.get(url)
if response.status_code != 200:
from curl_cffi import requests
from lxml import html
import json
import csv
import time
import argparse
HEADERS = {
'authority': 'www.doctolib.fr',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',