Skip to content

Instantly share code, notes, and snippets.

@lobstrio
lobstrio / lacentrale_scraper.py
Created April 15, 2021 18:33
Collect BMW vehicle data on lacentrale.fr
# -*- coding: utf-8 -*-
# Copyright(C) 2021 Sasha Bouloudnine
import requests
from lxml import html
import csv
class CrawlerLaCentrale():
@lobstrio
lobstrio / amazon_xmas.py
Created December 20, 2018 13:58
Web Scraping Python Script for the Xmas Deals on Amazon using Requests
# -*- coding: utf-8 -*-
# Copyright(C) 2018 Sasha Bouloudnine
import requests
import sys
import re
import ast
import json
import time
@lobstrio
lobstrio / lemonde_headlines.py
Created December 14, 2018 14:36
Extract headlines from French Media website lemonde.fr with Python3, Requests, and lxml
#!/usr/bin/python3
# coding: utf-8
import requests
from lxml import html
import re
import csv
from collections import Counter
class LeMondeScraper:
@lobstrio
lobstrio / twitter_dtrump.py
Last active January 8, 2021 14:13
Really simple Web Scraping Python Script for the first Tweets of Donald Trump using Requests, and lxml
#!/usr/bin/python3
# coding: utf-8
import requests
from lxml import html
def extract():
"""
Export all Tweets from @realDonaldTrump
@lobstrio
lobstrio / pagesjaunes_extract.py
Created November 21, 2018 19:05
Extract names and phone numbers from PagesJaunes.fr using Python 3, Requests, and lxml
#!/usr/bin/python3
# coding: utf-8
import requests
import csv
from lxml import html
import datetime
import argparse
@lobstrio
lobstrio / tripadvisor_mail.py
Last active November 23, 2023 13:21
Dynamically extract email addresses from Tripadvisor.com, using Python 3, Requests, and lxml
#!/usr/bin/python3
# coding: utf-8
import requests
from lxml import html
import datetime
import re
import argparse
@lobstrio
lobstrio / pdf_parser.py
Created August 16, 2018 16:05
Python 3 script to convert .pdf file into .txt output using PDFMiner
#!/usr/bin/python3
# coding: utf-8
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.pdfpage import PDFPage
from io import BytesIO
import argparse
@lobstrio
lobstrio / captcha-solver.py
Last active November 28, 2023 07:41
Solving (simple) Captcha, using PyTesseract, PIL, and Python 3
#!/usr/bin/python3
# coding: utf-8
import pytesseract
import os
import argparse
try:
import Image, ImageOps, ImageEnhance, imread
except ImportError:
from PIL import Image, ImageOps, ImageEnhance
@lobstrio
lobstrio / leboncoin_avgprice.py
Created August 2, 2018 16:10
Dynamically compute the average price of an item on Leboncoin.fr based on the first 100 items, using Python 3 and Requests
#!/usr/bin/python3
# coding: utf-8
import requests
from bs4 import BeautifulSoup
from scrapy import Selector
import datetime
import argparse
@lobstrio
lobstrio / coinmarketcap_extract.py
Last active October 24, 2023 13:26
Extract all cryptocurrency data from coinmarketcap.com using Python 3 and Requests
#!/usr/bin/python3
# coding: utf-8
import requests
from bs4 import BeautifulSoup
from scrapy import Selector
import csv
import datetime