Skip to content

Instantly share code, notes, and snippets.

Avatar
🎯
Focusing

lobstr lobstrio

🎯
Focusing
View GitHub Profile
@lobstrio
lobstrio / download_ebooks_from_onion_link_python3_requests.py
Created March 24, 2023 17:23
Download free anarchist ebooks from an .onion site with Python3 and requests 🧅
View download_ebooks_from_onion_link_python3_requests.py
@lobstrio
lobstrio / pappers_pdf_parser_with_python_and_tika.py
Created November 14, 2022 18:30
pappers_pdf_parser_with_python_and_tika.py
View pappers_pdf_parser_with_python_and_tika.py
from tika import parser
import re
import csv
HEADERS = ['numero_gestion', 'a_jour_au', 'numero_rcs', 'date_immatriculation', 'raison_sociale', 'forme_juridique', 'capital_social', 'adresse_siege', 'activites_principals']
def parse_pdf(filename):
# request
raw = parser.from_file(filename)
@lobstrio
lobstrio / google_maps_scraping_selenium.py
Created August 3, 2021 17:31
Collect all data from a Search URL on Google Maps 👋
View google_maps_scraping_selenium.py
# -*- coding: utf-8 -*-
# Copyright(C) 2021 lobstr
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time
import csv
@lobstrio
lobstrio / lacentrale_scraper.py
Created April 15, 2021 18:33
Collect BMW vehicle data on lacentrale.fr
View lacentrale_scraper.py
# -*- coding: utf-8 -*-
# Copyright(C) 2021 Sasha Bouloudnine
import requests
from lxml import html
import csv
class CrawlerLaCentrale():
@lobstrio
lobstrio / amazon_xmas.py
Created December 20, 2018 13:58
Web Scraping Python Script for the Xmas Deals on Amazon using Requests
View amazon_xmas.py
# -*- coding: utf-8 -*-
# Copyright(C) 2018 Sasha Bouloudnine
import requests
import sys
import re
import ast
import json
import time
@lobstrio
lobstrio / lemonde_headlines.py
Created December 14, 2018 14:36
Extract headlines from the French media website lemonde.fr with Python 3, Requests, and lxml
View lemonde_headlines.py
#!/usr/bin/python3
# coding: utf-8
import requests
from lxml import html
import re
import csv
from collections import Counter
class LeMondeScraper:
@lobstrio
lobstrio / twitter_dtrump.py
Last active January 8, 2021 14:13
Really simple web scraping Python script for the first tweets of Donald Trump, using Requests and lxml
View twitter_dtrump.py
#!/usr/bin/python3
# coding: utf-8
import requests
from lxml import html
def extract():
"""
Export all Tweets from @realDonaldTrump
@lobstrio
lobstrio / pagesjaunes_extract.py
Created November 21, 2018 19:05
Extract names and phone numbers from PagesJaunes.fr with Python 3, Requests, and lxml
View pagesjaunes_extract.py
#!/usr/bin/python3
# coding: utf-8
import requests
import csv
from lxml import html
import datetime
import argparse
@lobstrio
lobstrio / tripadvisor_mail.py
Last active June 2, 2021 15:43
Dynamically extract email addresses from Tripadvisor.com, using Python 3, Requests, and lxml
View tripadvisor_mail.py
#!/usr/bin/python3
# coding: utf-8
import requests
from lxml import html
import datetime
import re
import argparse
@lobstrio
lobstrio / pdf_parser.py
Created August 16, 2018 16:05
Python 3 script to convert .pdf file into .txt output using PDFMiner
View pdf_parser.py
#!/usr/bin/python3
# coding: utf-8
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.pdfpage import PDFPage
from io import BytesIO
import argparse