Skip to content

Instantly share code, notes, and snippets.

@lobstrio
lobstrio / supreme_extract.py
Created March 7, 2018 14:11
Web Scraping spider for Supreme shop data through Python 3 and Selenium
# python package
import csv
import time
import random
import sys
# selenium package
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@lobstrio
lobstrio / senscritique_extract.py
Created May 3, 2018 12:54
Scraping the SensCritique 100 best books list with Python and Selenium
# python package
import csv
import time
import random
import codecs
import sys
# selenium package
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@lobstrio
lobstrio / lequipe_worldcup_extract.py
Created May 11, 2018 17:23
Extract World Cup data from the French sports media outlet L'Équipe with Python and Selenium
# python package
import csv
import time
import random
import codecs
# selenium package
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
#!/usr/bin/python3
# coding: utf-8
import requests
from bs4 import BeautifulSoup
from scrapy import Selector
import csv
def extract(url):
@lobstrio
lobstrio / lemonde_headlines.py
Created December 14, 2018 14:36
Extract headlines from the French media website lemonde.fr with Python 3, Requests, and lxml
#!/usr/bin/python3
# coding: utf-8
import requests
from lxml import html
import re
import csv
from collections import Counter
class LeMondeScraper:
@lobstrio
lobstrio / leboncoin_avgprice.py
Created August 2, 2018 16:10
Dynamically compute the average price of an item on Leboncoin.fr based on the first 100 items, using Python 3 and Requests
#!/usr/bin/python3
# coding: utf-8
import requests
from bs4 import BeautifulSoup
from scrapy import Selector
import datetime
import argparse
@lobstrio
lobstrio / leboncoin_chalet.py
Created March 2, 2018 12:50
Python 3 code to scrape Leboncoin "chalet" items in Savoie using the Scrapy library
import scrapy
import time
class LbcSpider(scrapy.Spider):
name = "chalet_savoie_lbc"
start_urls = [
'https://www.leboncoin.fr/locations_gites/offres/rhone_alpes/savoie/',
]
def parse(self, response):
@lobstrio
lobstrio / twitter_dtrump.py
Last active January 8, 2021 14:13
Really simple web scraping Python script for the first tweets of Donald Trump, using Requests and lxml
#!/usr/bin/python3
# coding: utf-8
import requests
from lxml import html
def extract():
"""
Export all Tweets from @realDonaldTrump
@lobstrio
lobstrio / amazon_xmas.py
Created December 20, 2018 13:58
Web Scraping Python Script for the Xmas Deals on Amazon using Requests
# -*- coding: utf-8 -*-
# Copyright(C) 2018 Sasha Bouloudnine
import requests
import sys
import re
import ast
import json
import time
@lobstrio
lobstrio / pdf_parser.py
Created August 16, 2018 16:05
Python 3 script to convert a .pdf file into .txt output using PDFMiner
#!/usr/bin/python3
# coding: utf-8
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.pdfpage import PDFPage
from io import BytesIO
import argparse