This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Copyright(C) 2021 Sasha Bouloudnine | |
import requests | |
from lxml import html | |
import csv | |
class CrawlerLaCentrale(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# coding: utf-8 | |
import requests | |
import csv | |
from lxml import html | |
import datetime | |
import argparse | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from lxml import html | |
import time | |
print('~~ start') | |
anarchist_library_onion_link = "http://libraryqxxiqakubqv3dc2bend2koqsndbwox2johfywcatxie26bsad.onion/special/index" | |
latest_books_library_onion_link = "http://libraryqxxiqakubqv3dc2bend2koqsndbwox2johfywcatxie26bsad.onion/latest?bare=1" | |
session = requests.session() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tika import parser | |
import re | |
import csv | |
HEADERS = ['numero_gestion', 'a_jour_au', 'numero_rcs', 'date_immatriculation', 'raison_sociale', 'forme_juridique', 'capital_social', 'adresse_siege', 'activites_principals'] | |
def parse_pdf(filename): | |
# request | |
raw = parser.from_file(filename) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# _*_ coding: utf-8 _*° | |
# Copyright(C) 2021 lobstr | |
from selenium import webdriver | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.common.by import By | |
from selenium.common.exceptions import NoSuchElementException | |
import time | |
import csv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
import html2text | |
import re | |
import argparse | |
OPENAI_API_KEY = 'YOUR_OPEN_AI_API_KEY' | |
COMPLETION_URL = 'https://api.openai.com/v1/chat/completions' | |
PROMPT = """Find the main article from this product page, and return from this text content, as JSON format: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from curl_cffi import requests | |
from lxml import html | |
import json | |
import csv | |
import time | |
import argparse | |
HEADERS = { | |
'authority': 'www.doctolib.fr', | |
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import csv | |
from lxml import html | |
import argparse | |
import time | |
class YelpSearchScraper: | |
def iter_listings(self, url): | |
response = requests.get(url) | |
if response.status_code != 200: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
GrowthHacking.fr Forum Scraper | |
This script is used to scrape data from the GrowthHacking.fr forum, specifically from the "Scraping" category. | |
It retrieves information about forum topics and saves it as CSV data. | |
Usage: | |
1. Install the required library using the following command: | |
$ pip install requests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import re | |
import json | |
from lxml import html | |
import time | |
from retry import retry | |
import csv | |
URL = 'https://www.cdiscount.com/search/10/barbecue.html' |