Last active
September 9, 2022 09:00
-
-
Save 3ele-projects/5ba1e8007a7ba29e173aaa52ad70f84b to your computer and use it in GitHub Desktop.
Python Selenium Scrap mobile.de
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Um Daten von mobile.de abzugreifen, muss Selenium genutzt werden.
Im Beispiel wurden Python 3.8 und der ChromeDriver genutzt. Dies dient als Anregung und Skizze.
Voraussetzungen:
Installierter Chromedriver | |
Python 3.8 | |
pip | |
___ | |
Installation: | |
erstelle einen Ordner: | |
scrap_mobile_de | |
füge in den Ordner app.py und requirements.txt ein. | |
Innerhalb des Ordners: | |
Erzeuge eine virtuelle Umgebung: | |
python -m venv env | |
source env/bin/activate | |
Installiere Abhängigkeiten: | |
pip install -r requirements.txt | |
Ausführen des Scripts: | |
python app.py | |
Erzeugt die CSV-Datei result.csv
und fügt das Ergebnis als neue Zeile an.
Unter scrap_mobile_de/analyse.log
befindet sich das Log.
___ | |
ToDo: | |
Dockerize it | |
Zu Beachten: | |
Das Skript sollte auf die URL des Targets angepasst werden.
Zu prüfen ist, ob die Chrome-Executable im PATH gesetzt ist.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import random | |
import time | |
import random | |
import logging | |
import sys | |
import os | |
from pathlib import Path | |
from selenium import webdriver | |
from selenium.webdriver.common.keys import Keys | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions | |
from selenium.webdriver.common.action_chains import ActionChains | |
from selenium.webdriver.chrome.options import Options | |
from selenium.webdriver import ActionChains | |
from selenium.webdriver.support import expected_conditions as EC | |
from fake_useragent import UserAgent | |
from bs4 import BeautifulSoup | |
import csv | |
#file = Path.cwd() / sys.argv[1] | |
def setup(url=None):
    """Scrape the offer count from a mobile.de search page into result.csv.

    Configures logging to ``analyse.log`` (append mode), opens the search
    page in headless Chrome with a random user agent, reads the text of the
    first ``<h1>`` element, strips the German suffix
    " Angebote entsprechen Deinen Suchkriterien" and the thousands dots,
    and appends one ``[url, number]`` row to ``result.csv``.

    Args:
        url: Search result page to load. Defaults to the motorhome search
            the script was originally written for.

    Returns:
        None. Failures while loading, parsing or writing are logged and
        printed rather than raised, so a scheduled run never crashes.
    """
    if url is None:
        url = "https://suchen.mobile.de/fahrzeuge/search.html?c=Integrated&c=PartlyIntegrated&isSearchRequest=true&ref=dsp&s=Motorhome&vc=Motorhome"

    logging.basicConfig(
        filename='analyse.log',
        filemode='a',
        level=logging.INFO,
        datefmt='%m/%d/%Y %I:%M:%S %p',
        format='%(asctime)s %(message)s',
    )

    options = Options()
    user_agent = UserAgent().random
    options.add_argument("--headless")
    # Chrome expects "width,height"; the "1920x1080" form is not parsed.
    options.add_argument("--window-size=1920,1080")
    options.add_argument(f"--user-agent={user_agent}")

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        logging.info('go_to url: %s', url)

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        heading = soup.select_one('h1')
        if heading is None:
            # Without a heading there is no count to record (e.g. a bot
            # check or error page was served instead of the result list).
            message = f'no <h1> heading found on {url}'
            logging.error(message)
            print(message)
            return

        number_string = heading.text.strip()
        number = number_string.replace(" Angebote entsprechen Deinen Suchkriterien", "")
        number = number.replace(".", "")  # drop German thousands separators

        # newline='' lets the csv module control line endings itself.
        with open('result.csv', 'a', newline='', encoding='utf-8') as result:
            csv.writer(result).writerow([url, number])
    except Exception as e:
        # Best-effort script: record the full traceback in the log file and
        # keep the console message the original printed.
        logging.exception('scraping %s failed', url)
        print(e)
    finally:
        # Always shut the browser down; otherwise every run leaves a
        # headless Chrome/chromedriver process behind.
        driver.quit()
# Run the scrape only when executed as a script (python app.py), not on import.
if __name__ == "__main__":
    setup()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
async-generator==1.10 | |
attrs==21.4.0 | |
beautifulsoup4==4.11.1 | |
bs4==0.0.1 | |
certifi==2022.5.18.1 | |
cffi==1.15.0 | |
charset-normalizer==2.0.12 | |
cryptography==37.0.2 | |
fake-useragent==0.1.11 | |
h11==0.13.0 | |
idna==3.3 | |
outcome==1.2.0 | |
pycparser==2.21 | |
pyOpenSSL==22.0.0 | |
PySocks==1.7.1 | |
requests==2.28.0 | |
selenium==4.2.0 | |
sniffio==1.2.0 | |
sortedcontainers==2.4.0 | |
soupsieve==2.3.2.post1 | |
trio==0.21.0 | |
trio-websocket==0.9.2 | |
urllib3==1.26.9 | |
wsproto==1.1.0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment