Created
January 12, 2024 11:46
-
-
Save alperbayram/5722175e522852d38db3d05286b55a9c to your computer and use it in GitHub Desktop.
Colab ile Websitesindeki Tüm Imageleri Çekip Google Drive içerisine Kaydeden Python Scripti
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io | |
import os | |
import hashlib | |
import pandas as pd | |
import requests | |
from bs4 import BeautifulSoup | |
from PIL import Image | |
from IPython.display import Image as IPImage, display | |
from pathlib import Path | |
from google.colab import drive | |
# Authenticate and mount Google Drive at /content/drive. This runs at module
# level, i.e. as soon as the script is executed or imported; the functions
# below write their output under /content/drive/MyDrive/laptopimages.
drive.mount('/content/drive')
def get_content_from_url(url, timeout=30):
    """Download ``url`` and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never mistaken for the real content.
        requests.RequestException: For connection problems and timeouts.
    """
    # A browser-like User-Agent is sent with the request instead of the
    # default python-requests one.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.content
def parse_image_urls(content, classes, location, source):
    """Collect unique attribute values from tags nested in matching elements.

    Every element whose ``class`` matches ``classes`` is searched for its
    first descendant tag named ``location``; the value of that tag's
    ``source`` attribute is collected.

    Args:
        content: HTML document (``bytes`` or ``str``) to parse.
        classes: CSS class used to select the wrapping elements.
        location: Name of the tag to look for inside each wrapper
            (e.g. ``"img"``).
        source: Attribute to read from that tag (e.g. ``"src"``).

    Returns:
        A list of the distinct attribute values in document order. Wrappers
        without a ``location`` tag, and tags whose ``source`` attribute is
        missing or empty, are skipped.
    """
    soup = BeautifulSoup(content, "html.parser")
    results = []
    # ``find_all`` is the supported spelling; ``findAll`` is a deprecated alias.
    for wrapper in soup.find_all(attrs={"class": classes}):
        tag = wrapper.find(location)
        if tag is None:
            continue
        value = tag.get(source)
        # ``Tag.get`` returns None when the attribute is absent; storing that
        # would later be handed to the downloader as if it were a URL.
        if not value:
            continue
        # List membership (rather than a set) keeps this working for
        # multi-valued attributes, which BeautifulSoup returns as lists.
        if value not in results:
            results.append(value)
    return results
def save_urls_to_csv(image_urls, csv_path="/content/drive/MyDrive/laptopimages/links.csv"):
    """Write the collected URLs to a one-column CSV file.

    Args:
        image_urls: Iterable of URL strings; each becomes one row.
        csv_path: Destination file. Defaults to ``links.csv`` inside the
            ``laptopimages`` folder on the mounted Google Drive.

    The file has a single column named ``links``, no index column, and is
    encoded as UTF-8. The parent directory is created when it is missing so
    the function also works when called on its own.
    """
    destination = Path(csv_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    df = pd.DataFrame({"links": list(image_urls)})
    df.to_csv(destination, index=False, encoding="utf-8")
def save_images_to_drive(image_urls, folder_path="/content/drive/MyDrive/laptopimages", timeout=30):
    """Download each image and store it as a PNG file in ``folder_path``.

    Args:
        image_urls: Iterable of image URLs to download.
        folder_path: Directory the PNG files are written to. Defaults to the
            ``laptopimages`` folder on the mounted Google Drive.
        timeout: Seconds to wait for each download before giving up.

    Each file is named after the first 10 hex digits of the SHA-1 of the
    downloaded bytes, so identical images map to the same file name.

    A URL that cannot be downloaded, or whose payload is not a readable
    image, is reported on stdout and skipped so that one bad link does not
    abort the rest of the batch.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
    }
    target_dir = Path(folder_path)
    for image_url in image_urls:
        try:
            response = requests.get(image_url, headers=headers, timeout=timeout)
            response.raise_for_status()
            image_content = response.content
            # Pillow raises UnidentifiedImageError (an OSError) for
            # payloads it cannot recognise as an image.
            image = Image.open(io.BytesIO(image_content))
        except (requests.RequestException, OSError) as exc:
            print(f"Skipping {image_url!r}: {exc}")
            continue
        filename = hashlib.sha1(image_content).hexdigest()[:10] + ".png"
        # The former ``quality=80`` argument was dropped: it is not one of
        # the PNG writer's documented save options.
        image.save(target_dir / filename, "PNG")
def main():
    """Scrape the eBay laptop search page and archive its listing images.

    Steps, in order: fetch the search results page, extract the ``src`` of
    the ``img`` inside every ``s-item__image-wrapper`` element, make sure the
    ``laptopimages`` folder exists on Google Drive, then write the link list
    as CSV and download the images.
    """
    search_url = "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p2334524.m570.l1313&_nkw=laptop&_sacat=0&LH_TitleDesc=0&_osacat=0&_odkw=laptop"
    page_html = get_content_from_url(search_url)
    links = parse_image_urls(
        page_html,
        classes="s-item__image-wrapper",
        location="img",
        source="src",
    )

    # Ensure the output folder exists on Google Drive before anything is written.
    output_dir = Path("/content/drive/MyDrive/laptopimages")
    output_dir.mkdir(parents=True, exist_ok=True)

    save_urls_to_csv(links)
    save_images_to_drive(links)
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment