@alperbayram
Created January 12, 2024 11:46
A Python script that scrapes all images from a website in Colab and saves them to Google Drive
import io
import os
import hashlib
import pandas as pd
import requests
from bs4 import BeautifulSoup
from PIL import Image
from IPython.display import Image as IPImage, display
from pathlib import Path
from google.colab import drive
# Authenticate and mount Google Drive
drive.mount('/content/drive')

def get_content_from_url(url):
    # Fetch the raw HTML of the page, using a desktop browser User-Agent to avoid being blocked.
    response = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"})
    return response.content

def parse_image_urls(content, classes, location, source):
    # Collect unique image URLs from every element carrying the given CSS class.
    soup = BeautifulSoup(content, "html.parser")
    results = []
    for wrapper in soup.find_all(attrs={"class": classes}):
        tag = wrapper.find(location)
        if tag and tag.get(source) not in results:
            results.append(tag.get(source))
    return results

def save_urls_to_csv(image_urls):
    # Write the collected URLs to a CSV file inside the Drive folder.
    df = pd.DataFrame({"links": image_urls})
    df.to_csv("/content/drive/MyDrive/laptopimages/links.csv", index=False, encoding="utf-8")

def save_images_to_drive(image_urls):
    # Download each image and save it as a PNG named after a short SHA-1 hash of its bytes.
    for image_url in image_urls:
        response = requests.get(image_url, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"})
        image_content = response.content
        image = Image.open(io.BytesIO(image_content))
        filename = hashlib.sha1(image_content).hexdigest()[:10] + ".png"
        file_path = "/content/drive/MyDrive/laptopimages/" + filename
        image.save(file_path, "PNG")  # PNG is lossless, so the JPEG-style quality option is unnecessary

def main():
    url = "https://www.ebay.com/sch/i.html?_from=R40&_trksid=p2334524.m570.l1313&_nkw=laptop&_sacat=0&LH_TitleDesc=0&_osacat=0&_odkw=laptop"
    content = get_content_from_url(url)
    image_urls = parse_image_urls(
        content=content, classes="s-item__image-wrapper", location="img", source="src",
    )
    # Create the 'laptopimages' folder in Google Drive if it doesn't exist
    folder_path = "/content/drive/MyDrive/laptopimages"
    Path(folder_path).mkdir(parents=True, exist_ok=True)
    save_urls_to_csv(image_urls)
    save_images_to_drive(image_urls)

if __name__ == "__main__":
    main()
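
The listing imports IPython.display (IPImage, display) but never uses it. As a quick sanity check after the run, a small follow-up Colab cell along the lines below can preview the first few PNGs written to the Drive folder. This is an optional sketch, not part of the original script, and it assumes the script above has already run and that the /content/drive/MyDrive/laptopimages path is unchanged.

# Optional preview cell (sketch): show the first few downloaded images inline in Colab.
from pathlib import Path
from IPython.display import Image as IPImage, display

folder_path = Path("/content/drive/MyDrive/laptopimages")
for png_path in sorted(folder_path.glob("*.png"))[:5]:
    print(png_path.name)
    display(IPImage(filename=str(png_path), width=200))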