Skip to content

Instantly share code, notes, and snippets.

@focaalvarez
Created February 1, 2021 17:16
Show Gist options
  • Save focaalvarez/d4cd9f202fa02e1c5e8b383a316b739f to your computer and use it in GitHub Desktop.
Save focaalvarez/d4cd9f202fa02e1c5e8b383a316b739f to your computer and use it in GitHub Desktop.
import requests
import pandas as pd
from bs4 import BeautifulSoup
from PIL import Image
# Scraping images and extracting color
# Collect the product image URLs from the paginated "esmaltes" category listing.
website = "https://masglo.com/product-category/esmaltes/"
product_images = []
# NOTE(review): only pages 2..10 are requested; page 1 (the bare category URL)
# is never fetched here -- confirm that is intentional.
for i in range(2, 11):
    # download page (built from the base URL instead of repeating the literal)
    website2 = website + "page/" + str(i) + "/"
    # A timeout keeps one stalled connection from hanging the whole run.
    result = requests.get(website2, timeout=30)
    # Only a successful download is parsed; any other status skips the page.
    if result.status_code != 200:
        continue
    soup = BeautifulSoup(result.content, "html.parser")
    # find the images within the page
    table2 = soup.find_all(itemprop="image")
    for j in table2:
        # Tags without a src attribute are skipped instead of raising KeyError.
        src = j.get('src')
        if src:
            product_images.append(src)
# Derive a product name from each image URL using literal string splits.
colors = pd.DataFrame(product_images, columns=['url'])
# Split on the 'uploads/' separator and keep the second piece ...
url_pieces = colors['url'].str.rsplit('uploads/', expand=True)
colors['name'] = url_pieces[1]
# ... then keep only the text that precedes the first '-37'.
name_pieces = colors['name'].str.rsplit('-37', expand=True)
colors['name'] = name_pieces[0]
# Extract RGB colors: download every image and sample one fixed pixel from it.
color_list_r = []
color_list_g = []
color_list_b = []
for i in colors['url']:
    # Using the response as a context manager releases the streamed
    # connection; the timeout prevents a stalled download from hanging.
    with requests.get(i, stream=True, timeout=30) as response:
        # Fail on an HTTP error instead of handing an error page to PIL.
        response.raise_for_status()
        # The raw stream is not content-decoded by default; ask urllib3 to
        # undo gzip/deflate so PIL receives the actual image bytes.
        response.raw.decode_content = True
        with Image.open(response.raw) as img:
            # convert() reads the pixel data while the stream is still open.
            rgb_img = img.convert('RGB')
    # getpixel raises IndexError if an image is smaller than this coordinate.
    r, g, b = rgb_img.getpixel((188, 422))  # this pixel happens to contain always the color of the product
    color_list_r.append(r)
    color_list_g.append(g)
    color_list_b.append(b)
colors['r'] = color_list_r
colors['g'] = color_list_g
colors['b'] = color_list_b
# Converting RGBs to HEX
def rgb2hex(r, g, b):
    """Return the "#rrggbb" hex string for the given red, green and blue values.

    Each channel is rendered as lowercase hexadecimal, zero-padded to at
    least two digits, and the three are concatenated after a leading "#".
    """
    channels = (r, g, b)
    return "#" + "".join(format(channel, "02x") for channel in channels)
def _row_to_hex(row):
    """Build the hex colour string from one row's r, g and b columns."""
    return rgb2hex(row['r'], row['g'], row['b'])


# Apply the conversion row by row and store the result in the HEX column.
colors['HEX'] = colors.apply(_row_to_hex, axis=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment