Last active
January 24, 2023 21:43
-
-
Save marccane/28451c360bce33e3810e3d6dcfc1edbe to your computer and use it in GitHub Desktop.
Quick script to scrape all products from a miravia.es virtual shop, using a pickle cache file so the search filter or sorting parameters can be changed quickly without downloading the listing again
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import http.client, sys, json, traceback, pickle, os | |
from os.path import exists | |
# Substring a product name must contain to be listed. It is tested against
# the lower-cased product name, so keep this value lower-case.
productNameFilter = "monster"
def loadItemsFromWeb():
    """Download every product listing page of the shop and return all items.

    Requests pages 1 through 21 of the "la-tramuntana" shop on
    www.miravia.es over a single HTTPS connection and collects the entries
    of ``mods.listItems`` from each JSON response whose ``success`` field is
    truthy. Pages that report no success are printed and skipped.

    Returns:
        list: the items of all successfully loaded pages, in page order.

    Raises:
        SystemExit: with status 2 when a response is not valid JSON or does
            not have the expected structure; the raw response and the
            traceback are printed first.
    """
    itemsBuffer = []
    # %d is the only placeholder and receives the page number.
    pathTemplate = '/la-tramuntana/?q=All-Products&from=wangpu&langFlag=en&pageTypeId=2&page=%d&ajax=true&retcode={"apiName":"gsearch"}&lang=en'
    conn = http.client.HTTPSConnection("www.miravia.es")
    try:
        for pageNumber in range(1, 22):
            conn.request("GET", pathTemplate % pageNumber)
            data = conn.getresponse().read()
            try:
                dataDict = json.loads(data)
                if dataDict['success']:
                    print("Page %d loaded successfully" % pageNumber)
                    # Append all items from the current page to the result.
                    itemsBuffer.extend(dataDict['mods']['listItems'])
                else:
                    print("Server response was not successfull")
                    print(data)
            except (ValueError, KeyError, TypeError):
                # ValueError covers invalid JSON; KeyError/TypeError cover a
                # response with an unexpected shape. ``data`` is bytes, so it
                # must be decoded before being joined to the message.
                print("ERROR! Response was: " + data.decode("utf-8", errors="replace"))
                traceback.print_exc()
                sys.exit(2)
    finally:
        # Release the socket on every exit path, including sys.exit().
        conn.close()
    return itemsBuffer
# Local cache file holding the pickled list of scraped items.
pickleFilename = "miravia_items.pick"

# First run (or cache deleted): scrape the shop once and store the result so
# later runs can change the filter or the sorting without hitting the network.
if not exists(pickleFilename):
    print("Pickle file not found, loading all items from web!")
    loadedItems = loadItemsFromWeb()
    with open(pickleFilename, "wb") as pickleFile:
        pickle.dump(loadedItems, pickleFile)

# Always read the items back from the cache file.
# NOTE: pickle.load can execute arbitrary code; only use a cache file that
# this script wrote itself.
with open(pickleFilename, "rb") as pickleFile:
    itemsBuffer = pickle.load(pickleFile)

print("Elements before filter %d" % len(itemsBuffer))
# Keep only the products whose lower-cased name contains the filter text.
itemsBuffer = [
    entry for entry in itemsBuffer
    if productNameFilter in entry["name"].lower()
]
print("Elements after filter %d" % len(itemsBuffer))

# Order the remaining products by ascending numeric price.
itemsBuffer.sort(key=lambda entry: float(entry["price"]))

print()
print("RESULTS:")
for item in itemsBuffer:
    print(item["name"], item["priceShow"])

# On Windows, wait for Enter before the script ends.
if os.name == "nt":
    input()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment