Skip to content

Instantly share code, notes, and snippets.

@marccane
Last active January 24, 2023 21:43
Show Gist options
  • Save marccane/28451c360bce33e3810e3d6dcfc1edbe to your computer and use it in GitHub Desktop.
Save marccane/28451c360bce33e3810e3d6dcfc1edbe to your computer and use it in GitHub Desktop.
Quick script to scrape all products from a miravia.es virtual shop, with a pickle cache file to quickly change the search filter or sorting parameters
#!/usr/bin/env python3
import http.client, sys, json, traceback, pickle, os
from os.path import exists
# Text a product's name must contain to appear in the results; it is matched
# against the lower-cased product name.
productNameFilter = "monster"
def loadItemsFromWeb():
    """Download the shop's product listing pages and return all their items.

    Requests pages 1 through 21 of the "la-tramuntana" listing on
    www.miravia.es over a single HTTPS connection and collects the entries
    stored under ``mods -> listItems`` of each JSON response.

    Returns:
        list: the items of every page whose response reported ``success``,
        in page order. A page whose response reports failure is printed and
        skipped.

    Raises:
        SystemExit: with status 2, after printing the raw response and a
            traceback, when a response is not valid JSON or does not have
            the expected structure.
    """
    itemsBuffer = []
    conn = http.client.HTTPSConnection("www.miravia.es")
    try:
        for pageNumber in range(1, 22):
            rawGetParameters = '/la-tramuntana/?q=All-Products&from=wangpu&langFlag=en&pageTypeId=2&page=%d&ajax=true&retcode={"apiName":"gsearch"}&lang=en'
            conn.request("GET", rawGetParameters % pageNumber)
            data = conn.getresponse().read()
            try:
                # Parsing happens inside the try so that a non-JSON reply
                # (e.g. an HTML error page) is reported like any other
                # malformed response.
                dataDict = json.loads(data)
                if dataDict['success']:
                    print("Page %d loaded successfully" % pageNumber)
                    # Append all items from the current page to the overall list.
                    itemsBuffer.extend(dataDict['mods']['listItems'])
                else:
                    print("Server response was not successfull")
                    print(data)
            except (ValueError, KeyError, TypeError):
                # `data` is bytes, so it must be decoded before it can be
                # joined to the message; errors="replace" keeps the report
                # itself from failing on undecodable bytes.
                print("ERROR! Response was: " + data.decode("utf-8", errors="replace"))
                traceback.print_exc()
                sys.exit(2)
    finally:
        # Release the socket on every exit path, including sys.exit above.
        conn.close()
    return itemsBuffer
# Cache file holding the pickled list of scraped items, so the filter and the
# sort order can be changed without downloading everything again.
pickleFilename = "miravia_items.pick"

if exists(pickleFilename):
    # NOTE: pickle.load can execute arbitrary code from the file; only use a
    # cache file that this script created itself.
    with open(pickleFilename, "rb") as pickleFile:
        itemsBuffer = pickle.load(pickleFile)
else:
    print("Pickle file not found, loading all items from web!")
    itemsBuffer = loadItemsFromWeb()
    # Do not cache an empty result: if every page failed to load, the empty
    # cache would otherwise be reused on all later runs.
    if itemsBuffer:
        with open(pickleFilename, "wb") as pickleFile:
            pickle.dump(itemsBuffer, pickleFile)

print("Elements before filter %d" % len(itemsBuffer))
# Names are lower-cased for the comparison, so lower-case the filter as well;
# otherwise a filter containing capital letters could never match.
nameFilter = productNameFilter.lower()
itemsBuffer = [item for item in itemsBuffer if nameFilter in item["name"].lower()]
print("Elements after filter %d" % len(itemsBuffer))
# Order by the numeric value of "price", lowest first.
itemsBuffer.sort(key=lambda item: float(item["price"]))

print()
print("RESULTS:")
for item in itemsBuffer:
    print(item["name"], item["priceShow"])

# On Windows, wait for Enter before exiting (presumably so a console window
# opened by double-clicking the script stays visible).
if os.name == "nt":
    input()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment