# Save master131/df8557df5ffa7c6ac1cf9d4b98c8ddcd to your computer and use it in GitHub Desktop.
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
import re
import time
from collections import Counter

import requests
# Get the product ID from a price tag, a receipt or the product page URL.
# Price tag example:
#   https://imgur.com/a/bkZ8gdd
#   i.e. the product IDs are 970113 (3DS), 8369272 (2DS) and 630745 (2DS XL),
#   as seen on the clearance tag and the original price tag.
# Receipt example:
#   https://files.ozbargain.com.au/upload/181647/61487/257c6ccd-6fc8-4b68-ac64-ca980934ca08.jpeg
#   i.e. the product ID is 15124 (Urbeats).
# Product page example:
#   https://www.bigw.com.au/product/tonka-classic-dump-truck/p/276240/
#   i.e. the product ID above is 276240.
# Store locator page used to discover every postcode that has a BigW store.
STORE_LIST_URL = "https://www.lasoo.com.au/storelocator/big-w/location/3000,melbourne+vic.html"
# "Next page" links on the store locator are site-relative, so they are joined to this.
LASOO_BASE_URL = "https://www.lasoo.com.au"
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs forever
PROGRESS_INTERVAL = 0.5  # minimum number of seconds between two progress messages

# Compiled once because they are applied to every downloaded page/response.
NEXT_PAGE_RE = re.compile(r'href="([^"]+)" class="next">')
STORE_POSTCODE_RE = re.compile(r' (\d{4})</td>')
# Groups: 1 = store name, 2 = store postcode, 3 = CSS class carrying the stock status.
STOCK_RE = re.compile(
    r'<p class="info_store-name info_label">\s+([^<]+)[\s\S]*?'
    r'<p class="info_msg">.*?, (\d{4})</p>[\s\S]*?'
    r'status-moreinfo">\s+<span class="([^"]+)">'
)

# Inclusive (low, high, state) postcode ranges. Postcodes are compared as
# integers, which is why the NT range is written 800-899 rather than 0800-0899.
STATE_RANGES = (
    (2000, 2599, 'NSW'),
    (2619, 2899, 'NSW'),
    (2921, 2999, 'NSW'),
    (2600, 2618, 'ACT'),
    (2900, 2920, 'ACT'),
    (3000, 3999, 'VIC'),
    (4000, 4999, 'QLD'),
    (5000, 5799, 'SA'),
    (6000, 6797, 'WA'),
    (7000, 7799, 'TAS'),
    (800, 899, 'NT'),
)
# Order in which the states are printed; '' collects postcodes outside every range.
STATE_ORDER = ('NSW', 'ACT', 'VIC', 'QLD', 'SA', 'WA', 'TAS', 'NT', '')


def parse_store_page(html):
    """Parse one store-locator page.

    Returns a ``(next_page_url, postcodes)`` tuple: the absolute URL of the
    following page (``None`` on the last page) and the list of 4-digit
    postcode strings found on this page, one entry per occurrence.
    """
    link = NEXT_PAGE_RE.search(html)
    next_page = LASOO_BASE_URL + link.group(1) if link else None
    return next_page, STORE_POSTCODE_RE.findall(html)


def fetch_store_postcodes(session):
    """Walk the paginated store list and count the stores per postcode.

    Returns a ``Counter`` mapping postcode string -> number of occurrences.
    Raises ``requests.RequestException`` when a page cannot be downloaded, so
    that an error page is not silently mistaken for an empty store list.
    """
    counts = Counter()
    next_page = STORE_LIST_URL
    while next_page:
        print(next_page)
        response = session.get(next_page, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        next_page, postcodes = parse_store_page(response.text)
        counts.update(postcodes)
    return counts


def parse_stock(html):
    """Return ``(postcode, store_name, status)`` tuples found in a stock response.

    The store name is stripped because the capture runs up to the next tag and
    may therefore include trailing whitespace/newlines from the markup.
    """
    return [
        (match.group(2), match.group(1).strip(), match.group(3))
        for match in STOCK_RE.finditer(html)
    ]


def format_store(postcode, name, status):
    """Return the result line for a store, or ``None`` when it has no stock.

    Low-stock stores are flagged with a trailing `` *``.
    """
    if status == "instock":
        return postcode + ", " + name
    if status == "lowstock":
        return postcode + ", " + name + " *"
    return None


def record_stores(stores, seen, remaining, found):
    """Fold the stores of one stock response into the running state (in place).

    stores    -- iterable of ``(postcode, name, status)`` tuples
    seen      -- set of ``(postcode, name)`` keys of every store already reported
    remaining -- dict postcode -> number of stores not yet seen for that postcode
    found     -- set of result lines for stores that have stock

    Every newly seen store lowers its postcode's remaining count regardless of
    stock status: once all stores of a postcode have been reported by earlier
    (nearby) queries, querying that postcode again cannot add information.
    """
    for postcode, name, status in stores:
        key = (postcode, name)
        if key not in seen:
            seen.add(key)
            # Stores in postcodes missing from the store list are still reported below.
            if postcode in remaining:
                remaining[postcode] -= 1
        line = format_store(postcode, name, status)
        if line is not None:
            found.add(line)


def find_stock(session, pid, postcode_counts):
    """Query the stock of product *pid* around every store postcode.

    postcode_counts maps postcode -> number of stores there; it is not mutated.
    Returns the set of result lines (see ``format_store``). A failed request
    for one postcode is reported and skipped so earlier results are not lost.
    """
    remaining = dict(postcode_counts)
    seen = set()
    found = set()
    last_progress = 0
    for checked, postcode in enumerate(postcode_counts, start=1):
        if remaining[postcode] <= 0:
            continue
        if time.time() - last_progress >= PROGRESS_INTERVAL:
            print("Checked " + str(checked) + " of " + str(len(postcode_counts)) + " postcodes...")
            last_progress = time.time()
        url = "https://www.bigw.com.au/p/" + pid + "/ajaxlistinstorestock?postCode=" + postcode
        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print("Request for postcode " + postcode + " failed: " + str(err))
            continue
        if response.status_code == 404:
            print("Invalid product ID!")
            break
        record_stores(parse_stock(response.text), seen, remaining, found)
    return found


def state_for_postcode(postcode):
    """Return the state abbreviation for an integer postcode, or '' if unknown."""
    for low, high, state in STATE_RANGES:
        if low <= postcode <= high:
            return state
    return ''


def group_by_state(lines):
    """Group result lines ("<postcode>, <store>") by state.

    Returns a dict keyed in ``STATE_ORDER`` whose values are the lines of that
    state in sorted order; states without results map to an empty list.
    """
    grouped = {state: [] for state in STATE_ORDER}
    for line in sorted(lines):
        postcode = int(line.split(',')[0])
        grouped[state_for_postcode(postcode)].append(line)
    return grouped


def search(pid):
    """Download the store list, then look up stock for *pid*; return result lines."""
    # One session reuses the HTTP connection across the many requests made to each host.
    with requests.Session() as session:
        try:
            postcode_counts = fetch_store_postcodes(session)
        except requests.RequestException as err:
            print("Could not obtain store list: " + str(err))
            return set()
        print()
        return find_stock(session, pid, postcode_counts)


def print_report(found):
    """Print the result lines under a heading per state, skipping empty states."""
    for state, lines in group_by_state(found).items():
        if lines:
            print("\n" + state)
            for line in lines:
                print(line)


def main():
    """Prompt for a product ID, search every store and print where it is in stock."""
    pid = input("Enter BigW product ID: ").strip()
    if pid:
        print("\nObtaining store list, please wait..")
        found = search(pid)
    else:
        # An empty ID would only produce a malformed URL; reject it up front.
        print("Invalid product ID!")
        found = set()
    print()
    print_report(found)
    print()
    # Keeps the console window open when the script is started by double-click.
    input("Press any key to continue...")


if __name__ == "__main__":
    main()
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment