Skip to content

Instantly share code, notes, and snippets.

@AcrylicShrimp
Created December 26, 2022 13:27
Show Gist options
  • Save AcrylicShrimp/4c94db38b7d2c4dd2e832a7d53654e42 to your computer and use it in GitHub Desktop.
Save AcrylicShrimp/4c94db38b7d2c4dd2e832a7d53654e42 to your computer and use it in GitHub Desktop.
Korea Steam Deck Parcel Crawler
import asyncio
import aiohttp
import re
import time
from urllib import parse
# Stride between consecutive waybill numbers, and how many numbers are
# requested per batch.
MUL = 1
PAGE = 1_000

# Waybill number the crawl starts from.
BASE = 568_587_337_453
# BASE = 568586360001  6-series: confirmed nothing exists before this number - start from here (12/26)
# BASE = 568587435453  7-series: confirmed nothing exists before this number - start from here (12/26)

# Parcel-tracking JSON endpoint, and the passport key sent with every query
# (filled in by update_token()).
URL = "https://m.search.naver.com/p/csearch/ocontent/util/headerjson.nhn"
TOKEN = ''
async def update_token():
    """Scrape a fresh ``passportKey`` from a Naver search page into ``TOKEN``.

    The search result HTML is downloaded, the value of its ``passportKey``
    entry is extracted and URL-decoded, and the result is stored in the
    module-level ``TOKEN``. When the page carries no such entry, ``TOKEN`` is
    left untouched and a notice is printed.
    """
    global TOKEN

    search_url = 'https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=1&ie=utf8&query=568587337453'
    async with aiohttp.ClientSession() as session:
        async with session.get(url=search_url) as res:
            html = await res.text()

    # Capture only up to the first closing quote. A greedy ``\S+`` keeps going
    # through any following ``","other":"..."`` text when the key is not
    # followed by whitespace, yielding a corrupted token.
    match = re.search(r'"passportKey":"([^"\s]+)"', html)
    if match is None:
        print('passportKey not found; token left unchanged.')
        return

    TOKEN = parse.unquote(match.group(1))
    print(f'token={TOKEN}')
async def lookup(dst_company_code: str, dst_waybill_number: str) -> dict:
    """Query the parcel-tracking endpoint for one waybill number.

    ``dst_company_code`` is sent as ``t_code`` and ``dst_waybill_number`` as
    ``t_invoice``, together with the module-level ``TOKEN`` as
    ``passportKey``. The decoded JSON body of the response is returned.
    """
    # Browser-like headers; the Referer mimics a search for the waybill number.
    request_headers = {
        "Host": "m.search.naver.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0",
        "Accept": "*/*",
        "Accept-Language": "ko-KR,ko;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate, br",
        "Referer": f"https://search.naver.com/search.naver?sm=top_sly.hst&fbm=0&ie=utf8&query={dst_waybill_number}",
        "DNT": "1",
        "Connection": "keep-alive",
        "TE": "Trailers"
    }
    query = {
        "callapi": "parceltracking",
        "t_code": dst_company_code,
        "t_invoice": dst_waybill_number,
        "passportKey": TOKEN
    }

    async with aiohttp.ClientSession() as http:
        async with http.get(url=URL, headers=request_headers, params=query) as response:
            payload = await response.json()
    return payload
class LookupResult:
    """The fields of a parcel-tracking response that the crawler keeps.

    ``invoice`` and ``item`` are copied from the ``invoiceNo`` and
    ``itemName`` entries (empty string when absent); ``done`` is true only
    when the ``completeYN`` entry equals ``'Y'``.
    """

    def __init__(self, res: dict) -> None:
        invoice_no = res.get('invoiceNo', '')
        item_name = res.get('itemName', '')
        complete_flag = res.get('completeYN', '')

        self.invoice = invoice_no
        self.item = item_name
        self.done = complete_flag == 'Y'
async def query_range(dst_company_code: str, base: int, count: int, multiplier: int):
    """Look up a run of waybill numbers concurrently and keep the Steam Deck hits.

    Numbers ``base + i * multiplier`` are queried for ``i`` in ``range(count)``
    (or ``0, -1, ...`` down towards ``count`` when ``count`` is negative).
    Responses containing a ``'message'`` key are discarded. If nothing is
    left after that, a hint to refresh the token is printed.

    Returns a lazy ``filter`` over the ``LookupResult`` objects whose item
    name contains ``'Steam Deck'``.
    """
    offsets = range(0, count, -1) if count < 0 else range(count)
    pending = [
        lookup(dst_company_code, base + step * multiplier)
        for step in offsets
    ]
    payloads = await asyncio.gather(*pending)

    parsed = [
        LookupResult(payload)
        for payload in payloads
        if 'message' not in payload
    ]
    if not parsed:
        print('please update the token.')

    def is_steam_deck(entry):
        return 'Steam Deck' in entry.item

    return filter(is_steam_deck, parsed)
async def main():
    """Crawl waybill numbers upward from ``BASE`` forever, logging Steam Deck parcels.

    After refreshing the token, a CSV file named after the starting ``BASE``
    (prefixed with ``[x{MUL}]`` when ``MUL`` is not 1) is opened and a header
    row is written. Each iteration queries ``PAGE`` numbers for carrier code
    ``'04'``, appends every Steam Deck hit to the CSV, prints received/total
    counts per storage size, advances ``BASE`` by ``PAGE * MUL`` and pauses
    five seconds. The loop only ends when the coroutine is cancelled or an
    exception propagates.
    """
    global BASE

    await update_token()

    filename = f'result-{BASE}.csv' if MUL == 1 else f'[x{MUL}] result-{BASE}.csv'
    with open(filename, 'w') as csv:
        csv.write('invoice,item,done\n')

        while True:
            results = list(await query_range('04', BASE, PAGE, MUL))
            print(f'range=[{BASE}..{BASE + PAGE * MUL})')

            if results:
                # Tally per storage size; an item is counted under every size
                # whose number appears in its name.
                models = ('64', '256', '512')
                total = dict.fromkeys(models, 0)
                received = dict.fromkeys(models, 0)
                for result in results:
                    for model in models:
                        if model in result.item:
                            total[model] += 1
                            if result.done:
                                received[model] += 1

                for result in results:
                    csv.write(
                        f'{result.invoice},{result.item},{result.done}\n')
                # Flush per batch so hits survive an interrupted run.
                csv.flush()

                for model in models:
                    print(f'stats of {model}={received[model]}/{total[model]}')

            BASE += PAGE * MUL
            # Pause between batches without blocking the event loop, which a
            # plain time.sleep() inside a coroutine would do.
            await asyncio.sleep(5)
if __name__ == '__main__':
    # asyncio.run() creates, runs and closes its own event loop, so no loop is
    # created by hand here; a manually created one would never be used or
    # closed.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is the expected way to stop the endless crawl.
        pass
aiohttp==3.8.3
aiosignal==1.3.1
async-timeout==4.0.2
asyncio==3.4.3
attrs==22.1.0
autopep8==2.0.1
certifi==2022.12.7
charset-normalizer==2.1.1
frozenlist==1.3.3
idna==3.4
multidict==6.0.3
pycodestyle==2.10.0
soupsieve==2.3.2.post1
tomli==2.0.1
urllib3==1.26.13
yarl==1.8.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment