Skip to content

Instantly share code, notes, and snippets.

@Xetera
Last active August 11, 2022 09:58
Show Gist options
  • Save Xetera/d50af9c42615d66d55755b3708c2a70e to your computer and use it in GitHub Desktop.
Save Xetera/d50af9c42615d66d55755b3708c2a70e to your computer and use it in GitHub Desktop.
Scraping post data from Weverse's new Naver API using an HMAC signature
import base64
import hmac
import math
import re
import time
import urllib.parse
from hashlib import sha1

import requests
# Captures the value of a `src="..."` attribute that points at the main JS
# bundle (a path segment starting with "main" and ending in ".js").
# `[^"]` keeps the match inside one attribute value, so a page whose markup
# sits on a single line cannot produce a capture spanning several tags.
js_link_regex = re.compile(r'src="([^"]+/main[^"]*\.js)"')
# Captures a hex string literal that is returned directly, i.e. `return "<hex>"`
# (with at most one whitespace character between `return` and the quote).
secret_key_regex = re.compile(r'return\s?"([a-fA-F0-9]+)"')
def get_secret():
    """Fetch the current HMAC signing secret from weverse's main JS bundle.

    Downloads the https://weverse.io landing page, locates the main bundle's
    script URL with ``js_link_regex``, downloads that bundle and scans it with
    ``secret_key_regex``.

    Returns:
        The first captured hex string that is longer than 10 characters.

    Raises:
        Exception: if no bundle link is found, if several different bundle
            links are found, or if the bundle contains no suitable key.
        requests.RequestException: if either download fails, times out or
            returns an HTTP error status.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    result = requests.get("https://weverse.io", timeout=10)
    result.raise_for_status()

    # De-duplicate while preserving order: the same bundle may be referenced
    # more than once on the page.
    matches = list(dict.fromkeys(js_link_regex.findall(result.text)))
    if not matches:
        raise Exception("No matching main bundle links found")
    if len(matches) > 1:
        raise Exception(f"Expected exactly one main bundle link, found {len(matches)}: {matches}")

    # Resolve against the final page URL so a relative `src` also works;
    # absolute links are returned unchanged by urljoin.
    js_link = urllib.parse.urljoin(result.url, matches[0])
    js_result = requests.get(js_link, timeout=10)
    js_result.raise_for_status()

    js_matches = secret_key_regex.findall(js_result.text)
    if not js_matches:
        raise Exception("Could not find any secret keys matching the pattern")

    # Usually the first hex value that looks like a secret key is the actual key
    secret = next((match for match in js_matches if len(match) > 10), None)
    if secret is None:
        # Without the default above this would surface as a bare StopIteration.
        raise Exception("Could not find any secret keys matching the pattern")
    return secret
app_id = "be4d79eb8fc7bd008ee82c8ec4ff6fd4"
# This value changes periodically so it needs to be dynamically retrieved from the js bundle
active = get_secret()

# Path + query of the API call; this is the part that gets signed.
url = f"/post/v1.0/community-14/artistTabPosts?fieldSet=postsV1&limit=20&pagingType=CURSOR&appId={app_id}&language=en&platform=WEB&wpf=pc"
# Only the first 255 characters of the path + query take part in the signature.
url_hash = url[:255]
print(url_hash)

# Millisecond timestamp (minus 10) appended to the signed message and also
# sent to the server as `wmsgpad`.
pad = str(math.floor(time.time() * 1000) - 10)
hashed_url = url_hash + pad

# Signature: base64(HMAC-SHA1(secret, truncated_url + pad)), percent-encoded
# so it can be placed in the query string as `wmd`.
byte_key = bytes(active, "UTF-8")
result = base64.standard_b64encode(hmac.new(byte_key, hashed_url.encode(), sha1).digest()).decode()
wmd = urllib.parse.quote(result)

final_url = f"https://apis.naver.com/weverse/wevweb{url}&wmsgpad={pad}&wmd={wmd}"
print(final_url)
resp = requests.get(
    final_url,
    headers={
        # optional user agent
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
        "Referer": "https://www.weverse.io/",
        # replace with your auth token. Refer to https://gist.github.com/Xetera/aa59e84f3959a37c16a3309b5d9ab5a0 if you don't know how to get it
        "Authorization": "Bearer ...",
    },
    # Avoid hanging forever on a stalled connection.
    timeout=10,
)
# Fail with the real HTTP error (e.g. bad token or signature) instead of a
# confusing JSON decode error on an error page.
resp.raise_for_status()
print(resp.json())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment