Last active
April 1, 2024 00:53
-
-
Save marethyu/62ecff061e0d43b779ebf873ce2f0c41 to your computer and use it in GitHub Desktop.
Script to fetch the latest chapter of a series in Comicwalker
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools
import json
import os
import requests
import sys
from bs4 import BeautifulSoup
# HTTP headers sent with the requests made by this script: a desktop Chrome
# User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/96.0.4664.110 Safari/537.36'
    ),
}

# Base URL that the API request paths are appended to.
CW_API_ENDPOINT = 'https://comic-walker.com/api'
def get_latest_episode_id(cw_url):
    """Return the id of the first entry in a series page's ``latestEpisodes``.

    The page is fetched and the JSON text inside the element whose id is
    ``__NEXT_DATA__`` is parsed. The id is read from the first query in
    ``props.pageProps.dehydratedState.queries`` whose ``state.data`` holds a
    ``latestEpisodes`` entry.

    Args:
        cw_url: URL of the series page to fetch.

    Returns:
        ``latestEpisodes['result'][0]['id']`` from the embedded page data.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        ValueError: if the page contains no ``__NEXT_DATA__`` payload.
        KeyError: if no query carries ``latestEpisodes`` (or another expected
            key is absent).
        IndexError: if ``latestEpisodes['result']`` is empty.
    """
    page = requests.get(cw_url, headers=headers)
    # Fail on 4xx/5xx instead of trying to parse an error page.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    next_data = soup.find(id='__NEXT_DATA__')
    if next_data is None or not next_data.contents:
        raise ValueError(f'No __NEXT_DATA__ payload found at {cw_url}')

    data = json.loads(next_data.contents[0])
    queries = data['props']['pageProps']['dehydratedState']['queries']

    # Scan instead of assuming position 0, so a reordering of the page's
    # queries does not break the lookup. When the first query holds
    # latestEpisodes the result is the same as indexing queries[0].
    for query in queries:
        query_data = (query.get('state') or {}).get('data')
        if isinstance(query_data, dict) and 'latestEpisodes' in query_data:
            return query_data['latestEpisodes']['result'][0]['id']

    raise KeyError('latestEpisodes')
def generate_key(hash):
    """Decode the first 16 hex digits of ``hash`` into an 8-byte key.

    Args:
        hash: Hexadecimal string; only its first 16 characters are used.

    Returns:
        The ``bytes`` value encoded by ``hash[:16]``.

    Raises:
        ValueError: if ``hash`` has fewer than 16 characters, or if the
            prefix is not valid hexadecimal.
    """
    key_hex = hash[:16]
    # A short prefix would give a truncated key, and an empty one an empty
    # key, which makes any zip-with-cycle XOR produce no output at all.
    if len(key_hex) < 16:
        raise ValueError(
            f'hash must contain at least 16 hex digits, got {len(key_hex)}'
        )
    return bytes.fromhex(key_hex)
if __name__ == '__main__':
    # Command-line entry point:
    #   cw_fetch_latest.py series-url output-directory
    # Looks up the latest episode id of the series, asks the viewer API for
    # the episode's page list, then downloads every page, XORs it with the
    # key derived from its drmHash and writes it as <index>.jpg.
    if len(sys.argv) < 3:
        # Passing a string to sys.exit prints it to stderr and exits with
        # status 1, so a usage error is not reported as success.
        sys.exit('Usage: cw_fetch_latest.py series-url output-directory')

    # Seconds to wait on each socket operation before giving up; without a
    # timeout a stalled connection would hang the script indefinitely.
    request_timeout = 30

    episode_id = get_latest_episode_id(sys.argv[1])
    output_directory = sys.argv[2]

    print(f'Latest chapter id: {episode_id}')

    url = f'{CW_API_ENDPOINT}/contents/viewer?episodeId={episode_id}&imageSizeType=width%3A1284'
    viewer_response = requests.get(url, headers=headers, timeout=request_timeout)
    viewer_response.raise_for_status()
    images_raw = viewer_response.json()['manuscripts']

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_directory, exist_ok=True)

    print(f'Starting to download the latest chapter to {output_directory}')

    npages = len(images_raw)
    for page_index, raw_image in enumerate(images_raw):
        print(f'Downloading page {page_index + 1}/{npages}')

        xor_key = generate_key(raw_image['drmHash'])
        if not xor_key:
            # An empty key would make the zip below yield nothing and an
            # empty file would be written silently.
            raise ValueError(f'Empty DRM key for page {page_index + 1}')

        # Send the same headers as the other requests, and refuse to decode
        # an HTTP error body as if it were image data.
        image_response = requests.get(
            raw_image['drmImageUrl'],
            headers=headers,
            timeout=request_timeout,
        )
        image_response.raise_for_status()

        # XOR every byte with the key, repeating the key for the full length.
        decoded = bytes(
            data_byte ^ key_byte
            for data_byte, key_byte in zip(image_response.content, itertools.cycle(xor_key))
        )

        # Files are numbered from 0 (page N is saved as <N-1>.jpg). The file
        # is opened only after decoding succeeded, so a failed download does
        # not leave an empty file behind.
        page_path = os.path.join(output_directory, f'{page_index}.jpg')
        with open(page_path, 'wb') as f:
            f.write(decoded)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment