Last active
May 8, 2021 18:15
-
-
Save mfrazi/7caff417a19aad999199116a6f6f4177 to your computer and use it in GitHub Desktop.
Download Shingeki no Kyojin from https://ww5.readsnk.com/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import shutil
import sys

import requests
from bs4 import BeautifulSoup
# Taken from http://stackoverflow.com/questions/23793987/python-write-file-to-directory-doesnt-exist
# Taken from http://stackoverflow.com/a/600612/119527
def mkdir_p(path):
    ''' Create directory "path" together with any missing parent directories.

    Does nothing when "path" already exists as a directory. Any other
    failure reported by os.makedirs (for example "path" existing as a
    regular file) propagates to the caller as an OSError.
    '''
    # exist_ok=True lets os.makedirs tolerate an already-existing directory,
    # replacing the manual "catch OSError, then check os.path.isdir" logic.
    os.makedirs(path, exist_ok=True)
def safe_open_w(path):
    ''' Open "path" for writing, creating any parent directories as needed.

    The file is opened in binary 'w+b' mode, so an existing file is
    truncated. Returns the open file object; the caller is responsible for
    closing it (for example by using it in a "with" statement).
    '''
    parent = os.path.dirname(path)
    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create the parent when there is one.
    if parent:
        mkdir_p(parent)
    return open(path, 'w+b')
URL = 'https://ww5.readsnk.com/'
# Seconds to wait on the server for each request. Without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 30

# Fetch the index page and collect every link found in the chapter table.
r = requests.get(URL, timeout=REQUEST_TIMEOUT)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
chapterTable = soup.find('tbody', {'class': 'no-border-x'})
if chapterTable is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError('Chapter table not found on ' + URL)
chapterList = chapterTable.find_all('a')

for chapter in chapterList:
    chapterLink = chapter.get('href')
    if not chapterLink:
        continue
    # The chapter number is whatever follows 'chapter-' in the link. Strip a
    # trailing slash explicitly rather than blindly dropping the last
    # character, which would cut off a digit when the slash is absent.
    chapterNumber = chapterLink.partition('chapter-')[2].rstrip('/')
    if not chapterNumber.isdigit():
        continue

    try:
        chapterResponse = requests.get(chapterLink, timeout=REQUEST_TIMEOUT)
        chapterResponse.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: report the failed chapter and carry on with the rest.
        print('Skipping chapter {}: {}'.format(chapterNumber, exc),
              file=sys.stderr)
        continue

    chapterSoup = BeautifulSoup(chapterResponse.text, 'html.parser')
    imageList = chapterSoup.find_all('img', {'class': 'pages__img'})

    # Pages are numbered from 1 in the order they appear in the chapter.
    for counterImage, image in enumerate(imageList, start=1):
        imageLocation = 'SNK/{}/{:03d}.jpg'.format(chapterNumber, counterImage)
        imageLink = (image.get('src') or '').strip()
        if not imageLink:
            print('No image source for ' + imageLocation, file=sys.stderr)
            continue
        print(imageLocation)
        try:
            # The "with" block closes the streamed response so its
            # connection is released even when the download fails.
            with requests.get(imageLink, stream=True,
                              timeout=REQUEST_TIMEOUT) as imageResponse:
                if imageResponse.status_code != 200:
                    print('Skipping {}: HTTP {}'.format(
                        imageLink, imageResponse.status_code), file=sys.stderr)
                    continue
                # Let urllib3 undo any gzip/deflate transfer encoding so the
                # bytes written to disk are the actual image.
                imageResponse.raw.decode_content = True
                with safe_open_w(imageLocation) as f:
                    shutil.copyfileobj(imageResponse.raw, f)
        except Exception as exc:
            # Deliberately best effort: one bad image must not abort the run.
            # Exception (not a bare except) is caught because reading the raw
            # stream can raise urllib3 errors that requests does not wrap,
            # while KeyboardInterrupt/SystemExit still stop the script.
            print('Failed to download {}: {}'.format(imageLink, exc),
                  file=sys.stderr)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment