Skip to content

Instantly share code, notes, and snippets.

@mfrazi
Last active May 8, 2021 18:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mfrazi/7caff417a19aad999199116a6f6f4177 to your computer and use it in GitHub Desktop.
Save mfrazi/7caff417a19aad999199116a6f6f4177 to your computer and use it in GitHub Desktop.
Download Shingeki no Kyojin from https://ww5.readsnk.com/
from bs4 import BeautifulSoup
import requests
import os
import shutil
# Taken from http://stackoverflow.com/questions/23793987/python-write-file-to-directory-doesnt-exist
# Taken from http://stackoverflow.com/a/600612/119527
def mkdir_p(path):
    ''' Create directory "path" and any missing parents, like "mkdir -p".

    An already existing directory is not an error. An empty "path" (which is
    what os.path.dirname() returns for a bare file name) refers to the
    current directory, so there is nothing to create and the call is a no-op.

    Raises OSError when os.makedirs() fails and "path" is still not a
    directory afterwards (e.g. a regular file is in the way).
    '''
    if not path:
        # os.makedirs('') raises; the current directory already exists.
        return
    try:
        os.makedirs(path)
    except OSError:
        # makedirs also fails when the leaf already exists. That is only a
        # real error if what exists there is not a directory.
        if not os.path.isdir(path):
            raise
def safe_open_w(path):
    ''' Return "path" opened in binary read/write mode ('w+b'), after making
    sure the directory that will contain it exists.
    '''
    parentDir = os.path.dirname(path)
    mkdir_p(parentDir)
    handle = open(path, 'w+b')
    return handle
# Download every numbered chapter linked from the index page at URL and store
# its page images as SNK/<chapter>/<NNN>.jpg.
URL = 'https://ww5.readsnk.com/'
# Seconds to wait on the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

r = requests.get(URL, timeout=REQUEST_TIMEOUT)
responseBody = r.text
soup = BeautifulSoup(responseBody, 'html.parser')
chapterTable = soup.find('tbody', {'class': 'no-border-x'})
if chapterTable is None:
    # find() returns None when nothing matches; fail with a readable message
    # instead of an AttributeError on the following find_all() call.
    raise RuntimeError('Chapter table not found at ' + URL)
chapterList = chapterTable.find_all('a')

for chapter in chapterList:
    # Anchors without an href yield '' and are rejected by the digit check.
    chapterLink = chapter.get('href', '')
    # Text after 'chapter-' with any trailing slash removed. rstrip() is used
    # rather than [:-1] so a link without a trailing slash keeps its last digit.
    chapterNumber = chapterLink.partition('chapter-')[2].rstrip('/')
    if not chapterNumber.isdigit():
        continue

    try:
        chapterBody = requests.get(chapterLink, timeout=REQUEST_TIMEOUT).text
    except requests.RequestException as exc:
        print('Skipping chapter ' + chapterNumber + ': ' + str(exc))
        continue

    chapterSoup = BeautifulSoup(chapterBody, 'html.parser')
    imageList = chapterSoup.find_all('img', {'class': 'pages__img'})

    # Images are numbered from 1 in page order; a failed image still consumes
    # its number so the remaining file names stay aligned with the page order.
    for counterImage, image in enumerate(imageList, 1):
        imageLink = (image.get('src') or '').strip()
        imageNumber = '/' + format(counterImage, '03')
        ext = '.jpg'
        imageLocation = 'SNK/' + chapterNumber + imageNumber + ext
        print(imageLocation)

        if not imageLink:
            print('  skipped: image tag has no src')
            continue

        try:
            # The context manager releases the streamed connection even when
            # the download is skipped or fails part-way.
            with requests.get(imageLink, stream=True,
                              timeout=REQUEST_TIMEOUT) as response:
                if response.status_code != 200:
                    print('  skipped: HTTP ' + str(response.status_code))
                    continue
                with safe_open_w(imageLocation) as f:
                    # Have urllib3 undo gzip/deflate so raw image bytes are written.
                    response.raw.decode_content = True
                    shutil.copyfileobj(response.raw, f)
        except Exception as exc:
            # Best-effort download: one bad image must not abort the run.
            # Errors raised while reading response.raw come from urllib3 and
            # are not RequestException, hence the broad catch. Unlike a bare
            # "except:", Ctrl-C (KeyboardInterrupt) still stops the script.
            print('  failed: ' + str(exc))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment