Skip to content

Instantly share code, notes, and snippets.

@jjeanjacques10
Created January 25, 2022 21:40
Show Gist options
  • Save jjeanjacques10/938ab61fa2c6064d42c63bafdf69e3e2 to your computer and use it in GitHub Desktop.
Save jjeanjacques10/938ab61fa2c6064d42c63bafdf69e3e2 to your computer and use it in GitHub Desktop.
Extract manga chapters from muitomanga.com using Python.
import requests
import os
from bs4 import BeautifulSoup
def get_manga_chapter(manga_name, num_chapters):
    """Download the page images of a manga's chapters from muitomanga.com.

    Chapters 1 through ``num_chapters`` (inclusive) are fetched. For each
    chapter the reader page is requested, the first inline ``<script>`` whose
    text contains the letter "d" is parsed as the image list, and every image
    it names is written to ``./manga/{manga_name}/{chapter}/``. Chapter pages
    or images that answer with an HTTP error status are reported and skipped.

    Args:
        manga_name: Manga identifier as it appears in the site's reader URLs.
        num_chapters: Number of chapters to download, counting from chapter 1.

    Returns:
        None. Progress is printed and the images are written to disk.
    """
    # +1 so that the last requested chapter is included (range excludes its end).
    for chapter in range(1, num_chapters + 1):
        chapter_url = f"https://muitomanga.com/ler/{manga_name}/capitulo-{chapter}"
        print("Getting chapter {}".format(chapter_url))

        page = requests.get(chapter_url)
        if not page.ok:
            # Do not try to parse an error page as a chapter.
            print("Skipping {} (HTTP {})".format(chapter_url, page.status_code))
            continue

        soup = BeautifulSoup(page.content, 'html.parser')
        for script in soup.find_all("script"):
            # External scripts (<script src=...>) have no inline text, so
            # indexing contents[0] on them would raise IndexError.
            if not script.contents:
                continue
            script_text = script.contents[0]
            # NOTE(review): "d" is a very loose marker for the image-list
            # script; it is kept as in the original -- confirm against the
            # site's markup.
            if "d" not in script_text:
                continue

            # Take the value assigned in the script's first statement and drop
            # the surrounding brackets; presumably a JSON-like array of URLs.
            raw_list = (
                script_text.split(";")[0]
                .split("=")[1]
                .replace("[", "")
                .replace("]", "")
                .strip()
            )
            chapter_images = [clean_images(image) for image in raw_list.split(",")]
            print("Found {} images".format(len(chapter_images)))

            chapter_dir = f"./manga/{manga_name}/{chapter}"
            os.makedirs(chapter_dir, exist_ok=True)

            for image in chapter_images:
                print("Downloading {}".format(image))
                # A single request provides both the final URL and the bytes.
                response = requests.get(image)
                if not response.ok:
                    # Avoid saving an error page under an image file name.
                    print("Skipping {} (HTTP {})".format(image, response.status_code))
                    continue
                file_name = response.url.split('/')[-1]
                with open(f"{chapter_dir}/{file_name}", 'wb') as f:
                    f.write(response.content)

            # Only the first matching script is used for each chapter.
            break
def clean_images(image):
    r"""Turn one raw entry of the scraped image list into a plain URL.

    Un-escapes JSON-style slashes (``\/`` becomes ``/``), removes a leading
    ``["`` and every double quote, and strips surrounding whitespace that may
    be left over when the list is split on commas.

    Args:
        image: One comma-separated piece of the image list, as a string.

    Returns:
        The cleaned string.
    """
    # "\\/" is a literal backslash followed by a slash; the unescaped form
    # "\/" is an invalid escape sequence and warns on recent Python versions.
    return image.replace("\\/", "/").replace('["', "").replace('"', "").strip()
if __name__ == "__main__":
    # Example run when executed as a script: fetch 'ao-no-exorcist' with a
    # chapter argument of 4.
    get_manga_chapter(manga_name="ao-no-exorcist", num_chapters=4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment