jjeanjacques10/extract-mangas.py

## extract-mangas.py
import requests
import os
from bs4 import BeautifulSoup


def _extract_image_urls(script_text):
    '''
    Parse image URLs out of an inline script shaped like "name = [...];".

    Only the first statement (the text before the first ";") is inspected.
    Returns an empty list when that statement has no "=" or contains no
    http(s) URL, so the caller can move on to the next script tag.
    '''
    first_statement = script_text.split(";")[0]
    # partition() keeps everything after the first "=", so URLs that contain
    # "=" themselves (e.g. in a query string) are not cut short.
    _, equals, array_literal = first_statement.partition("=")
    if not equals:
        return []
    raw_items = array_literal.replace("[", "").replace("]", "").split(",")
    urls = [clean_images(item).strip() for item in raw_items]
    # requests can only fetch http(s) URLs; anything else would raise later.
    return [url for url in urls if url.lower().startswith(("http://", "https://"))]


def get_manga_chapter(manga_name, num_chapters, timeout=30):
    '''
    Get the chapter images of manga {manga_name} from the website muitomanga.com

    Chapters 1 through num_chapters (inclusive) are requested. The images of
    each chapter are written to ./manga/{manga_name}/{chapter}/, named after
    the last path segment of the image URL.

    manga_name: slug of the manga as used in the site URL.
    num_chapters: number of chapters to fetch, starting at chapter 1.
    timeout: seconds to wait on each HTTP request before requests gives up.

    Chapters or images answered with a non-success HTTP status are reported
    and skipped. Connection errors and timeouts raised by requests propagate.
    '''
    # +1 because range() excludes its stop value.
    for chapter in range(1, num_chapters + 1):
        chapter_url = f"https://muitomanga.com/ler/{manga_name}/capitulo-{chapter}"
        print("Getting chapter {}".format(chapter_url))
        page = requests.get(chapter_url, timeout=timeout)
        if not page.ok:
            print("Skipping chapter {}: HTTP {}".format(chapter, page.status_code))
            continue
        soup = BeautifulSoup(page.content, 'html.parser')

        # The image list lives in an inline script
        # (XPath "/html/body/div[1]/script/text()").
        chapter_images = []
        for script in soup.find_all("script"):
            # <script src="..."> tags have no inline content to inspect.
            if not script.contents:
                continue
            script_text = str(script.contents[0])
            # NOTE(review): "d" is a very loose marker; presumably it stands
            # for the name of the variable holding the images - confirm.
            if "d" not in script_text:
                continue
            chapter_images = _extract_image_urls(script_text)
            if chapter_images:
                break

        print("Found {} images".format(len(chapter_images)))
        if not chapter_images:
            continue

        chapter_dir = f"./manga/{manga_name}/{chapter}"
        os.makedirs(chapter_dir, exist_ok=True)

        for image in chapter_images:
            print("Downloading {}".format(image))
            # One request per image; its body is what gets written to disk.
            response = requests.get(image, timeout=timeout)
            if not response.ok:
                print("Skipping {}: HTTP {}".format(image, response.status_code))
                continue
            # Drop any query string so it does not end up in the file name.
            file_name = response.url.split('/')[-1].split('?')[0]
            with open(f"{chapter_dir}/{file_name}", 'wb') as f:
                f.write(response.content)


def clean_images(image):
    '''
    Turn one raw entry of the scraped image list into a plain URL string.

    Backslash-escaped slashes become "/", a leading '["' and every double
    quote are removed, and surrounding whitespace is trimmed.
    '''
    # "\\/" is the explicit spelling of backslash + slash; the bare "\/" form
    # is an invalid escape sequence that newer Python versions warn about.
    return image.replace("\\/", "/").replace('["', "").replace('"', "").strip()


if __name__ == "__main__":
    # Script entry point: runs only when this file is executed directly,
    # not when it is imported as a module.
    get_manga_chapter(manga_name='ao-no-exorcist', num_chapters=4)
	import requests
	import os
	from bs4 import BeautifulSoup


	def _extract_image_urls(script_text):
	    '''
	    Parse image URLs out of an inline script shaped like "name = [...];".

	    Only the first statement (the text before the first ";") is inspected.
	    Returns an empty list when that statement has no "=" or contains no
	    http(s) URL, so the caller can move on to the next script tag.
	    '''
	    first_statement = script_text.split(";")[0]
	    # partition() keeps everything after the first "=", so URLs that contain
	    # "=" themselves (e.g. in a query string) are not cut short.
	    _, equals, array_literal = first_statement.partition("=")
	    if not equals:
	        return []
	    raw_items = array_literal.replace("[", "").replace("]", "").split(",")
	    urls = [clean_images(item).strip() for item in raw_items]
	    # requests can only fetch http(s) URLs; anything else would raise later.
	    return [url for url in urls if url.lower().startswith(("http://", "https://"))]


	def get_manga_chapter(manga_name, num_chapters, timeout=30):
	    '''
	    Get the chapter images of manga {manga_name} from the website muitomanga.com

	    Chapters 1 through num_chapters (inclusive) are requested. The images of
	    each chapter are written to ./manga/{manga_name}/{chapter}/, named after
	    the last path segment of the image URL.

	    manga_name: slug of the manga as used in the site URL.
	    num_chapters: number of chapters to fetch, starting at chapter 1.
	    timeout: seconds to wait on each HTTP request before requests gives up.

	    Chapters or images answered with a non-success HTTP status are reported
	    and skipped. Connection errors and timeouts raised by requests propagate.
	    '''
	    # +1 because range() excludes its stop value.
	    for chapter in range(1, num_chapters + 1):
	        chapter_url = f"https://muitomanga.com/ler/{manga_name}/capitulo-{chapter}"
	        print("Getting chapter {}".format(chapter_url))
	        page = requests.get(chapter_url, timeout=timeout)
	        if not page.ok:
	            print("Skipping chapter {}: HTTP {}".format(chapter, page.status_code))
	            continue
	        soup = BeautifulSoup(page.content, 'html.parser')

	        # The image list lives in an inline script
	        # (XPath "/html/body/div[1]/script/text()").
	        chapter_images = []
	        for script in soup.find_all("script"):
	            # <script src="..."> tags have no inline content to inspect.
	            if not script.contents:
	                continue
	            script_text = str(script.contents[0])
	            # NOTE(review): "d" is a very loose marker; presumably it stands
	            # for the name of the variable holding the images - confirm.
	            if "d" not in script_text:
	                continue
	            chapter_images = _extract_image_urls(script_text)
	            if chapter_images:
	                break

	        print("Found {} images".format(len(chapter_images)))
	        if not chapter_images:
	            continue

	        chapter_dir = f"./manga/{manga_name}/{chapter}"
	        os.makedirs(chapter_dir, exist_ok=True)

	        for image in chapter_images:
	            print("Downloading {}".format(image))
	            # One request per image; its body is what gets written to disk.
	            response = requests.get(image, timeout=timeout)
	            if not response.ok:
	                print("Skipping {}: HTTP {}".format(image, response.status_code))
	                continue
	            # Drop any query string so it does not end up in the file name.
	            file_name = response.url.split('/')[-1].split('?')[0]
	            with open(f"{chapter_dir}/{file_name}", 'wb') as f:
	                f.write(response.content)


	def clean_images(image):
	    '''
	    Turn one raw entry of the scraped image list into a plain URL string.

	    Backslash-escaped slashes become "/", a leading '["' and every double
	    quote are removed, and surrounding whitespace is trimmed.
	    '''
	    # "\\/" is the explicit spelling of backslash + slash; the bare "\/" form
	    # is an invalid escape sequence that newer Python versions warn about.
	    return image.replace("\\/", "/").replace('["', "").replace('"', "").strip()


	if __name__ == '__main__':
	    # Script entry point: runs only when this file is executed directly,
	    # not when it is imported as a module.
	    get_manga_chapter('ao-no-exorcist', 4)