@zeheater · Created April 20, 2020
[POC] Extract encrypted image URLs from the Kissmanga website, using a Node.js VM binding (node_vm2) from Python.
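
The script relies on node_vm2, a Python bridge to a Node.js vm2 sandbox: JavaScript is loaded once, and Python then calls functions defined inside it. A minimal sketch of that pattern, mirroring the VM(code=...).create() / .call() usage in the script below (the greet function is only an illustration, not part of the scraper):

from node_vm2 import VM

js = """
function greet(name) {
    return "Hello " + name + "!";
}
"""

vm = VM(code=js).create()          # boot the sandbox with the JS preloaded
print(vm.call("greet", "world"))   # -> Hello world!
vm.destroy()                       # shut the Node process down

Kissmanga embeds each image URL as an encrypted argument to its own wrapKA() JavaScript function, so rather than reimplementing the cipher in Python, the scraper loads the site's scripts into such a VM and calls wrapKA directly.
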
import sys
import cfscrape
from bs4 import BeautifulSoup as BS
import argparse
from node_vm2 import VM
import re

BASE_URL = 'https://kissmanga.com'

args = None  # filled by argparse in __main__
scraper = cfscrape.create_scraper(delay=10)  # cfscrape handles Cloudflare's anti-bot challenge
jsEng = None  # node_vm2 VM, created once the cipher scripts are fetched

def extractImageList(page):
    """Extract the encrypted image strings from the raw page source."""
    # Each image is embedded as wrapKA("<ciphertext>"); capture the ciphertext.
    imgurls = re.compile(r'(?<=wrapKA\(").*(?="\))').findall(page)
    # Decrypt by calling the site's own wrapKA() inside the JS engine.
    return list(map(lambda s: jsEng.call('wrapKA', s), imgurls))

def initJsEngineAndExtractUrls(url):
    """Initialize the JS engine with the cipher key and print all image urls."""
    global jsEng
    req = scraper.request('get', url=url)
    req.raise_for_status()
    page = BS(req.text, 'html5lib')
    jslist = page.find_all('script')
    try:
        # The inline script mentioning 'chko' carries the per-page cipher key.
        homejs = next(entry for entry in jslist
                      if 'chko' in entry.decode_contents()).decode_contents().strip()
        # Fetch the site's crypto scripts so wrapKA and its dependencies exist in the VM.
        cajs = scraper.request('get', 'https://kissmanga.com/Scripts/ca.js').text
        lojs = scraper.request('get', 'https://kissmanga.com/Scripts/lo.js').text
        jsEng = VM(code="\n".join([cajs, lojs, homejs])).create()
        for entry in extractImageList(req.text):
            print(entry)
    except StopIteration:
        print("Can't extract cipher key")
        sys.exit(1)

def main():
    """List chapters for the given manga url and scrape the chosen one."""
    req = scraper.request(method='get', url=args.url)
    req.raise_for_status()
    page = BS(req.text, 'html5lib')
    # Chapter links live in the <table class="listing">, newest first; reverse
    # them so the printed indices run in reading order.
    chapters = page.find('table', {'class': 'listing'}).find_all('a')
    chapters.reverse()
    for num, entry in enumerate(chapters):
        # TODO: Prettify with column output
        print(f'{num:03d}. {entry.text.strip()}')
    startindex = int(input('Start Index: '))
    # lstrip guards against a doubled slash when the href is site-absolute.
    initJsEngineAndExtractUrls(f"{BASE_URL}/{chapters[startindex]['href'].lstrip('/')}")

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Kissmanga scraper')
    parser.add_argument('url', help='Base url to scrape')
    args = parser.parse_args()
    # Warm the session up against the Cloudflare challenge before real requests.
    scraper.request('get', BASE_URL)
    main()
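
Usage, assuming the script is saved as kissmanga_poc.py (the filename and the manga URL are illustrative):

python kissmanga_poc.py https://kissmanga.com/Manga/Some-Title

The script prints a numbered chapter list, prompts for a start index, and then prints the decrypted image URLs for the selected chapter.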