dhondta/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Pentester Academy Download Tools

This is a set of tools using Tinyscript in order to download resources from pentesteracademy.com and compress videos.

pta-downloader: downloads the resources of the given course identifiers, optionally compressing the downloaded videos.
video-compressor: compresses videos after the fact (a posteriori).

$ pip install tinyscript
$ tsm install pta-downloader
$ tsm install video-compressor

PTA Downloader

This tool relies on wget and ffmpeg. It takes a session cookie for pentesteracademy.com as its first positional argument, followed by one or more course identifiers (multiple courses are downloaded sequentially). Video compression can be enabled either with the default ratio (30) or by specifying a compression ratio explicitly. Check out the examples at the end of the help message to see the different usages.
Video Compressor

This short tool relies on ffmpeg. It takes a folder as its positional argument and an optional compression ratio (defaults to 30). Check out the examples at the end of the help message to see the different usages.

  
## pta-downloader.py
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from pybots import HTTPBot
from tinyscript import *


# --- script metadata (module-level dunders; presumably read by tinyscript's initialize() -- TODO confirm) ---
__author__    = "Alexandre D'Hondt"
__version__   = "1.0"
__copyright__ = ("A. D'Hondt", 2020)
__license__   = "gpl-3.0"
__doc__       = """
This tool helps download videos of the courses from PentesterAcademy. It can also compress the videos.
"""
# sample command-line arguments: a quoted session cookie, then course ID(s) and options
__examples__  = [
    '"__strip_mid=...; SACSID=~..." 30 --compress',
    '"__strip_mid=...; SACSID=~..." 10 11 12 --crf 20',
]

# --- banner and script name settings ---
BANNER_FONT       = "standard"
BANNER_STYLE      = dict(fgcolor="lolcat")
SCRIPTNAME_FORMAT = "none"


class PTADownloader(HTTPBot):
    """Bot that downloads the resources of PentesterAcademy courses.

    Usage pattern (as done by the script's main section): build it with a session cookie, call count() to read the
    download quota, then download() per course. MP4 files can be re-encoded with ffmpeg right after their download.
    """

    def __init__(self, cookie, *args, **kwargs):
        """Bind the bot to https://www.pentesteracademy.com and install the session cookie.

        :param cookie: session cookie string, passed to _set_cookie()
        :param args:   positional arguments forwarded to HTTPBot
        :param kwargs: keyword arguments forwarded to HTTPBot
        """
        super(PTADownloader, self).__init__("https://www.pentesteracademy.com", *args, **kwargs)
        self._set_cookie(cookie)

    def count(self):
        """Fetch the account logs page, store the quota in self.total / self.downloaded and log what remains.

        download() reads both attributes, so count() has to run before it.
        """
        self.get("/members?options=accountlogs")
        for p in self.soup.find("div", {'id': "content"}).find_all("p"):
            text = p.text
            if text.startswith("Total plays allowed based on subscription date "):
                # the allowed total is read from the second <b> element of this paragraph
                self.total = int(p.find_all("b")[1].text)
            elif text.startswith("Total plays till date = "):
                self.downloaded = int(p.find("b").text)
        self.logger.warning("%d of %d downloads remaining" % (self.total - self.downloaded, self.total))

    def download(self, course_id, crf=0):
        """Download every resource of a course into a "<course_id>_<slugified-title>" folder.

        Reads self.total and self.downloaded, hence count() must have been called beforehand.

        :param course_id: numeric identifier of the course
        :param crf:       factor passed to ffmpeg's -crf option for MP4 files ; 0 disables compression
        """
        self.get("/course?id=%d" % course_id)
        title = self.soup.find_all("section", {'class': "static-banner"})[0].text.strip()
        self.logger.info("Course: " + title)
        root = ts.Path("%d_%s" % (course_id, ts.slugify(title)), create=True)
        # first, collect links from the page of the selected course (anchors without href are ignored)
        video_pages = [a.attrs['href']
                       for media in self.soup.find_all("div", {'class': "media"})
                       for a in media.find_all("a") if 'href' in a.attrs]
        needed, remaining = len(video_pages), self.total - self.downloaded
        if needed > remaining:
            self.logger.warning("You may have too few downloads remaining to get the full course (remaining: %d ; "
                                "needed: %d)" % (remaining, needed))
        # then, process download links
        processed, name_counts = set(), {}
        for video_page in video_pages:
            self.get(video_page)
            # collect unique download links (MP4, ZIP archives with code, examples, etc), keeping page order
            dlinks = []
            for div in self.soup.find_all("div", {'class': "active"}):
                for a in div.find_all("a"):
                    href = a.attrs.get('href')
                    if href is not None and href not in dlinks:
                        dlinks.append(href)
            # now, process them, resolving clash names (e.g. code.zip for multiple modules)
            for dlink in dlinks:
                self.get(dlink, allow_redirects=False)
                try:
                    dlink = self.response.headers['Location']
                except KeyError:
                    continue  # this happens with non-download links
                url = ts.urlparse(dlink)
                # the query string is dropped so that the same resource is only fetched once
                u = "%s://%s%s" % (url.scheme, url.netloc, url.path)
                if u in processed:
                    continue
                processed.add(u)
                filename = PTADownloader._unique_name(url.path.split("/")[-1], name_counts)
                fp = root.joinpath(filename)
                if not fp.exists():
                    PTADownloader.process(dlink, fp, crf)
                else:
                    self.logger.warning("Skipping %s..." % filename)

    @staticmethod
    def _unique_name(filename, seen):
        """Return a name that does not clash with the ones already met: repeats get "-<n>" before the extension.

        :param filename: name taken from the download URL
        :param seen:     dict mapping each ORIGINAL name to the number of times it was met (updated in place)
        :return:         filename for the first occurrence, then "<stem>-1<ext>", "<stem>-2<ext>", ...
        """
        n = seen.get(filename, 0)
        # the counter stays keyed by the original name ; keying it by the renamed file raised a KeyError
        seen[filename] = n + 1
        if n == 0:
            return filename
        stem, ext = os.path.splitext(filename)
        return "%s-%i%s" % (stem, n, ext)

    @staticmethod
    def execute(cmd):
        """Log the command at debug level, run it and return its return code.

        :param cmd: command as a list of arguments
        :return:    return code reported by ts.execute(..., returncode=True)
        """
        logger.debug(" ".join(cmd))
        return ts.execute(cmd, returncode=True)[-1]

    @staticmethod
    def process(url, path, crf=0):
        """Download url to path with wget, then optionally re-encode it with ffmpeg when it is an MP4.

        :param url:  direct download URL
        :param path: destination path object (needs filename, suffix, dirname, remove(), absolute())
        :param crf:  factor passed to ffmpeg's -crf option ; 0 disables compression
        """
        logger.info("Getting '%s'..." % path.filename)
        if PTADownloader.execute(["wget", "-O", str(path), url]) != 0:
            logger.error("Failed to download '%s'" % path.filename)
            # drop whatever wget left behind, otherwise a later run would skip this file as already downloaded
            path.remove(False)
            return
        if crf > 0 and path.suffix == ".mp4":
            logger.info("Compressing '%s'..." % path.filename)
            TMP = ts.Path(path.dirname, ".tmp.mp4")
            # clear a leftover from an interrupted run so that ffmpeg does not stop on an existing output file
            TMP.remove(False)
            rc = PTADownloader.execute(["ffmpeg", "-i", str(path), "-vcodec", "libx265", "-crf", str(crf),
                                        str(TMP)])
            if rc != 0 or not TMP.exists():
                # keep the downloaded video ; replacing it by a missing/partial file would lose it
                logger.error("Failed to compress '%s'" % path.filename)
                TMP.remove(False)
                return
            path.remove()
            TMP.rename(path.absolute())


DEFAULT_CRF = 30  # factor applied when --compress is given without --crf


def select_crf(compress, crf, default=DEFAULT_CRF):
    """Choose the compression factor handed to PTADownloader.download().

    :param compress: True when --compress was given
    :param crf:      value of --crf, or None when the option was not given
    :param default:  factor used when only --compress was given
    :return:         the factor to use ; 0 means "do not compress"
    """
    if crf is not None:
        return crf
    return default if compress else 0


if __name__ == '__main__':
    parser.add_argument("cookie", help="session cookie")
    parser.add_argument("cid", nargs="+", type=ts.pos_int, help="course ID's")
    parser.add_argument("-c", "--compress", action="store_true", help="compress videos after download")
    # no default here: with default=30, an explicit "--crf 30" could not be told apart from an absent option and
    #  was silently treated as "no compression"
    parser.add_argument("--crf", type=ts.pos_int, help="compression factor (default: %d)" % DEFAULT_CRF)
    initialize()
    ptad = PTADownloader(args.cookie, args.verbose)
    crf = select_crf(args.compress, args.crf)
    for cid in args.cid:
        # the quota is re-read before each course since the previous download consumed part of it
        ptad.count()
        ptad.download(cid, crf)

## requirements.txt
tinyscript>=1.23.15
pybots

## video-compressor.py
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from tinyscript import *


# --- script metadata (module-level dunders; presumably read by tinyscript's initialize() -- TODO confirm) ---
__author__    = "Alexandre D'Hondt"
__version__   = "1.6"
__copyright__ = ("A. D'Hondt", 2020)
__license__   = "gpl-3.0"
__doc__       = """
This tool is a simple wrapper for taking a folder and running ffmpeg on the videos found in order to compress them.
"""
# sample command-line arguments: the folder alone, then the folder with an explicit factor
__examples__  = [
    "my-folder",
    "my-folder --crf 20",
]


# --- banner and script name settings ---
BANNER_FONT       = "standard"
BANNER_STYLE      = dict(fgcolor="lolcat")
SCRIPTNAME_FORMAT = "none"

# extensions (lowercase, with the leading dot) of the files considered as videos
VIDEO_EXT    = [".avi", ".m4v", ".mkv", ".mov", ".mp4", ".wmv"]


def _m4v_commands(v1, v2, crf):
    """Build the command lines for an .m4v input: encode to an intermediate MP4, convert it to v2, then clean up.

    :param v1:  source video path (must expose dirname.joinpath())
    :param v2:  output path
    :param crf: factor passed to ffmpeg's -crf option
    :return:    list of shell-like command strings, to be run in order
    """
    tmp = v1.dirname.joinpath(".tmp.mp4")
    # every path is double-quoted: the commands get tokenized afterwards, so an unquoted path containing a space
    #  would be cut into several arguments
    return [
        "ffmpeg -i \"%s\" -vcodec libx265 -vf scale=720:-1 -crf %d \"%s\"" % (v1, crf, tmp),
        "ffmpeg -i \"%s\" \"%s\"" % (tmp, v2),
        "rm -f \"%s\"" % tmp,
    ]


# maps a file extension to a function (source, output, crf) -> list of command strings ; 'default' is the fallback
CMD_COMPR    = {
    'default': lambda v1, v2, crf: ["ffmpeg -i \"%s\" -vcodec libx265 -crf %d \"%s\"" % (v1, crf, v2)],
    '.m4v':    _m4v_commands,
    '.wmv':    lambda v1, v2, crf: ["ffmpeg -i \"%s\" -vcodec wmv2 -crf %d \"%s\"" % (v1, crf, v2)],
}


def compress(path, crf=0):
    """Re-encode one video in place.

    The commands selected in CMD_COMPR for the file's extension ('default' when there is no specific entry) write
    to a hidden ".tmp<ext>" file in the same folder ; the original is only replaced when all of them succeeded.

    :param path: path object of the video (needs dirname, suffix, remove(), absolute())
    :param crf:  factor passed to ffmpeg's -crf option
    """
    logger.info("Compressing '%s'..." % path)
    TMP = ts.Path(path.dirname, ".tmp" + path.suffix)
    TMP_MP4 = ts.Path(path.dirname, ".tmp.mp4")  # intermediate file used by the .m4v commands
    # start clean: leftovers of an interrupted run must not be mistaken for fresh output
    TMP.remove(False)
    TMP_MP4.remove(False)
    for cmd in CMD_COMPR.get(path.suffix, CMD_COMPR['default'])(path, TMP, crf):
        logger.debug(cmd)
        _, err, rc = ts.execute(shlex.split(cmd), returncode=True)
        # any non-zero code is a failure ; "rc > 0" let negative codes through (presumably a process killed by a
        #  signal -- TODO confirm for ts.execute) and the original video was then deleted
        if rc != 0:
            logger.debug(ensure_str(err))
            logger.error("Failed to compress %s" % path)
            TMP.remove(False)
            TMP_MP4.remove(False)
            return
    if not TMP.exists():
        # nothing was produced: keep the original rather than deleting it and failing on the rename
        logger.error("Failed to compress %s" % path)
        return
    path.remove()
    TMP.rename(path.absolute())


if __name__ == '__main__':
    parser.add_argument("folder", help="folder where the videos to be handled are")
    parser.add_argument("--crf", type=ts.pos_int, default=30, help="compression factor")
    initialize()

    def filter_videos(p):
        """Tell whether p is a video to compress (known extension, not a ".tmp" work file, not a "._" file)."""
        if p.suffix not in VIDEO_EXT:
            return False
        return not (p.stem == ".tmp" or p.stem.startswith("._"))

    # walk the folder and compress every matching file with the requested factor
    videos = ts.Path(args.folder).walk(filter_func=filter_videos)
    for fp in videos:
        compress(fp, args.crf)
	#!/usr/bin/env python
	# -- coding: UTF-8 --
	from pybots import HTTPBot
	from tinyscript import *


	# NOTE(review): from here on, the file repeats the pta-downloader.py script with every line prefixed by a tab
	#  and the nested indentation flattened ; it looks like a whitespace-mangled paste -- TODO confirm and remove
	# script metadata (module-level dunders ; presumably read by tinyscript's initialize() -- TODO confirm)
	__author__ = "Alexandre D'Hondt"
	__version__ = "1.0"
	__copyright__ = ("A. D'Hondt", 2020)
	__license__ = "gpl-3.0"
	__doc__ = """
	This tool helps download videos of the courses from PentesterAcademy. It can also compress the videos.
	"""
	# sample command-line arguments: a quoted session cookie, then course ID(s) and options
	__examples__ = [
	"\"__strip_mid=...; SACSID=~...\" 30 --compress",
	"\"__strip_mid=...; SACSID=~...\" 10 11 12 --crf 20",
	]

	# banner and script name settings
	BANNER_FONT = "standard"
	BANNER_STYLE = {'fgcolor': "lolcat"}
	SCRIPTNAME_FORMAT = "none"


	class PTADownloader(HTTPBot):
	    """Bot that downloads the resources of PentesterAcademy courses.

	    Usage pattern (as done by the script's main section): build it with a session cookie, call count() to read
	    the download quota, then download() per course. MP4 files can be re-encoded with ffmpeg after download.

	    NOTE(review): this copy had lost its indentation and the stars of "*args, **kwargs" ; both are restored.
	    """

	    def __init__(self, cookie, *args, **kwargs):
	        """Bind the bot to https://www.pentesteracademy.com and install the session cookie.

	        :param cookie: session cookie string, passed to _set_cookie()
	        :param args:   positional arguments forwarded to HTTPBot
	        :param kwargs: keyword arguments forwarded to HTTPBot
	        """
	        super(PTADownloader, self).__init__("https://www.pentesteracademy.com", *args, **kwargs)
	        self._set_cookie(cookie)

	    def count(self):
	        """Fetch the account logs page, store the quota in self.total / self.downloaded and log what remains.

	        download() reads both attributes, so count() has to run before it.
	        """
	        self.get("/members?options=accountlogs")
	        for p in self.soup.find("div", {'id': "content"}).find_all("p"):
	            text = p.text
	            if text.startswith("Total plays allowed based on subscription date "):
	                # the allowed total is read from the second <b> element of this paragraph
	                self.total = int(p.find_all("b")[1].text)
	            elif text.startswith("Total plays till date = "):
	                self.downloaded = int(p.find("b").text)
	        self.logger.warning("%d of %d downloads remaining" % (self.total - self.downloaded, self.total))

	    def download(self, course_id, crf=0):
	        """Download every resource of a course into a "<course_id>_<slugified-title>" folder.

	        Reads self.total and self.downloaded, hence count() must have been called beforehand.

	        :param course_id: numeric identifier of the course
	        :param crf:       factor passed to ffmpeg's -crf option for MP4 files ; 0 disables compression
	        """
	        self.get("/course?id=%d" % course_id)
	        title = self.soup.find_all("section", {'class': "static-banner"})[0].text.strip()
	        self.logger.info("Course: " + title)
	        root = ts.Path("%d_%s" % (course_id, ts.slugify(title)), create=True)
	        # first, collect links from the page of the selected course (anchors without href are ignored)
	        video_pages = [a.attrs['href']
	                       for media in self.soup.find_all("div", {'class': "media"})
	                       for a in media.find_all("a") if 'href' in a.attrs]
	        needed, remaining = len(video_pages), self.total - self.downloaded
	        if needed > remaining:
	            self.logger.warning("You may have too few downloads remaining to get the full course (remaining: %d ; "
	                                "needed: %d)" % (remaining, needed))
	        # then, process download links
	        processed, name_counts = set(), {}
	        for video_page in video_pages:
	            self.get(video_page)
	            # collect unique download links (MP4, ZIP archives with code, examples, etc), keeping page order
	            dlinks = []
	            for div in self.soup.find_all("div", {'class': "active"}):
	                for a in div.find_all("a"):
	                    href = a.attrs.get('href')
	                    if href is not None and href not in dlinks:
	                        dlinks.append(href)
	            # now, process them, resolving clash names (e.g. code.zip for multiple modules)
	            for dlink in dlinks:
	                self.get(dlink, allow_redirects=False)
	                try:
	                    dlink = self.response.headers['Location']
	                except KeyError:
	                    continue  # this happens with non-download links
	                url = ts.urlparse(dlink)
	                # the query string is dropped so that the same resource is only fetched once
	                u = "%s://%s%s" % (url.scheme, url.netloc, url.path)
	                if u in processed:
	                    continue
	                processed.add(u)
	                filename = PTADownloader._unique_name(url.path.split("/")[-1], name_counts)
	                fp = root.joinpath(filename)
	                if not fp.exists():
	                    PTADownloader.process(dlink, fp, crf)
	                else:
	                    self.logger.warning("Skipping %s..." % filename)

	    @staticmethod
	    def _unique_name(filename, seen):
	        """Return a name that does not clash with the ones already met: repeats get "-<n>" before the extension.

	        :param filename: name taken from the download URL
	        :param seen:     dict mapping each ORIGINAL name to the number of times it was met (updated in place)
	        :return:         filename for the first occurrence, then "<stem>-1<ext>", "<stem>-2<ext>", ...
	        """
	        n = seen.get(filename, 0)
	        # the counter stays keyed by the original name ; keying it by the renamed file raised a KeyError
	        seen[filename] = n + 1
	        if n == 0:
	            return filename
	        stem, ext = os.path.splitext(filename)
	        return "%s-%i%s" % (stem, n, ext)

	    @staticmethod
	    def execute(cmd):
	        """Log the command at debug level, run it and return its return code.

	        :param cmd: command as a list of arguments
	        :return:    return code reported by ts.execute(..., returncode=True)
	        """
	        logger.debug(" ".join(cmd))
	        return ts.execute(cmd, returncode=True)[-1]

	    @staticmethod
	    def process(url, path, crf=0):
	        """Download url to path with wget, then optionally re-encode it with ffmpeg when it is an MP4.

	        :param url:  direct download URL
	        :param path: destination path object (needs filename, suffix, dirname, remove(), absolute())
	        :param crf:  factor passed to ffmpeg's -crf option ; 0 disables compression
	        """
	        logger.info("Getting '%s'..." % path.filename)
	        if PTADownloader.execute(["wget", "-O", str(path), url]) != 0:
	            logger.error("Failed to download '%s'" % path.filename)
	            # drop whatever wget left behind, otherwise a later run would skip this file as already downloaded
	            path.remove(False)
	            return
	        if crf > 0 and path.suffix == ".mp4":
	            logger.info("Compressing '%s'..." % path.filename)
	            TMP = ts.Path(path.dirname, ".tmp.mp4")
	            # clear a leftover from an interrupted run so that ffmpeg does not stop on an existing output file
	            TMP.remove(False)
	            rc = PTADownloader.execute(["ffmpeg", "-i", str(path), "-vcodec", "libx265", "-crf", str(crf),
	                                        str(TMP)])
	            if rc != 0 or not TMP.exists():
	                # keep the downloaded video ; replacing it by a missing/partial file would lose it
	                logger.error("Failed to compress '%s'" % path.filename)
	                TMP.remove(False)
	                return
	            path.remove()
	            TMP.rename(path.absolute())


	if __name__ == '__main__':
	    # NOTE(review): the indentation of this section had been flattened ; the nesting is restored here
	    parser.add_argument("cookie", help="session cookie")
	    parser.add_argument("cid", nargs="+", type=ts.pos_int, help="course ID's")
	    parser.add_argument("-c", "--compress", action="store_true", help="compress videos after download")
	    # no default here: with default=30, an explicit "--crf 30" could not be told apart from an absent option
	    #  and was silently treated as "no compression"
	    parser.add_argument("--crf", type=ts.pos_int, help="compression factor (default: 30)")
	    initialize()
	    ptad = PTADownloader(args.cookie, args.verbose)
	    if args.crf is not None:
	        crf = args.crf          # explicit factor always wins
	    elif args.compress:
	        crf = 30                # --compress alone: default factor
	    else:
	        crf = 0                 # no compression requested
	    for cid in args.cid:
	        # the quota is re-read before each course since the previous download consumed part of it
	        ptad.count()
	        ptad.download(cid, crf)