Skip to content

Instantly share code, notes, and snippets.

@debakarr
Last active March 4, 2024 22:20
Show Gist options
  • Save debakarr/0104c0a91913f9261f604c1a4061a14f to your computer and use it in GitHub Desktop.
Save debakarr/0104c0a91913f9261f604c1a4061a14f to your computer and use it in GitHub Desktop.
Simple Python Script to download KodeKloud Resources

Disclaimer

Please use this only to keep your local copy. Please don't distribute the courses illegally. You will be responsible for the legal consequences.


Steps to follow

  • Sign in to kodekloud.com
  • Right Click -> Inspect -> Go to Network Tab -> Reload -> Select the first resource and copy the cookie:
  • Install Python and then some dependencies (use a virtual environment if possible): pip install requests yt-dlp beautifulsoup4 markdownify
  • Use this code to download all video lectures for all courses (make sure to replace the cookie and the download path):
python kodekloud_scrap.py
import string
import subprocess
from dataclasses import dataclass
from pathlib import Path
from shlex import split
from typing import List

import markdownify
import requests
import yt_dlp
from bs4 import BeautifulSoup

# Course landing pages to scrape. The download loop at the bottom of the
# script requests each URL in this order; remove entries you do not need.
all_course_link = [
    "https://kodekloud.com/courses/docker-for-the-absolute-beginner/",
    "https://kodekloud.com/courses/red-hat-certified-system-administratorrhcsa-2/",
    "https://kodekloud.com/courses/kustomize/",
    "https://kodekloud.com/courses/devsecops/",
    "https://kodekloud.com/courses/az-104-microsoft-azure-administrator/",
    "https://kodekloud.com/courses/devops-interview-prep-course/",
    "https://kodekloud.com/courses/terraform-challenges/",
    "https://kodekloud.com/courses/hashicorp-certified-consul-associate-certification/",
    "https://kodekloud.com/courses/hashicorp-certified-vault-associate-certification/",
    "https://kodekloud.com/courses/kubernetes-challenges/",
    "https://kodekloud.com/courses/linux-challenges/",
    "https://kodekloud.com/courses/hashicorp-certified-vault-operations-professional-2022/",
    "https://kodekloud.com/courses/cks-challenges/",
    "https://kodekloud.com/courses/linux-foundation-certified-system-administrator-lfcs/",
    "https://kodekloud.com/courses/jenkins/",
    "https://kodekloud.com/courses/golang/",
    "https://kodekloud.com/courses/hashicorp-certified-terraform-associate/",
    "https://kodekloud.com/courses/helm-for-beginners/",
    "https://kodekloud.com/courses/certified-associate-in-python-programming/",
    "https://kodekloud.com/courses/istio-service-mesh/",
    "https://kodekloud.com/courses/jinja2-templating/",
    "https://kodekloud.com/courses/certified-kubernetes-security-specialist-cks/",
    "https://kodekloud.com/courses/python-entry-level-programmer-certification/",
    "https://kodekloud.com/courses/json-path-quiz/",
    "https://kodekloud.com/courses/docker-certified-associate-exam-course/",
    "https://kodekloud.com/courses/terraform-for-beginners/",
    "https://kodekloud.com/courses/certified-kubernetes-administrator-cka/",
    "https://kodekloud.com/courses/certified-kubernetes-application-developer-ckad/",
    "https://kodekloud.com/courses/kubernetes-for-the-absolute-beginners-hands-on/",
    "https://kodekloud.com/courses/chef-for-the-absolute-beginners/",
    "https://kodekloud.com/courses/openshift-for-the-absolute-beginners/",
    "https://kodekloud.com/courses/ansible-certification-preparation-course/",
    "https://kodekloud.com/courses/ansible-for-the-absolute-beginners-couse/",
    "https://kodekloud.com/courses/docker-swarm-services-stacks-hands-on/",
    "https://kodekloud.com/courses/devops-pre-requisite-course/",
    "https://kodekloud.com/courses/the-linux-basics-course/",
    "https://kodekloud.com/courses/shell-scripts-for-beginners/",
    "https://kodekloud.com/courses/puppet-for-the-absolute-beginners-course/",
    "https://kodekloud.com/courses/git-for-beginners/",
    "https://kodekloud.com/courses/aws-cloud-for-beginners/",
    "https://kodekloud.com/courses/lens-introduction/",
    "https://kodekloud.com/courses/linode-kubernetes-engine/",
]


@dataclass
class Lesson:
    """One lesson entry listed under a course topic."""

    name: str  # lesson title text with surrounding whitespace stripped
    url: str  # value of the lesson link's ``href`` attribute
    is_video: bool  # True when the title span has the "video_topic_enabled" class


@dataclass
class Topic:
    """A course section together with the lessons listed under it."""

    name: str  # section title text with surrounding whitespace stripped
    lessons: List[Lesson]  # lessons in the order they appear on the page

    @classmethod
    def make(cls, topic):
        """Build a topic from one ``ld-item-list-item`` element of a course page.

        Args:
            topic: Parsed HTML element exposing ``find`` / ``find_all``
                (a BeautifulSoup tag in this script).

        Returns:
            A new instance of ``cls`` holding the section name and its lessons.

        Raises:
            AttributeError: If the element has no ``ld-item-title`` div or that
                div has no ``<span>`` child.
        """
        title = topic.find("div", class_="ld-item-title")
        name = title.span.text.strip()

        lesson_links = topic.find_all("a", class_="ld-topic-row")
        lesson_titles = topic.find_all("span", class_="ld-topic-title")

        # Links and titles are collected independently and paired by position;
        # zip() stops at the shorter list if the counts ever differ.
        lessons = [
            Lesson(
                name=lesson_title.text.strip(),
                url=lesson_link["href"],
                is_video="video_topic_enabled" in lesson_title["class"],
            )
            for lesson_link, lesson_title in zip(lesson_links, lesson_titles)
        ]
        # Use cls rather than the hard-coded class so subclasses get instances
        # of their own type from this alternate constructor.
        return cls(name=name, lessons=lessons)


# def download_video(url, output_path):
#     yt_dlp.utils.std_headers[
#         "Cookie"
#     ] = """"""
#     ydl_opts = {
#         "concurrent_fragment_downloads": 15,
#         "outtmpl": f"{output_path}.%(ext)s",
#     }
#     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
#         ydl.download([url])

# Value of the browser's Cookie request header for a signed-in session.
# It is sent with the lesson/PDF requests and passed to yt-dlp via --add-header;
# replace the placeholder before running the script.
COOKIE = """<REPLACE_WITH_YOUR_COOKIE>"""


def download_video(url: str, output_path: Path) -> int:
    """Download a lesson video with yt-dlp and return its exit code.

    Args:
        url: Lesson page URL handed to yt-dlp.
        output_path: Output template/path passed to yt-dlp's ``-o`` option.

    Returns:
        The exit status of the yt-dlp process (0 on success).
    """
    # Local import keeps this function self-contained.
    import sys

    # Pass the arguments as a list instead of building one string and
    # re-splitting it: quotes or spaces in the cookie, path or URL can no
    # longer corrupt the command line. Running the module through the current
    # interpreter avoids depending on a ``yt-dlp`` executable being on PATH.
    command = [
        sys.executable,
        "-m",
        "yt_dlp",
        "-f",
        "bestvideo+bestaudio",
        "-N",
        "15",
        "--add-header",
        f"Cookie:{COOKIE}",
        "-o",
        str(output_path),
        url,
    ]
    return subprocess.call(command)


def normalize_name(name: str) -> str:
    """Return ``name`` with every ASCII punctuation character removed.

    All other characters, including whitespace and non-ASCII symbols, are
    kept in their original order.
    """
    unwanted = set(string.punctuation)
    kept_chars = [char for char in name if char not in unwanted]
    return "".join(kept_chars)


def is_normal_content(content) -> bool:
    """Tell whether a lesson's content element is plain material worth saving.

    Args:
        content: Parsed HTML element exposing ``find`` / ``find_all`` (a
            BeautifulSoup tag in this script), or ``None`` when the lesson page
            had no content wrapper.

    Returns:
        ``False`` when ``content`` is ``None``, contains a
        ``start-lab-button`` div, or contains any ``<iframe>``; ``True``
        otherwise.
    """
    # A page without the content wrapper yields None; treat it as nothing to
    # save instead of failing with AttributeError.
    if content is None:
        return False
    has_lab_button = content.find("div", class_="start-lab-button") is not None
    has_iframe = bool(content.find_all("iframe"))
    return not (has_lab_button or has_iframe)


def download_all_pdf(content, download_path: Path) -> None:
    """Download every PDF linked from a lesson's content element.

    Args:
        content: Parsed HTML element exposing ``find_all`` (a BeautifulSoup
            tag in this script).
        download_path: Existing directory that receives the PDF files; each
            file keeps the last path component of its link as its name.
    """
    for link in content.find_all("a"):
        href = link.get("href")
        # Anchors without an href (e.g. named anchors) return None here.
        if not href or not href.endswith("pdf"):
            continue
        file_name = download_path / Path(href).name
        print(f"Downloading {file_name}...")
        response = requests.get(href, headers={"Cookie": COOKIE})
        if not response.ok:
            # Do not save an HTML error page under a .pdf name.
            print(f"Skipping {href}: HTTP {response.status_code}")
            continue
        file_name.write_bytes(response.content)


# Root folder that receives every course; edit this line to download elsewhere.
DOWNLOAD_ROOT = Path.home() / "Downloads" / "KodeKloud"

for course_link in all_course_link:
    # The course overview page is requested without the session cookie.
    course_page = requests.get(course_link)
    course_soup = BeautifulSoup(course_page.content, "html.parser")

    title_tag = course_soup.find("h1", class_="entry-title")
    if title_tag is None:
        # No title means no folder name; report it and move on to the next
        # course instead of aborting the whole run.
        print(f"Skipping {course_link}: course title not found on the page.")
        continue
    course_dir = DOWNLOAD_ROOT / normalize_name(title_tag.text.strip())

    topic_tags = course_soup.find_all("div", class_="ld-item-list-item")
    items = [Topic.make(topic_tag) for topic_tag in topic_tags]

    for i, item in enumerate(items, start=1):
        for j, lesson in enumerate(item.lessons, start=1):
            # Layout: <course>/<n - topic>/<m - lesson>; the file suffix is
            # appended below depending on the lesson type.
            file_path = (
                course_dir
                / f"{i} - {normalize_name(item.name)}"
                / f"{j} - {normalize_name(lesson.name)}"
            )
            if lesson.is_video:
                print(f"Writing video file... {file_path}...")
                file_path.parent.mkdir(parents=True, exist_ok=True)
                download_video(lesson.url, file_path.with_suffix(".mp4"))
                continue

            # Non-video lessons need the session cookie to show their content.
            lesson_page = requests.get(lesson.url, headers={"Cookie": COOKIE})
            lesson_soup = BeautifulSoup(lesson_page.content, "html.parser")
            content = lesson_soup.find("div", class_="learndash_content_wrap")
            # find() returns None when the wrapper is absent; skip such pages
            # along with labs and iframe-only pages.
            if content is None or not is_normal_content(content):
                continue

            print(f"Writing resource file... {file_path}...")
            file_path.parent.mkdir(parents=True, exist_ok=True)
            file_path.with_suffix(".md").write_text(
                markdownify.markdownify(content.prettify()), encoding="utf-8"
            )
            download_all_pdf(content, file_path.parent)

NOTE: For some courses we need to enroll first before downloading. Example: image

If we don't enroll, the script downloads the same introduction video for every lesson.


Course Decks

Gist

@KevinKeo
Copy link

Doesn't seems to work

@StylishBros
Copy link

Where to replace the download path? Please let me know the lines!

@debakarr
Copy link
Author

Where to replace the download path? Please let me know the lines!

file_path = Path(
                Path.home()  # You can replace the download path in this line.
                / "Downloads"
                / "KodeKloud"
                / normalize_name(course_title)
                / f"{i} - {normalize_name(item.name)}"
                / f"{j} - {normalize_name(lesson.name)}"
            )

@StylishBros
Copy link

StylishBros commented Dec 21, 2022

I tried replacing the download path. I have attached the modified code and the error.

Screenshot 2022-12-21 115330
Screenshot 2022-12-21 115359

@debakarr
Copy link
Author

I tried replacing the download path. I have attached the modified code and the error.

Screenshot 2022-12-21 115330 Screenshot 2022-12-21 115359

Try this:

file_path = Path("C:/Users/gjsch/OneDrive/Desktop/Courses") / "KodeKloud" / normalize_name(course_title) / f"{i} - {normalize_name(item.name)}" / f"{j} - {normalize_name(lesson.name)}"

@timsd20
Copy link

timsd20 commented Dec 23, 2022

I face the error

Traceback (most recent call last): File "/path/kodekloud_scrap.py", line 165, in <module> if is_normal_content(content): File "/path/kodekloud_scrap.py", line 123, in is_normal_content is_lab = content.find("div", class_="start-lab-button") AttributeError: 'NoneType' object has no attribute 'find'
seams it try to download the course resource (pdf) then failed

@ppa1985
Copy link

ppa1985 commented Apr 16, 2023

Traceback (most recent call last):
File "C:\Users\TAM001\Downloads\kodekloud\KodeKloud_Scrapper.py", line 190, in
file_path = Path(r"C:\Users\TAM001\Dropbox\PC\Documents\my file\KKL") / "Downloads" / "KodeKloud" / normalize_name(course_title) / f"{i} - {normalize_name(item.name)}" / f"{j} - {normalize_name(lesson.name)}"
File "C:\Users\TAM001\Downloads\kodekloud\KodeKloud_Scrapper.py", line 160, in normalize_name
return name.translate(str.maketrans("", "", string.punctuation))
TypeError: 'NoneType' object is not callable

@ppa1985
Copy link

ppa1985 commented Apr 16, 2023

file_path = Path("C:/Users/gjsch/OneDrive/Desktop/Courses") / "KodeKloud" / normalize_name(course_title) / f"{i} - {normalize_name(item.name)}" / f"{j} - {normalize_name(lesson.name)}"

Which I changed file path as you mention above and error as like this.

@ppa1985
Copy link

ppa1985 commented Apr 16, 2023

I face the error

Traceback (most recent call last): File "/path/kodekloud_scrap.py", line 165, in <module> if is_normal_content(content): File "/path/kodekloud_scrap.py", line 123, in is_normal_content is_lab = content.find("div", class_="start-lab-button") AttributeError: 'NoneType' object has no attribute 'find' seams it try to download the course resource (pdf) then failed

@timsd20 can you done video download? I can't get anything, video and also pdf. just only created folder.

@ppa1985
Copy link

ppa1985 commented Apr 16, 2023

@debakarr Dear Bro, If you free, please give me a suggestion to do. my subscription is nearly finished and just 15days left to download.

@ppa1985
Copy link

ppa1985 commented Apr 17, 2023

Now I got the error as like this

Writing video file... C:\Users\dhs\Documents\myfile\KodeKloud\Learn Devops – AWS Cloud for Beginners\1 - Introduction\1 - Course Introduction 0042...
Traceback (most recent call last):
File "c:\Users\dhs\Documents\code\KodeKloud_Scrapper1.py", line 188, in
download_video(lesson.url, file_path.with_suffix(".mp4"))
File "c:\Users\dhs\Documents\code\KodeKloud_Scrapper1.py", line 138, in download_video
process = subprocess.Popen(split(command))
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\lib\subprocess.py", line 971, in init
self._execute_child(args, executable, preexec_fn, close_fds,
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\lib\subprocess.py", line 1456, in _execute_child
hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
FileNotFoundError: [WinError 2] The system cannot find the file specified

@ppa1985
Copy link

ppa1985 commented Apr 17, 2023

Now I got the error as like this

Writing video file... C:\Users\dhs\Documents\myfile\KodeKloud\Learn Devops – AWS Cloud for Beginners\1 - Introduction\1 - Course Introduction 0042... Traceback (most recent call last): File "c:\Users\dhs\Documents\code\KodeKloud_Scrapper1.py", line 188, in download_video(lesson.url, file_path.with_suffix(".mp4")) File "c:\Users\dhs\Documents\code\KodeKloud_Scrapper1.py", line 138, in download_video process = subprocess.Popen(split(command)) File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\lib\subprocess.py", line 971, in init self._execute_child(args, executable, preexec_fn, close_fds, File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\lib\subprocess.py", line 1456, in _execute_child hp, ht, pid, tid = _winapi.CreateProcess(executable, args, FileNotFoundError: [WinError 2] The system cannot find the file specified

how to fix this error?

@debakarr
Copy link
Author

@ppa1985 Currently I don't have any subscription. On 24th KodeKloud is starting free week. I will check the script and update it that day.

@ppa1985
Copy link

ppa1985 commented Apr 18, 2023

@ppa1985 Currently I don't have any subscription. On 24th KodeKloud is starting free week. I will check the script and update it that day.

Noted

@debakarr
Copy link
Author

@ppa1985 I tried moving the logic to a Python package: https://github.com/debakarr/kodekloud-downloader

Downloading through a URL still has some bugs, but you can download via this command: kodekloud dl -o . -c <YOUR COOKIE HERE INSIDE DOUBLE QUOTES>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment