Skip to content

Instantly share code, notes, and snippets.

@henrydatei
Created January 24, 2022 22:09
Show Gist options
  • Save henrydatei/1fb4b6fc545b81606c8fa63d850f0a9b to your computer and use it in GitHub Desktop.
Save henrydatei/1fb4b6fc545b81606c8fa63d850f0a9b to your computer and use it in GitHub Desktop.
extracts the solution for Datacamp courses
import requests
import re
import html
from urllib.parse import urlparse, parse_qs
import difflib
import csv
from io import StringIO
# Datacamp provides the data in a weird mixture of JSON and CSV, so I wrote my own function to find specific information
def findString2(result, search):
    """Return every field that directly follows a field equal to ``search``.

    ``result`` is parsed as comma-delimited text with ``csv.reader``. Each
    parsed row is scanned left to right; whenever a field is exactly equal
    to ``search``, the field immediately after it is collected.

    Args:
        result: The raw text to scan.
        search: The exact field value to look for.

    Returns:
        A list of the fields following each match, in order of appearance
        (row by row, left to right). Empty if nothing matched.
    """
    matches = []
    for row in csv.reader(StringIO(result), delimiter=','):
        for index, value in enumerate(row):
            # A match in the last position of a row has no successor;
            # skip it instead of failing with IndexError.
            if value == search and index + 1 < len(row):
                matches.append(row[index + 1])
    return matches
url = "https://campus.datacamp.com/courses/data-types-for-data-science-in-python/dictionaries-the-root-of-python?ex=4"

# Fetch the exercise page. A timeout keeps the script from hanging forever on
# a stalled connection, and raise_for_status() surfaces HTTP errors instead of
# trying to parse an error page.
req = requests.get(url, timeout=30)
req.raise_for_status()

# The page state is embedded as `window.PRELOADED_STATE = ...</script>`.
state_match = re.search("window.PRELOADED_STATE = (.*)</script>", req.text)
if state_match is None:
    raise SystemExit("Could not find window.PRELOADED_STATE in the page at " + url)
# [:-1] drops the last captured character (presumably the trailing ';' of the
# JavaScript statement -- confirm) before the HTML entities are unescaped.
result = html.unescape(state_match.group(1)[:-1])

# process information from URL
# Path segments 4 and 5 of the URL are the course and chapter slugs; the
# dashes are turned into spaces so they can be fuzzy-matched against titles.
titleraw = url.split("/")[4].replace("-", " ")
chapterraw = url.split("/")[5].split("?")[0].replace("-", " ")
parsed_url = urlparse(url)
# The `ex` query parameter is the 1-based exercise number.
exercise = int(parse_qs(parsed_url.query)['ex'][0])

# print solution
solutions = findString2(result, "solution")
if not 1 <= exercise <= len(solutions):
    raise SystemExit(
        "Exercise {} not available: found {} solution(s) in the page".format(
            exercise, len(solutions)
        )
    )
# Line breaks inside a solution arrive as a literal backslash-backslash-n
# sequence in the extracted text, so split on that rather than on '\n'.
solLines = solutions[exercise - 1].split('\\\\n')
for line in solLines:
    print(line)

# find exact titles for exercise
titles = findString2(result, "title")
title_matches = difflib.get_close_matches(titleraw, titles)
chapter_matches = difflib.get_close_matches(chapterraw, titles)
if not title_matches or not chapter_matches:
    raise SystemExit("Could not match the course or chapter title from the URL")
title = title_matches[0]
chapter = chapter_matches[0]
# Assumes the exercise titles directly follow the chapter title in `titles`
# -- TODO confirm against the page data.
exercise_title_index = titles.index(chapter) + exercise
if exercise_title_index >= len(titles):
    raise SystemExit("Could not find a title for exercise {}".format(exercise))
exerciseTitle = titles[exercise_title_index]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment