Skip to content

Instantly share code, notes, and snippets.

@anabarasan
Created May 3, 2020 08:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anabarasan/e6b5b6842e97592ec1eaffbc30ce703e to your computer and use it in GitHub Desktop.
Save anabarasan/e6b5b6842e97592ec1eaffbc30ce703e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""coursera course list sorted by duration"""
import csv
from http.server import SimpleHTTPRequestHandler
import json
from operator import itemgetter
from socketserver import TCPServer
import threading
from time import sleep
import webbrowser
import requests
# Load the GraphQL request template whose variables are adjusted and re-sent
# for each page of results. The encoding is pinned to UTF-8 so the file is not
# decoded with whatever the platform's locale default happens to be.
with open("request_body.json", encoding="utf-8") as ipfile:
    post_body = json.load(ipfile)
# Course categories offered in the menu, listed alphabetically.
# NOTE: every entry needs its own trailing comma. Adjacent string literals are
# concatenated by the parser, so a missing comma silently fuses two categories
# into one invalid name (this happened to "data-science" and
# "information-technology").
categories = sorted([
    "data-science",
    "information-technology",
    "computer-science",
    "arts-and-humanities",
    "social-sciences",
    "language-learning",
])

# Show a numbered menu and read the index of the category to fetch.
for idx, title in enumerate(categories):
    print(idx, title)
selected_category = int(input("Select category => "))
category = categories[selected_category]
# One dict per course, accumulated across every duration bucket and page.
courses = []
durations = [
    "1-4 Weeks",
    "1-3 Months",
]

# Request settings that never change between pages, set up once.
URL = "https://www.coursera.org/graphqlBatch?opname=catalogResultQuery"
headers = {"content-type": "application/json"}
facets = post_body[0]["variables"]["facets"]

for duration in durations:
    # Point the request template at the current duration bucket and the
    # category chosen by the user; all other facets are left untouched.
    for idx, facet in enumerate(facets):
        if facet.startswith("productDurationEnum:"):
            facets[idx] = "productDurationEnum:" + duration
        if facet.startswith("categoryMultiTag:"):
            facets[idx] = "categoryMultiTag:" + category

    # Follow the paging cursor returned by the API until it reports no
    # further page (a ``next`` value of None).
    start = 0
    while start is not None:
        print("fetching 30 titles starting from", start, "for duration", duration)
        post_body[0]["variables"]["start"] = str(start)
        # Without a timeout a stalled connection would block the script
        # indefinitely.
        response = requests.post(
            URL, data=json.dumps(post_body), headers=headers, timeout=30
        )
        response.raise_for_status()
        data = response.json()

        # Default to "no more pages" so that an empty batch response ends
        # the loop instead of re-requesting the same page forever.
        next_start = None
        for record in data:
            browser = record["data"]["CatalogResultsV2Resource"]["browseV2"]
            next_start = browser["paging"]["next"]
            for element in browser["elements"]:
                for course in element["courses"]["elements"]:
                    courses.append({
                        "url": "https://www.coursera.org/learn/" + course["slug"],
                        "name": course["name"],
                        "level": course["level"],
                        "duration": course["courseDerivativesV2"]["avgLearningHoursAdjusted"],
                    })
        start = next_start

        # Pause between requests to avoid hammering the endpoint.
        sleep(2)
# Shortest courses first. Entries whose duration is None are placed last
# rather than crashing the comparison with numeric durations.
# NOTE(review): presumably the API can return a null duration -- confirm.
courses.sort(key=lambda entry: (entry["duration"] is None, entry["duration"] or 0))

# Spreadsheet-friendly export. ``newline=''`` is required by the csv module so
# that no blank line is inserted between rows on Windows; UTF-8 keeps
# non-ASCII course names writable regardless of the locale.
with open('courses.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ["name", "url", "level", "duration"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(courses)

# JSON export; the accompanying HTML page requests 'courses.json'.
with open("courses.json", "w", encoding="utf-8") as outfile:
    json.dump(courses, outfile)
def http_server(port=8000):
    """Serve files over HTTP on ``port`` and block until interrupted.

    Requests are handled by ``SimpleHTTPRequestHandler``. The server binds to
    all interfaces (empty host string).

    Args:
        port: TCP port to listen on. Defaults to 8000, the port the rest of
            this script opens in the browser.
    """
    class _ReusableTCPServer(TCPServer):
        # Let a new run bind the port immediately after a previous run
        # exited, instead of failing with "Address already in use" while the
        # old socket lingers in TIME_WAIT.
        allow_reuse_address = True

    server_address = ('', port)
    with _ReusableTCPServer(server_address, SimpleHTTPRequestHandler) as httpd:
        print("serving at http://localhost:%d" % port)
        httpd.serve_forever()
# Run the blocking HTTP server on a separate thread so this script can go on
# to open the results page in the default browser.
server_thread = threading.Thread(target=http_server)
server_thread.start()
webbrowser.open_new_tab("http://localhost:8000")
<!doctype html>
<html>
<head>
<title>Coursera Courses</title>
<script
src="https://code.jquery.com/jquery-3.5.0.min.js"
integrity="sha256-xNzN2a4ltkB44Mc/Jz3pT4iU1cmeR0FkXs4pru/JxaQ="
crossorigin="anonymous"></script>
<script>
// Course records to display; replaced once courses.json has been loaded.
var courses = [];

/**
 * Render every entry of the global `courses` array as a row of the page's
 * <table>: a link to the course (opened in a new tab), its level and its
 * duration. Any rows already in the table are replaced.
 *
 * Cells are built as DOM nodes and filled with .text()/attributes rather
 * than concatenated into an HTML string, so markup characters in course
 * data are shown literally instead of being interpreted as HTML.
 */
function list_courses() {
    // Missing values render as an empty cell; this also avoids calling
    // .text(undefined), which jQuery treats as a getter.
    function cellText(value) {
        return value == null ? '' : String(value);
    }

    var $table = $('table').empty();
    // All loop variables are declared locally; an undeclared `name` would
    // overwrite window.name.
    for (var i = 0; i < courses.length; i++) {
        var course = courses[i];
        var $link = $('<a>')
            .attr({
                target: '_blank',
                // Prevent the opened page from reaching back via window.opener.
                rel: 'noopener',
                href: course['url']
            })
            .text(cellText(course['name']));

        $table.append(
            $('<tr>').append(
                $('<td>').append($link),
                $('<td>').text(cellText(course['level'])),
                $('<td>').text(cellText(course['duration']))
            )
        );
    }
}
// Once the DOM is ready, fetch the course list and render it.
$(document).ready(function() {
    // getJSON always parses the response as JSON instead of depending on the
    // Content-Type header sent by the server.
    $.getJSON('courses.json')
        .done(function(data) {
            courses = data;
            list_courses();
        })
        .fail(function(jqXHR, textStatus, errorThrown) {
            // Surface load/parse problems instead of leaving a silently
            // empty table.
            console.error('could not load courses.json:', textStatus, errorThrown);
        });
});
</script>
</head>
<body>
<table></table>
</body>
</html>
[{"operationName":"catalogResultQuery","variables":{"limit":30,"facets":["skillNameMultiTag","jobTitleMultiTag","difficultyLevelTag","languages","productDurationEnum:1-4 Weeks","entityTypeTag","partnerMultiTag","categoryMultiTag:information-technology","subcategoryMultiTag"],"sortField":"","start":"0","skip":false},"query":"query catalogResultQuery($facets: [String!]!, $start: String!, $skip: Boolean = false, $sortField: String, $limit: Int) {\n CatalogResultsV2Resource @skip(if: $skip) {\n browseV2(facets: $facets, start: $start, limit: $limit, sortField: $sortField) {\n elements {\n label\n entries {\n id\n score\n courseId\n specializationId\n onDemandSpecializationId\n resourceName\n __typename\n }\n domainId\n subdomainId\n facets\n courses {\n elements {\n ...FullCourseFragment\n __typename\n }\n __typename\n }\n s12ns {\n elements {\n ...FullS12nFragment\n __typename\n }\n __typename\n }\n __typename\n }\n paging {\n total\n next\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment FullCourseFragment on CoursesV1 {\n ...LightweightCourseFragment\n courseDerivativesV2 {\n skillTags {\n skillName\n relevanceScore\n __typename\n }\n avgLearningHoursAdjusted\n commentCount\n averageFiveStarRating\n ratingCount\n __typename\n }\n __typename\n}\n\nfragment LightweightCourseFragment on CoursesV1 {\n id\n slug\n name\n photoUrl\n s12nIds\n premiumExperienceVariant\n level\n workload\n primaryLanguages\n partners {\n elements {\n id\n name\n squareLogo\n classLogo\n logo\n __typename\n }\n __typename\n }\n courseTypeMetadata {\n courseTypeMetadata {\n ...RhymeProjectFragment\n __typename\n }\n __typename\n }\n __typename\n}\n\nfragment RhymeProjectFragment on CourseTypeMetadataV1_rhymeProjectMember {\n ... 
on CourseTypeMetadataV1_rhymeProjectMember {\n rhymeProject {\n typeNameIndex\n __typename\n }\n __typename\n }\n __typename\n}\n\nfragment FullS12nFragment on OnDemandSpecializationsV1 {\n ...LightweightS12nFragment\n derivativeV2 {\n averageFiveStarRating\n avgLearningHoursAdjusted\n __typename\n }\n courses {\n elements {\n courseDerivativesV2 {\n skillTags {\n skillName\n relevanceScore\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n}\n\nfragment LightweightS12nFragment on OnDemandSpecializationsV1 {\n name\n id\n slug\n logo\n courseIds\n partners {\n elements {\n id\n name\n squareLogo\n classLogo\n logo\n __typename\n }\n __typename\n }\n metadata {\n headerImage\n level\n __typename\n }\n productVariant\n __typename\n}\n"}]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment