Created
May 3, 2020 08:39
-
-
Save anabarasan/e6b5b6842e97592ec1eaffbc30ce703e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""coursera course list sorted by duration""" | |
#!/usr/bin/env python3 | |
import csv | |
from http.server import SimpleHTTPRequestHandler | |
import json | |
from operator import itemgetter | |
from socketserver import TCPServer | |
import threading | |
from time import sleep | |
import webbrowser | |
import requests | |
# Load the GraphQL request template that is POSTed to Coursera further down.
# JSON text is UTF-8, so name the encoding instead of relying on the
# platform's locale default.
with open("request_body.json", encoding="utf-8") as ipfile:
    post_body = json.load(ipfile)
# Category slugs offered in the selection menu, in alphabetical order.
# Every entry needs its own trailing comma: two adjacent string literals
# without one are silently concatenated into a single bogus slug.
categories = sorted([
    "data-science",
    "information-technology",
    "computer-science",
    "arts-and-humanities",
    "social-sciences",
    "language-learning",
])
# Print a numbered menu of the categories and keep asking until the user
# enters one of the listed numbers.
for idx, title in enumerate(categories):
    print(idx, title)
while True:
    try:
        selected_category = int(input("Select category => "))
    except ValueError:
        print("Please enter one of the numbers listed above.")
        continue
    # Reject negatives explicitly: Python would otherwise index from the end
    # of the list and quietly pick an unintended category.
    if 0 <= selected_category < len(categories):
        break
    print("Please enter one of the numbers listed above.")
category = categories[selected_category]
# Collect every course of the selected category, one duration bucket at a
# time, by paging through Coursera's catalogue GraphQL endpoint.
courses = []
durations = [
    "1-4 Weeks",
    "1-3 Months",
]
URL = "https://www.coursera.org/graphqlBatch?opname=catalogResultQuery"
HEADERS = {"content-type": "application/json"}
# Without an explicit timeout requests waits indefinitely on a stalled
# connection.
REQUEST_TIMEOUT = 30
# The facet list lives inside post_body and is edited in place, so the
# changes below are part of every request that follows.
facets = post_body[0]["variables"]["facets"]
for duration in durations:
    # Point the duration and category facets of the template at the current
    # duration bucket and the category the user picked.
    for idx, facet in enumerate(facets):
        if facet.startswith("productDurationEnum:"):
            facets[idx] = "productDurationEnum:" + duration
        elif facet.startswith("categoryMultiTag:"):
            facets[idx] = "categoryMultiTag:" + category
    start = 0
    # The response's paging "next" value drives the loop; it ends once the
    # API returns None for it.
    while start is not None:
        print("fetching 30 titles starting from", start, "for duration", duration)
        post_body[0]["variables"]["start"] = str(start)
        response = requests.post(
            URL,
            data=json.dumps(post_body),
            headers=HEADERS,
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        for record in response.json():
            browser = record["data"]["CatalogResultsV2Resource"]["browseV2"]
            start = browser["paging"]["next"]
            for element in browser["elements"]:
                for course in element["courses"]["elements"]:
                    courses.append({
                        "url": "https://www.coursera.org/learn/" + course["slug"],
                        "name": course["name"],
                        "level": course["level"],
                        "duration": course["courseDerivativesV2"]["avgLearningHoursAdjusted"],
                    })
        # Pause between page requests to avoid hammering the endpoint.
        sleep(2)
# Order the collected courses by their "duration" value, smallest first.
# NOTE(review): assumes every "duration" is a number; a null from the API
# would make this comparison raise TypeError -- confirm against real data.
courses.sort(key=itemgetter("duration"))

# newline="" leaves line endings to the csv module (otherwise Windows gets a
# blank row after every record); UTF-8 keeps non-ASCII course names writable
# regardless of the locale's default encoding.
with open("courses.csv", "w", newline="", encoding="utf-8") as csvfile:
    fieldnames = ["name", "url", "level", "duration"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(courses)

# Same data as JSON, written next to the CSV.
with open("courses.json", "w", encoding="utf-8") as outfile:
    json.dump(courses, outfile)
def http_server(port=8000):
    """Serve files over HTTP on all interfaces until the process is stopped.

    Requests are handled by ``SimpleHTTPRequestHandler``. The call blocks in
    ``serve_forever()``, so run it on a separate thread when the caller has
    more work to do.

    Args:
        port: TCP port to listen on. Defaults to 8000.

    Raises:
        OSError: if the port cannot be bound.
    """
    class _ReusableTCPServer(TCPServer):
        # Let a quick rerun bind the port again instead of failing with
        # "Address already in use" while the old socket is in TIME_WAIT.
        allow_reuse_address = True

    server_address = ('', port)
    with _ReusableTCPServer(server_address, SimpleHTTPRequestHandler) as httpd:
        print(f"serving at http://localhost:{port}")
        httpd.serve_forever()
# http_server() blocks forever, so it runs on its own thread while the main
# thread goes on to open the page in a new browser tab.
server_thread = threading.Thread(target=http_server)
server_thread.start()
webbrowser.open_new_tab("http://localhost:8000")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!doctype html> | |
<html> | |
<head> | |
<title>Coursera Courses</title> | |
<script | |
src="https://code.jquery.com/jquery-3.5.0.min.js" | |
integrity="sha256-xNzN2a4ltkB44Mc/Jz3pT4iU1cmeR0FkXs4pru/JxaQ=" | |
crossorigin="anonymous"></script> | |
<script> | |
// Course records ({url, name, level, duration}) loaded from courses.json.
var courses = [];

/**
 * Rebuild the page's <table> from the global `courses` array: one row per
 * course with a link to the course, its level and its duration.
 *
 * Cell contents are set with .text()/.attr() rather than by concatenating
 * HTML, so markup characters in remote data are displayed literally instead
 * of being interpreted. All working variables are local; the original
 * implicit globals included `name`, which overwrote window.name.
 */
function list_courses() {
    var $body = $('<tbody>');
    for (var i = 0; i < courses.length; i++) {
        var course = courses[i];
        var $link = $('<a>')
            // rel=noopener stops the opened tab from reaching back via window.opener.
            .attr({ target: '_blank', rel: 'noopener', href: course['url'] })
            .text(course['name']);
        $('<tr>')
            .append($('<td>').append($link))
            .append($('<td>').text(course['level']))
            .append($('<td>').text(course['duration']))
            .appendTo($body);
    }
    // Swap the whole body in at once so the table is touched a single time.
    $('table').empty().append($body);
}
// Once the DOM is ready, download courses.json and render it into the table.
$(function () {
    // getJSON always parses the body as JSON, so `courses` is an array even
    // if the server labels the file with an unexpected Content-Type.
    $.getJSON('courses.json', function (data) {
        courses = data;
        list_courses();
    }).fail(function (jqXHR, textStatus) {
        // Surface load/parse failures instead of leaving a silently empty page.
        console.error('could not load courses.json: ' + textStatus);
    });
});
</script> | |
</head> | |
<body> | |
<table></table> | |
</body> | |
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[{"operationName":"catalogResultQuery","variables":{"limit":30,"facets":["skillNameMultiTag","jobTitleMultiTag","difficultyLevelTag","languages","productDurationEnum:1-4 Weeks","entityTypeTag","partnerMultiTag","categoryMultiTag:information-technology","subcategoryMultiTag"],"sortField":"","start":"0","skip":false},"query":"query catalogResultQuery($facets: [String!]!, $start: String!, $skip: Boolean = false, $sortField: String, $limit: Int) {\n CatalogResultsV2Resource @skip(if: $skip) {\n browseV2(facets: $facets, start: $start, limit: $limit, sortField: $sortField) {\n elements {\n label\n entries {\n id\n score\n courseId\n specializationId\n onDemandSpecializationId\n resourceName\n __typename\n }\n domainId\n subdomainId\n facets\n courses {\n elements {\n ...FullCourseFragment\n __typename\n }\n __typename\n }\n s12ns {\n elements {\n ...FullS12nFragment\n __typename\n }\n __typename\n }\n __typename\n }\n paging {\n total\n next\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment FullCourseFragment on CoursesV1 {\n ...LightweightCourseFragment\n courseDerivativesV2 {\n skillTags {\n skillName\n relevanceScore\n __typename\n }\n avgLearningHoursAdjusted\n commentCount\n averageFiveStarRating\n ratingCount\n __typename\n }\n __typename\n}\n\nfragment LightweightCourseFragment on CoursesV1 {\n id\n slug\n name\n photoUrl\n s12nIds\n premiumExperienceVariant\n level\n workload\n primaryLanguages\n partners {\n elements {\n id\n name\n squareLogo\n classLogo\n logo\n __typename\n }\n __typename\n }\n courseTypeMetadata {\n courseTypeMetadata {\n ...RhymeProjectFragment\n __typename\n }\n __typename\n }\n __typename\n}\n\nfragment RhymeProjectFragment on CourseTypeMetadataV1_rhymeProjectMember {\n ... 
on CourseTypeMetadataV1_rhymeProjectMember {\n rhymeProject {\n typeNameIndex\n __typename\n }\n __typename\n }\n __typename\n}\n\nfragment FullS12nFragment on OnDemandSpecializationsV1 {\n ...LightweightS12nFragment\n derivativeV2 {\n averageFiveStarRating\n avgLearningHoursAdjusted\n __typename\n }\n courses {\n elements {\n courseDerivativesV2 {\n skillTags {\n skillName\n relevanceScore\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n}\n\nfragment LightweightS12nFragment on OnDemandSpecializationsV1 {\n name\n id\n slug\n logo\n courseIds\n partners {\n elements {\n id\n name\n squareLogo\n classLogo\n logo\n __typename\n }\n __typename\n }\n metadata {\n headerImage\n level\n __typename\n }\n productVariant\n __typename\n}\n"}] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment