Last active
December 22, 2020 16:49
-
-
Save rjdp/79bd3f8dbf883e57fff5bce4f5bb66c3 to your computer and use it in GitHub Desktop.
Script for downloading course lectures from a Coursera specialization or an individual course
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
import time | |
import sys | |
# Resolution requested for every lecture video.
video_quality = "540p"  # available qualities: 360p, 540p, 720p

# How to find a course id:
#   1. Open a course page with the browser dev tools' network tab open.
#   2. Search for "onDemandSpecializations" in the network tab's search input.
#   3. Go to the videos section of, say, week 1 and check the "courseId"
#      query parameter of the matching request.
# If the course belongs to a specialization, use that value for
# `one_of_specialization_course_id` below. Otherwise set
# `is_specialization = False` and assign the value to `course_id`.

# Id of any one course within the specialization to download.
# NOTE(review): the original author's note says the id is from the Google IT
# Support Professional Certificate specialization, but it does not say which
# of the ids it refers to - confirm before relying on it.
# Other ids previously tried with this script:
#   "YpwHVDb5EemE7gr4SGSAsA", "pNXe13ICEeeBKg4MjLYj6A",
#   "BIU_pgCCEeiZdg6RDGBSdg", "7H35pMSrEeefQQqXI6t6yg",
#   "ARf5_jvZEeeYEBLbuVGJ2g", "nA4RUW01EeW8nRIpKnwp7Q"
one_of_specialization_course_id = "W_mOXCrdEeeNPQ68_4aPpA"

# The first command-line argument, when present, overrides the default id.
# An explicit length check replaces the former bare `except: pass`, which
# would also have hidden unrelated errors.
if len(sys.argv) > 1:
    one_of_specialization_course_id = sys.argv[1]
# Set to False to download one stand-alone course instead of every course of
# a specialization; then fill in the `course_id` / `courses` entry below.
is_specialization = True

# Coursera API endpoint templates. `spec_url` is filled in immediately; the
# other three are formatted later (course slug, or course id + lecture id).
spec_url = "https://www.coursera.org/api/onDemandSpecializations.v1?fields=courseIds,interchangeableCourseIds,launchedAt,logo,memberships,metadata,partnerIds,premiumExperienceVariant,onDemandSpecializationMemberships.v1(suggestedSessionSchedule),onDemandSpecializationSuggestedSchedule.v1(suggestedSessions),partners.v1(homeLink,name),courses.v1(courseProgress,description,membershipIds,startDate,v2Details,vcMembershipIds),v2Details.v1(onDemandSessions,plannedLaunchDate),memberships.v1(grade,vcMembershipId),vcMemberships.v1(certificateCodeWithGrade)&includes=courseIds,memberships,partnerIds,onDemandSpecializationMemberships.v1(suggestedSessionSchedule),courses.v1(courseProgress,membershipIds,v2Details,vcMembershipIds),v2Details.v1(onDemandSessions)&q=primary&courseId={}".format(
    one_of_specialization_course_id
)
course_url = "https://www.coursera.org/api/onDemandCourseMaterials.v2/?q=slug&slug={}&includes=modules%2Clessons%2CpassableItemGroups%2CpassableItemGroupChoices%2CpassableLessonElements%2Citems%2Ctracks%2CgradePolicy&fields=moduleIds%2ConDemandCourseMaterialModules.v1(name%2Cslug%2Cdescription%2CtimeCommitment%2ClessonIds%2Coptional%2ClearningObjectives)%2ConDemandCourseMaterialLessons.v1(name%2Cslug%2CtimeCommitment%2CelementIds%2Coptional%2CtrackId)%2ConDemandCourseMaterialPassableItemGroups.v1(requiredPassedCount%2CpassableItemGroupChoiceIds%2CtrackId)%2ConDemandCourseMaterialPassableItemGroupChoices.v1(name%2Cdescription%2CitemIds)%2ConDemandCourseMaterialPassableLessonElements.v1(gradingWeight%2CisRequiredForPassing)%2ConDemandCourseMaterialItems.v2(name%2Cslug%2CtimeCommitment%2CcontentSummary%2CisLocked%2ClockableByItem%2CitemLockedReasonCode%2CtrackId%2ClockedStatus%2CitemLockSummary)%2ConDemandCourseMaterialTracks.v1(passablesCount)&showLockedItems=true"
courseid_url = "https://www.coursera.org/api/onDemandCourses.v1?q=slug&slug={}&includes=instructorIds%2CpartnerIds%2C_links&fields=brandingImage%2CcertificatePurchaseEnabledAt%2Cpartners.v1(squareLogo%2CrectangularLogo)%2Cinstructors.v1(fullName)%2CoverridePartnerLogos%2CsessionsEnabledAt%2CdomainTypes%2CpremiumExperienceVariant%2CisRestrictedMembership"
lecture_url = "https://www.coursera.org/api/onDemandLectureVideos.v1/{}~{}?includes=video&fields=onDemandVideos.v1(sources%2Csubtitles%2CsubtitlesVtt%2CsubtitlesTxt)"

if is_specialization:
    # Only query the specialization endpoint when it is actually needed; a
    # stand-alone course has no specialization record to look up.
    spec_response = requests.get(spec_url)
    # Fail with the HTTP error itself instead of a confusing lookup error on
    # an error payload further down.
    spec_response.raise_for_status()
    specialization_details = spec_response.json()

    # Keep just the fields the downloader uses for each course.
    courses = [
        {"name": entry["name"], "slug": entry["slug"], "id": entry["id"]}
        for entry in specialization_details["linked"]["courses.v1"]
    ]

    # All course folders are created inside a folder named after the
    # specialization, which becomes the working directory.
    specialization_name = specialization_details["elements"][0]["name"]
    print(specialization_name)
    os.makedirs(specialization_name, exist_ok=True)
    os.chdir(specialization_name)
else:
    specialization_details = None
    # Plug in the appropriate course id, slug and name manually.
    course_id = "YpwHVDb5EemE7gr4SGSAsA"
    courses = [
        {
            "slug": "ibm-blockchain-essentials-for-developers",
            "id": course_id,
            "name": "IBM Blockchain Foundation for Developers",
        }
    ]
def download_file(file_name, file_url):
    """Stream ``file_url`` to disk as ``file_name`` while drawing a progress bar.

    The data is first written to ``<name>.part`` and renamed once the
    transfer has finished, so an interrupted download never leaves behind a
    truncated file under the final name.

    Args:
        file_name: Target file name, relative to the current working
            directory. Surrounding whitespace is stripped and every "/" is
            replaced by a space so the name stays a single path component.
        file_url: URL of the file to download.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    print(file_url, file_name)
    target_name = file_name.strip().replace("/", " ")
    partial_name = target_name + ".part"
    bar_width = 50
    start = time.time()

    # The context manager releases the connection even if writing fails.
    with requests.get(file_url, stream=True) as response:
        # Do not save an HTML/JSON error page as if it were the video.
        response.raise_for_status()
        # Content-Length is optional; treat a missing header as "unknown".
        total_length = int(response.headers.get("content-length") or 0)
        downloaded = 0
        with open(partial_name, "wb") as file:
            for chunk in response.iter_content(chunk_size=1024):
                if not chunk:
                    continue
                downloaded += len(chunk)
                file.write(chunk)

                if total_length > 0:
                    done = min(bar_width, bar_width * downloaded // total_length)
                else:
                    # Unknown size: keep the bar empty, still show the rate.
                    done = 0
                elapsed = time.time() - start
                # The very first chunk can arrive within clock resolution.
                rate = int(downloaded / elapsed) if elapsed > 0 else 0
                # The rate is in bytes per second, hence "B/s" (the former
                # "bps" label suggested bits per second).
                sys.stdout.write(
                    "\r[%s%s] %s B/s"
                    % ("=" * done, " " * (bar_width - done), rate)
                )
                sys.stdout.flush()

    # Publish the file under its final name only after a complete transfer.
    os.replace(partial_name, target_name)
    sys.stdout.write("\n")
def _lecture_video_url(course_id, lecture_id):
    """Return the mp4 URL of one lecture at the configured ``video_quality``."""
    details = requests.get(lecture_url.format(course_id, lecture_id)).json()
    sources = details.get("linked").get("onDemandVideos.v1")[0].get("sources")
    by_resolution = sources.get("byResolution") or {}
    variant = by_resolution.get(video_quality)
    if variant is None:
        # Name the real problem instead of failing on `None.get(...)`.
        raise ValueError(
            "video quality {!r} is not available for this lecture "
            "(available: {})".format(
                video_quality, ", ".join(sorted(by_resolution)) or "none"
            )
        )
    return variant.get("mp4VideoUrl")


def _download_lectures(course, lectures):
    """Download each lecture of ``course`` that is not on disk yet."""
    course_id = None  # looked up lazily, at most once per course
    for index, lecture in enumerate(lectures):
        try:
            file_name = str(index) + " - " + lecture["name"] + ".mp4"
            # Same sanitising as `download_file` applies to the name.
            if os.path.exists(file_name.strip().replace("/", " ")):
                continue
            print(
                "Downloading lecture = {} {} {}".format(
                    "%" * 5, lecture["name"], "%" * 5
                )
            )
            if course_id is None:
                # The id depends only on the course, so one request serves
                # every lecture instead of one request per lecture.
                course_id = (
                    requests.get(courseid_url.format(course["slug"]))
                    .json()
                    .get("elements")[0]
                    .get("id")
                )
            download_file(file_name, _lecture_video_url(course_id, lecture["id"]))
        except Exception as error:
            # Show the message next to the progress output, then abort.
            print(str(error))
            raise


def main():
    """Download every lecture video of every course listed in ``courses``.

    For each course a folder named after the course (":" replaced by "-") is
    created in the current working directory, and each item whose content
    type is "lecture" is saved there as "<index> - <lecture name>.mp4".
    Lectures whose file already exists are skipped.

    Relies on the module-level ``courses``, ``course_url``, ``courseid_url``,
    ``lecture_url`` and ``video_quality`` values and on ``download_file``.

    Raises:
        Exception: Any error raised while listing or downloading lectures is
            propagated after its message has been printed.
    """
    for course in courses:
        course_dir = course["name"].replace(":", "-")
        materials = requests.get(course_url.format(course["slug"])).json()
        lectures = [
            item
            for item in materials.get("linked").get(
                "onDemandCourseMaterialItems.v2"
            )
            if item.get("contentSummary").get("typeName") == "lecture"
        ]
        print(
            "Downloading course = {} {} {}".format("$" * 5, course_dir, "$" * 5)
        )
        os.makedirs(course_dir, exist_ok=True)
        os.chdir(course_dir)
        try:
            _download_lectures(course, lectures)
        finally:
            # Always step back out so a failure does not leave the process
            # inside the course folder.
            os.chdir("..")


main()
Very good script, but how do I download a whole specialization (videos, subtitles, and all materials)?
I also need to do the same with edX.
Thanks
How to use it? Could you give a brief demo?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@Afeez1131 the coursera-dl project has been updated for the new API; this script was meant for temporary use only. Please check the coursera-dl page regarding your query about course assets. Thanks :)