-
-
Save rjdp/79bd3f8dbf883e57fff5bce4f5bb66c3 to your computer and use it in GitHub Desktop.
import os | |
import requests | |
import time | |
import sys | |
# Resolution key looked up in each lecture's "byResolution" map.
video_quality = "540p"  # available qualities 360p, 540p, 720p

# How to find a courseId:
#   1. Open a course page, open the Network tab of the browser dev tools and
#      type "onDemandSpecializations" into the Network tab's filter box.
#   2. Open the videos section of, say, week 1 and look at the "courseId"
#      query parameter of the matching request.
#   3. If the course belongs to a specialization, use that value for
#      `one_of_specialization_course_id` below. Otherwise set
#      `is_specialization = False` and assign the value to `course_id`.
#
# The id of any course within a specialization works. The default below is
# from the Google IT Support Professional Certificate specialization.
# Other ids that have been used with this script:
#   "YpwHVDb5EemE7gr4SGSAsA", "pNXe13ICEeeBKg4MjLYj6A",
#   "BIU_pgCCEeiZdg6RDGBSdg", "7H35pMSrEeefQQqXI6t6yg",
#   "ARf5_jvZEeeYEBLbuVGJ2g", "nA4RUW01EeW8nRIpKnwp7Q"
one_of_specialization_course_id = "W_mOXCrdEeeNPQ68_4aPpA"

# An optional first command-line argument overrides the default course id.
# An explicit length check replaces the former bare `except: pass`, which
# would also have hidden unrelated errors.
if len(sys.argv) > 1:
    one_of_specialization_course_id = sys.argv[1]
# Looks up the specialization that contains the given course id; the response
# is read below for the specialization name ("elements") and its courses
# ("linked" -> "courses.v1").
spec_url = "https://www.coursera.org/api/onDemandSpecializations.v1?fields=courseIds,interchangeableCourseIds,launchedAt,logo,memberships,metadata,partnerIds,premiumExperienceVariant,onDemandSpecializationMemberships.v1(suggestedSessionSchedule),onDemandSpecializationSuggestedSchedule.v1(suggestedSessions),partners.v1(homeLink,name),courses.v1(courseProgress,description,membershipIds,startDate,v2Details,vcMembershipIds),v2Details.v1(onDemandSessions,plannedLaunchDate),memberships.v1(grade,vcMembershipId),vcMemberships.v1(certificateCodeWithGrade)&includes=courseIds,memberships,partnerIds,onDemandSpecializationMemberships.v1(suggestedSessionSchedule),courses.v1(courseProgress,membershipIds,v2Details,vcMembershipIds),v2Details.v1(onDemandSessions)&q=primary&courseId={}".format(
    one_of_specialization_course_id
)
# URL templates filled in by main(): course materials by slug, course record
# by slug, and lecture video sources by "<course id>~<lecture id>".
course_url = "https://www.coursera.org/api/onDemandCourseMaterials.v2/?q=slug&slug={}&includes=modules%2Clessons%2CpassableItemGroups%2CpassableItemGroupChoices%2CpassableLessonElements%2Citems%2Ctracks%2CgradePolicy&fields=moduleIds%2ConDemandCourseMaterialModules.v1(name%2Cslug%2Cdescription%2CtimeCommitment%2ClessonIds%2Coptional%2ClearningObjectives)%2ConDemandCourseMaterialLessons.v1(name%2Cslug%2CtimeCommitment%2CelementIds%2Coptional%2CtrackId)%2ConDemandCourseMaterialPassableItemGroups.v1(requiredPassedCount%2CpassableItemGroupChoiceIds%2CtrackId)%2ConDemandCourseMaterialPassableItemGroupChoices.v1(name%2Cdescription%2CitemIds)%2ConDemandCourseMaterialPassableLessonElements.v1(gradingWeight%2CisRequiredForPassing)%2ConDemandCourseMaterialItems.v2(name%2Cslug%2CtimeCommitment%2CcontentSummary%2CisLocked%2ClockableByItem%2CitemLockedReasonCode%2CtrackId%2ClockedStatus%2CitemLockSummary)%2ConDemandCourseMaterialTracks.v1(passablesCount)&showLockedItems=true"
courseid_url = "https://www.coursera.org/api/onDemandCourses.v1?q=slug&slug={}&includes=instructorIds%2CpartnerIds%2C_links&fields=brandingImage%2CcertificatePurchaseEnabledAt%2Cpartners.v1(squareLogo%2CrectangularLogo)%2Cinstructors.v1(fullName)%2CoverridePartnerLogos%2CsessionsEnabledAt%2CdomainTypes%2CpremiumExperienceVariant%2CisRestrictedMembership"
lecture_url = "https://www.coursera.org/api/onDemandLectureVideos.v1/{}~{}?includes=video&fields=onDemandVideos.v1(sources%2Csubtitles%2CsubtitlesVtt%2CsubtitlesTxt)"

# True: download every course of the specialization that contains
# `one_of_specialization_course_id`.
# False: download only the single course described in the `else` branch.
is_specialization = True

if is_specialization:
    # The specialization endpoint is only queried in specialization mode;
    # a stand-alone course has nothing useful to return here.
    spec_response = requests.get(spec_url)
    spec_response.raise_for_status()
    specialization_details = spec_response.json()

    elements = specialization_details.get("elements") or []
    linked_courses = (specialization_details.get("linked") or {}).get(
        "courses.v1"
    ) or []
    if not elements or not linked_courses:
        # Fail with an actionable message instead of an AttributeError /
        # IndexError deep inside a chained lookup.
        sys.exit(
            "No specialization found for course id {!r}; pass another id or "
            "set is_specialization = False.".format(
                one_of_specialization_course_id
            )
        )

    courses = [
        {"name": entry["name"], "slug": entry["slug"], "id": entry["id"]}
        for entry in linked_courses
    ]

    # All course folders are created inside a folder named after the
    # specialization, which becomes the working directory.
    specialization_name = elements[0].get("name")
    print(specialization_name)
    os.makedirs(specialization_name, exist_ok=True)
    os.chdir(specialization_name)
else:
    # Kept defined so later code that inspects it does not hit a NameError.
    specialization_details = {}
    # Plug in the appropriate course id, slug and name manually.
    course_id = "YpwHVDb5EemE7gr4SGSAsA"
    courses = [
        {
            "slug": "ibm-blockchain-essentials-for-developers",
            "id": course_id,
            "name": "IBM Blockchain Foundation for Developers",
        }
    ]
def download_file(file_name, file_url):
    """Stream ``file_url`` to disk and draw a progress bar on stdout.

    Args:
        file_name: Desired file name. Surrounding whitespace is stripped and
            every "/" is replaced by a space before it is used as a path in
            the current working directory.
        file_url: URL to fetch with a streaming GET request.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never saved as if it were the requested file.
    """
    print(file_url, file_name)
    target = file_name.strip().replace("/", " ")
    # Data goes to a temporary name first; main() treats an existing target
    # file as "already downloaded", so a partial file must never carry the
    # final name.
    partial = target + ".part"
    start = time.time()
    with requests.get(file_url, stream=True) as r:
        r.raise_for_status()
        total_length = r.headers.get("content-length")
        # The header may be missing (e.g. chunked transfer) or zero; in both
        # cases the bar stays empty instead of raising.
        total = int(total_length) if total_length and total_length.isdigit() else 0
        dl = 0
        with open(partial, "wb") as file:
            for chunk in r.iter_content(chunk_size=1024):
                if not chunk:
                    continue
                dl += len(chunk)
                file.write(chunk)
                done = min(50, int(50 * dl / total)) if total > 0 else 0
                elapsed = time.time() - start
                rate = dl // elapsed if elapsed > 0 else 0
                # `dl` counts bytes, so the rate is bytes per second.
                sys.stdout.write(
                    "\r[%s%s] %s B/s" % ("=" * done, " " * (50 - done), rate)
                )
                sys.stdout.flush()
    # End the carriage-return progress line so later output starts cleanly.
    sys.stdout.write("\n")
    os.replace(partial, target)
def main():
    """Download every lecture video of every course in ``courses``.

    For each course a folder named after the course (":" replaced by "-") is
    created in the current working directory and entered. Lectures are saved
    there as "<index> - <lecture name>.mp4"; a lecture whose file already
    exists is skipped. The working directory is restored after each course,
    even when a download fails.

    Raises:
        ValueError: If a lecture offers no source for ``video_quality``.
    """
    for course in courses:
        course_dir = course["name"].replace(":", "-")
        materials = requests.get(course_url.format(course["slug"])).json()
        items = (materials.get("linked") or {}).get(
            "onDemandCourseMaterialItems.v2"
        ) or []
        lectures = [
            item
            for item in items
            if (item.get("contentSummary") or {}).get("typeName") == "lecture"
        ]
        print(
            "Downloading course = {} {} {}".format("$" * 5, course_dir, "$" * 5)
        )
        os.makedirs(course_dir, exist_ok=True)
        os.chdir(course_dir)
        try:
            # The course id is the same for every lecture of the course, so
            # it is fetched at most once, and only if something is missing.
            courseid = None
            for i, lecture in enumerate(lectures):
                # ":" is replaced as in the course folder name above, since
                # it is not usable in file names on Windows.
                file_name = (
                    str(i) + " - " + lecture["name"].replace(":", "-") + ".mp4"
                )
                # Same normalisation download_file() applies to the name.
                if os.path.exists(file_name.strip().replace("/", " ")):
                    continue
                print(
                    "Downloading lecture = {} {} {}".format(
                        "%" * 5, lecture["name"], "%" * 5
                    )
                )
                if courseid is None:
                    courseid = (
                        requests.get(courseid_url.format(course["slug"]))
                        .json()
                        .get("elements")[0]
                        .get("id")
                    )
                lec_url = lecture_url.format(courseid, lecture["id"])
                by_resolution = (
                    requests.get(lec_url)
                    .json()
                    .get("linked")
                    .get("onDemandVideos.v1")[0]
                    .get("sources")
                    .get("byResolution")
                )
                source = by_resolution.get(video_quality)
                if source is None:
                    raise ValueError(
                        "Quality {!r} is not available for lecture {!r}; "
                        "available: {}".format(
                            video_quality,
                            lecture["name"],
                            ", ".join(sorted(by_resolution)),
                        )
                    )
                download_file(file_name, source.get("mp4VideoUrl"))
        finally:
            # Always step back out so the next course (or the caller) does
            # not end up nested inside this course's folder.
            os.chdir("..")
# Start downloading only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@rjdp Thanks guys, I have gotten it working,
but what can be changed to make it download the Jupyter files?
Thanks for your work! I'd like to propose changing line 92 to the following:
file_name = str('%03i' % i) + " - " + lecture["name"].replace(":", "-") + ".mp4"
This pads the number to be 3 digits and helps with sorting files. Additionally, it replaces ':' in the filenames to avoid issues on Windows (same as was done in the file path for the same issue).
@Afeez1131 the new API has been updated in the coursera-dl project; this script was for temporary purposes only. So check out the coursera-dl page about your query on course assets. Thanks :)
Very good script, but how do I download the whole specialization (videos, subtitles and all materials)?
I also need to do the same with edX.
Thanks
How to use it? Could you give a brief demo?
How do I get the specialization id?