Skip to content

Instantly share code, notes, and snippets.

@Afeez1131
Forked from rjdp/coursera_downloader.py
Created June 22, 2018 11:52
Show Gist options
  • Save Afeez1131/9f6338c4a2e35fd2069ef391cdb599db to your computer and use it in GitHub Desktop.
Save Afeez1131/9f6338c4a2e35fd2069ef391cdb599db to your computer and use it in GitHub Desktop.
Script for downloading course lectures from a Coursera specialization
import os
import requests
import time
import sys
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Resolution key looked up in the lecture's "byResolution" sources.
video_quality = '360p'  # available qualities: 360p, 540p, 720p

# How to find a course id:
#   1. Open a course page and open the Network tab of the browser dev tools.
#   2. Type "onDemandSpecializations" into the Network tab's search input.
#   3. Go to the videos section of, say, week 1 and read the "courseId" query
#      parameter of the matching request.
# If the course belongs to a specialization, use that value for
# `one_of_specialization_course_id` below. Otherwise set
# `is_specialization = False` and put the value (together with the course's
# slug and name) into the manual branch further down.

# Id of any one course within the specialization to download.
# Other ids left behind by the author: "pNXe13ICEeeBKg4MjLYj6A",
# "BIU_pgCCEeiZdg6RDGBSdg", "7H35pMSrEeefQQqXI6t6yg", "ARf5_jvZEeeYEBLbuVGJ2g",
# "nA4RUW01EeW8nRIpKnwp7Q".
# NOTE(review): the original comment says "this course id is from google IT
# Support Professional Certificate Specialization" -- unclear which of the ids
# it refers to; confirm before relying on it.
one_of_specialization_course_id = "iSxVEG07EeW3YxLB1q9I2w"

# True: discover every course of the specialization through the API.
# False: use the single, manually described course in the else-branch below.
is_specialization = True

# API endpoint templates. `spec_url` is complete; the others are filled in
# with str.format() by the download loop.
spec_url = "https://www.coursera.org/api/onDemandSpecializations.v1?fields=courseIds,interchangeableCourseIds,launchedAt,logo,memberships,metadata,partnerIds,premiumExperienceVariant,onDemandSpecializationMemberships.v1(suggestedSessionSchedule),onDemandSpecializationSuggestedSchedule.v1(suggestedSessions),partners.v1(homeLink,name),courses.v1(courseProgress,description,membershipIds,startDate,v2Details,vcMembershipIds),v2Details.v1(onDemandSessions,plannedLaunchDate),memberships.v1(grade,vcMembershipId),vcMemberships.v1(certificateCodeWithGrade)&includes=courseIds,memberships,partnerIds,onDemandSpecializationMemberships.v1(suggestedSessionSchedule),courses.v1(courseProgress,membershipIds,v2Details,vcMembershipIds),v2Details.v1(onDemandSessions)&q=primary&courseId={}".format(one_of_specialization_course_id)
course_url = "https://www.coursera.org/api/onDemandCourseMaterials.v2/?q=slug&slug={}&includes=modules%2Clessons%2CpassableItemGroups%2CpassableItemGroupChoices%2CpassableLessonElements%2Citems%2Ctracks%2CgradePolicy&fields=moduleIds%2ConDemandCourseMaterialModules.v1(name%2Cslug%2Cdescription%2CtimeCommitment%2ClessonIds%2Coptional%2ClearningObjectives)%2ConDemandCourseMaterialLessons.v1(name%2Cslug%2CtimeCommitment%2CelementIds%2Coptional%2CtrackId)%2ConDemandCourseMaterialPassableItemGroups.v1(requiredPassedCount%2CpassableItemGroupChoiceIds%2CtrackId)%2ConDemandCourseMaterialPassableItemGroupChoices.v1(name%2Cdescription%2CitemIds)%2ConDemandCourseMaterialPassableLessonElements.v1(gradingWeight%2CisRequiredForPassing)%2ConDemandCourseMaterialItems.v2(name%2Cslug%2CtimeCommitment%2CcontentSummary%2CisLocked%2ClockableByItem%2CitemLockedReasonCode%2CtrackId%2ClockedStatus%2CitemLockSummary)%2ConDemandCourseMaterialTracks.v1(passablesCount)&showLockedItems=true"
courseid_url = "https://www.coursera.org/api/onDemandCourses.v1?q=slug&slug={}&includes=instructorIds%2CpartnerIds%2C_links&fields=brandingImage%2CcertificatePurchaseEnabledAt%2Cpartners.v1(squareLogo%2CrectangularLogo)%2Cinstructors.v1(fullName)%2CoverridePartnerLogos%2CsessionsEnabledAt%2CdomainTypes%2CpremiumExperienceVariant%2CisRestrictedMembership"
lecture_url = "https://www.coursera.org/api/onDemandLectureVideos.v1/{}~{}?includes=video&fields=onDemandVideos.v1(sources%2Csubtitles%2CsubtitlesVtt%2CsubtitlesTxt)"

# Build `courses`: a list of {'name', 'slug', 'id'} dicts consumed by the
# download loop.
if is_specialization:
    # Only contact the specialization endpoint when its result is used.
    spec_payload = requests.get(spec_url).json()
    spec_courses = (spec_payload.get('linked') or {}).get('courses.v1')
    if not spec_courses:
        # Exit with a readable message instead of an AttributeError/TypeError
        # raised from deep inside a chained .get() call.
        sys.exit("No courses returned for specialization course id {!r}; "
                 "check the id or set is_specialization = False".format(
                     one_of_specialization_course_id))
    courses = [
        {'name': entry['name'], 'slug': entry['slug'], 'id': entry['id']}
        for entry in spec_courses
    ]
else:
    course_id = "7H35pMSrEeefQQqXI6t6yg" # plug in appropriate course id, slug, name manually
    courses = [{'slug': "ibm-blockchain-essentials-for-developers", 'id':course_id, 'name':'IBM Blockchain Foundation for Developers'}]
def download_file(file_name, file_url):
    """Stream ``file_url`` into the current directory with a progress bar.

    Args:
        file_name: Desired file name. Surrounding whitespace is stripped and
            every "/" is replaced by a space before it is used.
        file_url: URL fetched with a streaming GET request.

    Raises:
        requests.RequestException: the request failed or returned an HTTP
            error status (nothing is written under the final name then).
        OSError: the file could not be written or renamed.

    The data is first written to "<name>.part" and renamed once the download
    has finished, so an interrupted transfer never leaves a truncated file
    under the final name.
    """
    print(file_url)
    target = file_name.strip().replace("/", " ")
    partial = target + ".part"

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()
    r = requests.get(file_url, stream=True)
    try:
        # Do not save an HTML/JSON error page as if it were the video.
        r.raise_for_status()

        # Content-Length is optional (e.g. chunked responses); without it
        # the bar stays empty but the download still proceeds.
        total_length = r.headers.get('content-length')
        total = int(total_length) if total_length and total_length.isdigit() else 0

        dl = 0
        with open(partial, "wb") as file:
            for chunk in r.iter_content(chunk_size=1024):
                if not chunk:
                    continue
                dl += len(chunk)
                file.write(chunk)
                # Clamp to the bar width: decoded bytes can exceed the
                # advertised length when the body is content-encoded.
                done = min(50, 50 * dl // total) if total else 0
                elapsed = time.perf_counter() - start
                # The printed figure is bytes per second.
                speed = dl // elapsed if elapsed > 0 else 0
                sys.stdout.write("\r[%s%s] %s bps" % ('=' * done, ' ' * (50 - done), speed))
        # Finish the progress line so later output starts on a fresh line.
        sys.stdout.write("\n")
        sys.stdout.flush()
    finally:
        # Always release the connection, even when the transfer fails.
        r.close()

    os.replace(partial, target)
# Download loop: for every course, create a directory named after the course
# (":" replaced by "-"), enter it, and download each lecture video that is
# not already present. Failures are reported and skipped so one bad lecture
# or course does not stop the rest; Ctrl-C still aborts the run.
start_dir = os.getcwd()
for course in courses:
    course_dir = course['name'].replace(':', '-')
    try:
        material = requests.get(course_url.format(course['slug'])).json()
        items = material.get('linked').get('onDemandCourseMaterialItems.v2')
        lectures = [
            item for item in items
            if (item.get('contentSummary') or {}).get('typeName') == 'lecture'
        ]
        print("Downloading course = {} {} {}".format('$'*5, course_dir, '$'*5))
        os.makedirs(course_dir, exist_ok=True)
        os.chdir(course_dir)

        # The course id is the same for every lecture of a course, so it is
        # requested at most once, and only if something needs downloading.
        courseid = None
        for i, lecture in enumerate(lectures):
            try:
                file_name = str(i) + ' - ' + lecture['name'] + '.mp4'
                # Same sanitising as download_file applies to the name.
                if os.path.exists(file_name.strip().replace("/", " ")):
                    continue
                print("Downloading lecture = {} {} {}".format('%'*5, lecture['name'], '%'*5))
                if courseid is None:
                    courseid = requests.get(courseid_url.format(course['slug'])).json().get('elements')[0].get('id')
                lec_url = lecture_url.format(courseid, lecture['id'])
                by_resolution = requests.get(lec_url).json().get('linked').get('onDemandVideos.v1')[0].get('sources').get('byResolution')
                chosen = by_resolution.get(video_quality)
                if chosen is None:
                    print("Skipping lecture {!r}: quality {} is not available".format(lecture['name'], video_quality))
                    continue
                download_file(file_name, chosen.get('mp4VideoUrl'))
            except Exception as exc:
                # Best effort per lecture: report and continue. Unlike a
                # bare except, this lets KeyboardInterrupt through.
                print("Failed to download lecture {!r}: {!r}".format(lecture.get('name'), exc))
    except Exception as exc:
        print("Failed to process course {!r}: {!r}".format(course['name'], exc))
    finally:
        # Return to the starting directory whatever happened above, so the
        # next course directory is created next to this one.
        os.chdir(start_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment