Skip to content

Instantly share code, notes, and snippets.

@aneesha
Last active August 9, 2020 23:14
Show Gist options
  • Save aneesha/eace1ac87939194b7443a9a42d45b1e2 to your computer and use it in GitHub Desktop.
Save aneesha/eace1ac87939194b7443a9a42d45b1e2 to your computer and use it in GitHub Desktop.
'''
Merge/combine courses in the OpenedX OLX format.
'''
import sys
import os
from distutils.dir_util import copy_tree
import json
# Example:
# Place un-tarred course exports in a folder and create an output folder
# $python edx_olxformat_merge_courses.py /Users/username/projects/edx_olx_merge/to_process/MOOCS /Users/username/projects/edx_olx_merge/processed/
def getCourses(folder_with_merged_courses):
    """Return the names of the course folders directly inside a folder.

    Only immediate sub-directories are listed; plain files are ignored.
    The names are returned in reverse alphabetical order so that the
    result (and therefore the choice of the first, "parent" course) does
    not depend on the order in which the filesystem lists entries.

    Args:
        folder_with_merged_courses: Path of the folder that holds one
            sub-folder per exported course.

    Returns:
        A list of sub-folder names (not full paths), reverse-sorted.

    Raises:
        OSError: If the folder does not exist or is not a directory
            (raised by ``os.scandir``).
    """
    with os.scandir(folder_with_merged_courses) as entries:
        course_folders = [entry.name for entry in entries if entry.is_dir()]
    return sorted(course_folders, reverse=True)
def _load_json(path):
    """Read a JSON file leniently and return the decoded object.

    Undecodable bytes are dropped (``errors='ignore'``) and control
    characters inside strings are accepted (``strict=False``).
    """
    with open(path, encoding='utf-8', errors='ignore') as json_data:
        return json.load(json_data, strict=False)


def main(argv):
    """Merge the course exports found under ``argv[1]`` into ``argv[2]``.

    The first folder returned by ``getCourses`` is treated as the parent
    course: its ``course/`` directory is copied to ``<output>/course/``.
    The content folders of every other (child) course are then copied on
    top of that output copy, and the ``policies/assets.json`` files of all
    courses are combined into ``<output>/course/assets.json``.

    Args:
        argv: Command-line arguments: script name, folder holding the
            course exports, output folder.

    Raises:
        SystemExit: If fewer than two paths are given or the input folder
            contains no course folders.
    """
    print('Number of arguments:', len(argv), 'arguments.')
    print('Argument List:', str(argv))
    if len(argv) < 3:
        sys.exit('Usage: python ' + argv[0]
                 + ' <folder_with_course_exports> <output_folder>')

    folder_with_merged_courses = argv[1]
    output_path = argv[2]
    print("Folder with course exports to merge", folder_with_merged_courses)

    course_folders = getCourses(folder_with_merged_courses)
    print('Number of courses:', len(course_folders))
    print(course_folders)
    if not course_folders:
        sys.exit('No course folders found in ' + folder_with_merged_courses)

    # Make first course parent and copy into output folder.
    # os.path.join keeps this correct whether or not the paths given on
    # the command line end with a separator.
    parent_course_path = os.path.join(
        folder_with_merged_courses, course_folders[0], 'course')
    output_course_path = os.path.join(output_path, 'course')
    copy_tree(parent_course_path, output_course_path)

    # Copy Problems, HTML, Verticals, Sequentials and Chapters from each
    # child into the OUTPUT copy of the parent, so the original exports
    # are left untouched and the merged content ends up in the output.
    folders_to_copy = ['chapter', 'html', 'problem', 'sequential',
                       'vertical', 'video', 'static']
    child_courses = course_folders[1:]
    for crs in child_courses:
        child_course_path = os.path.join(
            folder_with_merged_courses, crs, 'course')
        for fld in folders_to_copy:
            child_folder = os.path.join(child_course_path, fld)
            if not os.path.isdir(child_folder):
                # copy_tree raises when the source directory is missing;
                # a child that lacks one of these folders is skipped.
                continue
            copy_tree(child_folder, os.path.join(output_course_path, fld))

    # Merge the static files json between the child and parent courses.
    # Start from the parent's entries and add every child entry whose key
    # is not already present, so existing entries are never overwritten.
    parent_policy_dict = _load_json(
        os.path.join(parent_course_path, 'policies', 'assets.json'))
    for crs in child_courses:
        child_policy_dict = _load_json(os.path.join(
            folder_with_merged_courses, crs, 'course', 'policies',
            'assets.json'))
        for key, value in child_policy_dict.items():
            parent_policy_dict.setdefault(key, value)

    # Written at the top of the output course folder (not into policies/).
    with open(os.path.join(output_course_path, 'assets.json'), 'w') as fp:
        json.dump(parent_policy_dict, fp)


if __name__ == '__main__':
    main(sys.argv)
# Finally
# 1 - Copy assets.json to policies folder
# 2 - Manually copy chapter.xml contents from child courses and append to parent's chapter.xml
# 3 - Compress Parent course
# $tar -cvzf mergedcourse.tar.gz /path/to/exportedmergedcourse
# 4 - Upload to edx
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment