-
-
Save itsbenweeks/b9b97c67846382c98240 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
A number of functions to rename and modify edXml video tags in order to make them mobile ready.

.. note:: This assumes that the csv file is ordered youtube_id, edx_id
"""
import sys | |
import xml.etree.ElementTree as ET | |
import csv | |
def print_help():
    '''Print the command-line usage summary to standard output.'''
    usage_lines = (
        'usage: edx_id_vid -h',
        'edx_id_vid -a csv_filename xml_filename',
        'edx_id_vid -d xml_filename',
    )
    print('\n'.join(usage_lines))
def load_csv(csv_fn):
    '''
    Load a csv document into a youtube_id -> edx_id mapping.

    Each row is expected to be ordered ``youtube_id, edx_id``; any further
    columns are ignored. Rows that are blank, have fewer than two columns,
    or have an empty first column are skipped. If a youtube id appears more
    than once, the last row wins.

    :param csv_fn: path of the csv file to read.
    :returns: dict mapping the first column to the second column.
    '''
    edx_id_dict = {}
    # The context manager closes the file even if csv parsing raises.
    with open(csv_fn, 'r') as edx_id_file:
        for row in csv.reader(edx_id_file):
            # csv.reader yields [] for blank lines and short lists for
            # incomplete rows; indexing those would raise IndexError.
            if len(row) >= 2 and row[0] != '':
                edx_id_dict[row[0]] = row[1]
    return edx_id_dict
def load_xml(xml_fn):
    '''Parse the xml file at ``xml_fn`` and return the resulting ElementTree.'''
    return ET.parse(xml_fn)
def remove_edx_ids(xml_fn):
    '''
    Check an xml file for video tags, if any edx_video_ids exist, delete them.

    Every ``video`` element that carries an ``edx_video_id`` attribute has it
    removed, and one message is printed per removal. The document is written
    back to ``xml_fn`` only when at least one attribute was removed, so files
    that need no change are not re-serialized.

    :param xml_fn: path of the xml file to modify in place.
    '''
    video_xml = load_xml(xml_fn)
    changed = False
    for video in video_xml.getroot().iter('video'):
        if 'edx_video_id' in video.attrib:
            del video.attrib['edx_video_id']
            print('edx_video_id removed in {}'.format(xml_fn))
            changed = True
    # Skip the write when nothing changed: rewriting would only re-serialize
    # the document without altering any video tag.
    if changed:
        video_xml.write(xml_fn)
        print('Wrote {}'.format(xml_fn))
def _video_youtube_ids(video):
    '''Return the youtube ids referenced by one video element, in lookup order.'''
    speed_attribs = (
        'youtube_id_0_75',
        'youtube_id_1_0',
        'youtube_id_1_25',
        'youtube_id_1_5',
    )
    youtube_ids = []
    # The ``youtube`` attribute is a comma separated list of ``speed:id``
    # pairs. It may be absent, and malformed entries without a colon are
    # skipped instead of raising.
    for entry in video.attrib.get('youtube', '').split(','):
        parts = entry.split(':')
        if len(parts) > 1 and parts[1]:
            youtube_ids.append(parts[1])
    for attrib in speed_attribs:
        if attrib in video.attrib:
            youtube_ids.append(video.attrib[attrib])
    return youtube_ids


def add_edx_ids(csv_fn, xml_fn):
    '''
    Compare any youtube_x_x... or youtube attributes and gather youtube IDs.
    If any youtube ids exist that are in the csv, add an edx_video_id attribute.

    For each ``video`` element the first youtube id found in the csv mapping
    decides the ``edx_video_id`` value. The document is written back to
    ``xml_fn`` once, and only if at least one video element was updated.

    :param csv_fn: path of the csv file ordered youtube_id, edx_id.
    :param xml_fn: path of the xml file to modify in place.
    '''
    video_xml = load_xml(xml_fn)
    edx_id_dict = load_csv(csv_fn)
    changed = False
    for video in video_xml.getroot().iter('video'):
        for youtube_id in _video_youtube_ids(video):
            if youtube_id in edx_id_dict:
                video.set('edx_video_id', edx_id_dict[youtube_id])
                changed = True
                # The first matching id wins for this video element.
                break
    # Write once after all video tags are processed rather than once per match.
    if changed:
        video_xml.write(xml_fn)
        print('Wrote {}'.format(xml_fn))
if __name__ == '__main__':
    # Dispatch on the first command-line argument. The flag is matched as a
    # substring of that argument, as before, so existing invocations keep
    # working. Missing arguments or an unknown flag now print the usage text
    # and exit with status 2 instead of raising IndexError or doing nothing.
    cli_args = sys.argv[1:]
    flag = cli_args[0] if cli_args else ''
    if '-h' in flag:
        print_help()
    elif '-a' in flag and len(cli_args) >= 3:
        add_edx_ids(cli_args[1], cli_args[2])
    elif '-d' in flag and len(cli_args) >= 2:
        remove_edx_ids(cli_args[1])
    else:
        print_help()
        sys.exit(2)
For adding edX ids, this worked for me
$ for fn in video/*.xml; do python edx_id_vid.py -a csv_of_IDs.csv $fn; done
without the ';' after 'do'
Saving .csv files in Mac OS format produced an error:
_csv.Error: new-line character seen in unquoted field - do you need to open the file in universal-newline mode?
To overcome this error, I saved the CSV file as 'Windows Comma Separated', per suggestions from Stack Overflow, and it worked:
http://stackoverflow.com/questions/17315635/csv-new-line-character-seen-in-unquoted-field-error
I think that line 47
if 'edx_video_id' in video.keys:
should be
if 'edx_video_id' in video.attrib:
Oops, I didn't notice your comments until now. Sorry, @srayyan. video.keys is an instance method (video.attrib is the attribute dictionary), and I was treating it like an object here. I just fixed that.
This script serves to add edx_video_id attributes to video files from a CSV, but also to remove them for our own instance of OpenEdX, which does not use the video abstraction layer (VAL).
To run it on a number of files I use this bash one-liner:
$ for fn in video/*.xml; do python edx_id_vid.py -d $fn; done
or if you want to add edx_video_id attributes:
$ for fn in video/*.xml; do python edx_id_vid.py -a csv_of_IDs.csv $fn; done