Skip to content

Instantly share code, notes, and snippets.

@kwilcox
Last active September 11, 2017 19:38
Show Gist options
  • Save kwilcox/28a6d4dd1aa581aae4210efc684e8789 to your computer and use it in GitHub Desktop.
Save kwilcox/28a6d4dd1aa581aae4210efc684e8789 to your computer and use it in GitHub Desktop.
Compare (2) ERDDAP datasets.xml files and output a new one with the active flag set correctly.
#! /usr/bin/env python
import os
import argparse
from lxml import etree
def _parse_if_file(path):
    """Return the parsed XML tree for *path*, or None if it is not a file."""
    # argparse hands over None for omitted positionals, and
    # os.path.isfile(None) raises TypeError instead of returning False.
    if path is not None and os.path.isfile(path):
        return etree.parse(path)
    return None


def main(oldxml, newxml, outfile):
    """Write a datasets.xml that deactivates datasets dropped since the old one.

    Every dataset found in ``oldxml`` but not in ``newxml`` is appended to the
    new document with ``active="false"``. A removed dataset that was already
    ``active="false"`` in the old document is not carried over at all.

    Args:
        oldxml: Path to the existing datasets.xml, or None. If it is not a
            file, the new document is written out without any merging.
        newxml: Path to the newly generated datasets.xml, or None. If it is
            not a file, nothing is written.
        outfile: Path the resulting document is written to.
    """
    oldtree = _parse_if_file(oldxml)
    newtree = _parse_if_file(newxml)

    if newtree is None:
        print("Not doing anything. No new datasets.xml file.")
        return

    if oldtree is None:
        print("No existing datasets.xml so using the newly generated one")
    else:
        # Find removed datasets
        datasetids = etree.XPath(
            "//erddapDatasets/dataset/@datasetID",
            smart_strings=False
        )
        removedids = set(datasetids(oldtree)) - set(datasetids(newtree))
        find_dataset = etree.XPath("//erddapDatasets/dataset[@datasetID=$name]")
        newroot = newtree.getroot()

        # Sorted so the appended datasets come out in the same order on
        # every run instead of following arbitrary set ordering.
        for removed in sorted(removedids):
            # **** NOTE ****
            # This won't handle a dataset that was marked as active=false
            # and then came back into the picture. It won't remove active=false
            # and the datasets will be in a state of purgatory. This shouldn't
            # happen often, if ever.
            dnode = find_dataset(oldtree, name=removed)[0]
            if dnode.get('active') == 'false':
                # Already deactivated on a previous run, so it is ready to be
                # dropped from datasets.xml entirely.
                continue
            # Deactivate the dataset and carry it over into the new document.
            dnode.set('active', 'false')
            newroot.append(dnode)

    serialized = etree.tostring(
        newtree,
        encoding='ISO-8859-1',
        pretty_print=True,
        xml_declaration=True
    )
    # Write the encoded bytes directly: going through a text-mode file would
    # re-encode them with the locale's encoding, which can disagree with the
    # ISO-8859-1 declaration in the XML header.
    with open(outfile, 'wb') as f:
        f.write(serialized)
        f.write(b'\n')
if __name__ == "__main__":
    # Command-line entry point: three optional positional paths, handed to
    # main() in the order old file, new file, output file.
    cli = argparse.ArgumentParser()
    positional_help = (
        ('oldxml', "Old datasets.xml file"),
        ('newxml', "New datasets.xml file"),
        ('outfile', "File to write the final datasets.xml"),
    )
    for arg_name, arg_help in positional_help:
        # nargs='?' makes each positional optional; an omitted one is None.
        cli.add_argument(arg_name, help=arg_help, nargs='?')
    opts = cli.parse_args()
    main(opts.oldxml, opts.newxml, opts.outfile)
# Merge all of the individual dataset XML files in a single datasets.xml
#
# Steps:
#   1. Concatenate the per-dataset XML files under $FOLDER into $TMP.
#   2. Run changes.py with the current $MASTER, $TMP and $PROCESSED.
#   3. If $PROCESSED was produced, format it with xmllint into $MASTER.
FOLDER="./datasets"
OUTPUT_FOLDER="./output"
TMP="$FOLDER/datasets.tmp"
MASTER="$OUTPUT_FOLDER/datasets.xml"
PROCESSED="$FOLDER/datasets.processed"

# Clear any leftover from a previous run; rm -f is silent if it is absent.
rm -f "$TMP"

# Append every file whose path contains four digits followed by an underscore
# and ends in .xml, in sorted path order. Reading the list line by line
# (instead of word-splitting a command substitution) keeps paths that contain
# spaces intact.
find "$FOLDER" -regextype posix-extended -regex '.*[0-9]{4}_.*\.xml' -type f | sort |
while IFS= read -r x; do
    cat "$x" >> "$TMP" # Append contents
done

# $TMP only exists if at least one matching file was found above.
if [ -e "$TMP" ]; then
    rm -f "$PROCESSED" # Clear file
    python changes.py "$MASTER" "$TMP" "$PROCESSED"
    rm "$TMP"
fi

# Only replace the master file when a processed file is actually present.
if [ -e "$PROCESSED" ]; then
    xmllint --format "$PROCESSED" --noblanks --output "$MASTER"
    rm "$PROCESSED"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment