kwilcox/changes.py

## changes.py
#! /usr/bin/env python
import os
import argparse

from lxml import etree


def main(oldxml, newxml, outfile):

    oldtree = None
    if os.path.isfile(oldxml):
        oldtree = etree.parse(oldxml)

    newtree = None
    if os.path.isfile(newxml):
        newtree = etree.parse(newxml)

    if oldtree and newtree:
        # Find removed datasets
        datasetids = etree.XPath(
            "//erddapDatasets/dataset/@datasetID",
            smart_strings=False
        )

        oldids = set(list(datasetids(oldtree)))
        newids = set(list(datasetids(newtree)))
        removedids = list(oldids.difference(newids))

        find_dataset = etree.XPath("//erddapDatasets/dataset[@datasetID=$name]")
        for r in removedids:
            # **** NOTE ****
            # This won't handle a dataset that was marked as active=false
            # and then came back into the picture. It won't remove active=false
            # and the datasets will be in a state of purgatory. This shouldn't
            # happen often, if ever.
            dnode = find_dataset(oldtree, name=r)[0]
            if dnode.get('active') == 'false':
                # Don't do anything, it's ready to be removed from the datasets.xml
                pass
            else:
                # Deactivate the dataset
                dnode.set('active', 'false')
                ds = newtree.getroot()
                ds.append(dnode)

    if not oldtree and newtree:
        print("No existing datasets.xml so using the newly generated one")

    if newtree:
        with open(outfile, 'wt') as f:
            f.write(etree.tostring(newtree, encoding='ISO-8859-1', pretty_print=True, xml_declaration=True).decode('iso-8859-1'))
            f.write('\n')
    else:
        print("Not doing anything. No new datasets.xml file.")


if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    parser.add_argument('oldxml',
                        help="Old datasets.xml file",
                        nargs='?')
    parser.add_argument('newxml',
                        help="New datasets.xml file",
                        nargs='?')
    parser.add_argument('outfile',
                        help="File to write the final datasets.xml",
                        nargs='?')
    args = parser.parse_args()

    main(args.oldxml, args.newxml, args.outfile)

## combine.sh
# Merge all of the individual dataset XML files in a single datasets.xml
FOLDER="./datasets"
OUTPUT_FOLDER="./output"
TMP="$FOLDER/datasets.tmp"
MASTER="$OUTPUT_FOLDER/datasets.xml"
PROCESSED="$FOLDER/datasets.processed"
if [ -e $TMP ]; then
    rm -f $TMP  # Clear file
fi
for x in $(find $FOLDER -regextype posix-extended -regex '.*[0-9]{4}_.*\.xml' -type f | sort); do
    cat $x >> $TMP  # Append contents
done

if [ -e $TMP ]; then
    if [ -e $PROCESSED ]; then
        rm -f $PROCESSED  # Clear file
    fi
    python changes.py $MASTER $TMP $PROCESSED
    rm $TMP
fi

if [ -e $PROCESSED ]; then
    xmllint --format $PROCESSED --noblanks --output $MASTER
    rm $PROCESSED
fi
	#! /usr/bin/env python
	import os
	import argparse

	from lxml import etree


	def main(oldxml, newxml, outfile):

	oldtree = None
	if os.path.isfile(oldxml):
	oldtree = etree.parse(oldxml)

	newtree = None
	if os.path.isfile(newxml):
	newtree = etree.parse(newxml)

	if oldtree and newtree:
	# Find removed datasets
	datasetids = etree.XPath(
	"//erddapDatasets/dataset/@datasetID",
	smart_strings=False
	)

	oldids = set(list(datasetids(oldtree)))
	newids = set(list(datasetids(newtree)))
	removedids = list(oldids.difference(newids))

	find_dataset = etree.XPath("//erddapDatasets/dataset[@datasetID=$name]")
	for r in removedids:
	# ** NOTE **
	# This won't handle a dataset that was marked as active=false
	# and then came back into the picture. It won't remove active=false
	# and the datasets will be in a state of purgatory. This shouldn't
	# happen often, if ever.
	dnode = find_dataset(oldtree, name=r)[0]
	if dnode.get('active') == 'false':
	# Don't do anything, it's ready to be removed from the datasets.xml
	pass
	else:
	# Deactivate the dataset
	dnode.set('active', 'false')
	ds = newtree.getroot()
	ds.append(dnode)

	if not oldtree and newtree:
	print("No existing datasets.xml so using the newly generated one")

	if newtree:
	with open(outfile, 'wt') as f:
	f.write(etree.tostring(newtree, encoding='ISO-8859-1', pretty_print=True, xml_declaration=True).decode('iso-8859-1'))
	f.write('\n')
	else:
	print("Not doing anything. No new datasets.xml file.")


	if __name__ == "__main__":

	parser = argparse.ArgumentParser()

	parser.add_argument('oldxml',
	help="Old datasets.xml file",
	nargs='?')
	parser.add_argument('newxml',
	help="New datasets.xml file",
	nargs='?')
	parser.add_argument('outfile',
	help="File to write the final datasets.xml",
	nargs='?')
	args = parser.parse_args()

	main(args.oldxml, args.newxml, args.outfile)
	# Merge all of the individual dataset XML files in a single datasets.xml
	FOLDER="./datasets"
	OUTPUT_FOLDER="./output"
	TMP="$FOLDER/datasets.tmp"
	MASTER="$OUTPUT_FOLDER/datasets.xml"
	PROCESSED="$FOLDER/datasets.processed"
	if [ -e $TMP ]; then
	rm -f $TMP # Clear file
	fi
	for x in $(find $FOLDER -regextype posix-extended -regex '.[0-9]{4}_.\.xml' -type f \| sort); do
	cat $x >> $TMP # Append contents
	done

	if [ -e $TMP ]; then
	if [ -e $PROCESSED ]; then
	rm -f $PROCESSED # Clear file
	fi
	python changes.py $MASTER $TMP $PROCESSED
	rm $TMP
	fi

	if [ -e $PROCESSED ]; then
	xmllint --format $PROCESSED --noblanks --output $MASTER
	rm $PROCESSED
	fi