Skip to content

Instantly share code, notes, and snippets.

@csjx
Created August 24, 2016 22:10
Show Gist options
  • Save csjx/8f312064170da8360e7ae58ca4f5f46e to your computer and use it in GitHub Desktop.
Save csjx/8f312064170da8360e7ae58ca4f5f46e to your computer and use it in GitHub Desktop.
package org.dataone.tests;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
/**
 * Command-line check that validates FGDC metadata documents against the XML
 * Schema each document names in its root element.
 *
 * <p>The program reads a text file containing one NetCDF URL per line. For each
 * URL it derives two sibling URLs by appending {@code .fgdc.xml} and
 * {@code .ncml.xml}, opens both, reads the
 * {@code xsi:noNamespaceSchemaLocation} attribute from the FGDC document's root
 * element, loads that schema, and validates the FGDC document against it.
 *
 * <p>Processing stops at the first failure; the exception's stack trace is
 * printed and the remaining URLs are not checked.
 */
public class XMLValidatorTest {

    /** Text file listing the NetCDF URLs to check, one per line. */
    private static final String URL_LIST_PATH =
            "/Users/cjones/d1-test/mstmip/CSDGM-Schemas/mstmip-urls.txt";

    /**
     * Iterates over the URL list and validates the FGDC document for each entry.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        LineIterator iterator = null;
        try {
            iterator = FileUtils.lineIterator(new File(URL_LIST_PATH), "UTF-8");
            while (iterator.hasNext()) {
                String nc4URLStr = iterator.nextLine().trim();
                // A blank line is not a URL; skip it instead of aborting the run
                // with a MalformedURLException.
                if (nc4URLStr.isEmpty()) {
                    continue;
                }
                validateFgdc(nc4URLStr);
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } finally {
            // The iterator is still null when opening the list file failed;
            // closing it unguarded would replace the real error with an NPE.
            if (iterator != null) {
                iterator.close();
            }
        }
    }

    /**
     * Validates the FGDC document that accompanies one NetCDF URL.
     *
     * @param nc4URLStr URL of the NetCDF file; the FGDC and NCML URLs are derived
     *                  from it by appending {@code .fgdc.xml} and {@code .ncml.xml}
     * @throws IOException if a URL is malformed, a stream cannot be opened or read,
     *                     or the root element names no schema location
     * @throws SAXException if the FGDC document or its schema cannot be parsed, or
     *                      the document is not valid against the schema
     * @throws ParserConfigurationException if no DOM parser can be created
     */
    private static void validateFgdc(String nc4URLStr)
            throws IOException, SAXException, ParserConfigurationException {
        String fgdcURLStr = nc4URLStr + ".fgdc.xml";
        String ncmlURLStr = nc4URLStr + ".ncml.xml";
        URL fgdcURL = new URL(fgdcURLStr);
        URL ncmlURL = new URL(ncmlURLStr);

        System.out.println(nc4URLStr);
        System.out.println(fgdcURLStr);
        System.out.println(ncmlURLStr);
        System.out.println("----------------------------\n");

        // Both streams are closed when the block exits, on success or failure.
        // The NCML stream's content is never read; it is still opened so that an
        // unreachable NCML document surfaces as an IOException.
        try (InputStream fgdcStream = fgdcURL.openStream();
                InputStream ncmlStream = ncmlURL.openStream()) {

            // NOTE(review): the parser runs with default settings, so DTDs and
            // external entities referenced by the remote document are resolved.
            // Consider hardening the factory if the URL list is not trusted.
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();

            // First fetch: parse the document to discover which schema it names.
            Document fgdcDocument = builder.parse(fgdcURLStr);
            Element rootElement = fgdcDocument.getDocumentElement();
            String fgdcSchemaURIStr =
                    rootElement.getAttribute("xsi:noNamespaceSchemaLocation");
            // getAttribute returns "" for a missing attribute; report that
            // explicitly instead of letting new URL("") fail with "no protocol".
            if (fgdcSchemaURIStr.isEmpty()) {
                throw new MalformedURLException(
                        "No xsi:noNamespaceSchemaLocation attribute on the root element of "
                                + fgdcURLStr);
            }
            URL schemaURL = new URL(fgdcSchemaURIStr);

            SchemaFactory schemaFactory =
                    SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
            Schema schema = schemaFactory.newSchema(schemaURL);

            // Second read: validate the already-open stream against the schema.
            Source xmlSource = new StreamSource(fgdcStream);
            Validator validator = schema.newValidator();
            validator.validate(xmlSource);
            System.out.println("File is valid: " + fgdcURLStr);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment