len0rd/splitUSStateCountyKMLCensusFiles.groovy

## splitUSStateCountyKMLCensusFiles.groovy
import groovy.xml.*
/**
 *  Removes some additional metadata I don't want and splits the
 *  US county KML file into separate files, one for each state.
 *  Get the census data from:
 *  https://www.census.gov/geo/maps-data/data/kml/kml_counties.html
 *  The higher the ratio, the lower the resolution
 *
 *  This data can be easily imported into a Google MyMap
 *
 *  @author len0rd
 *  @since 2018-08-02
 */


// Input: the nationwide county KML downloaded from the Census Bureau.
def inFile  = "allUS.kml"
// Output: folder that receives one KML file per state (trailing slash required,
// because file names are built by plain string concatenation).
def outFolder = 'split/'
// The (false, false) arguments switch off validation and namespace awareness, so
// element names stay plain (Document, Placemark, ...) and no odd prefixed tags are
// printed when the tree is serialized again.
// Parse from a File rather than a bare String: XmlSlurper.parse(String) interprets its
// argument as a URI, whereas inFile is meant to be an ordinary filesystem path.
def kml = new XmlSlurper(false, false).parse(new File(inFile))
println "parsed!"

// Skeleton of every per-state output document. It is parsed afresh each time a new
// state starts, and that state's county Placemarks are appended to the empty <Folder>.
// The <Schema> only declares the five extended-data fields this script keeps
// (STATEFP, COUNTYFP, COUNTYNS, AFFGEOID, NAME).
def baseSplitKml = """<kml
    xmlns:gx="http://www.google.com/kml/ext/2.2"
    xmlns:atom="http://www.w3.org/2005/Atom"
    xmlns="http://www.opengis.net/kml/2.2">
    <Document>
        <name>cb_2017_us_county_500k</name>
        <visibility>1</visibility>
        <Schema name="cb_2017_us_county_500k" id="kml_schema_ft_cb_2017_us_county_500k">
            <SimpleField type="xsd:string" name="STATEFP">
                <displayName>STATEFP</displayName>
            </SimpleField>
            <SimpleField type="xsd:string" name="COUNTYFP">
                <displayName>COUNTYFP</displayName>
            </SimpleField>
            <SimpleField type="xsd:string" name="COUNTYNS">
                <displayName>COUNTYNS</displayName>
            </SimpleField>
            <SimpleField type="xsd:string" name="AFFGEOID">
                <displayName>AFFGEOID</displayName>
            </SimpleField>
            <SimpleField type="xsd:string" name="NAME">
                <displayName>NAME</displayName>
            </SimpleField>
        </Schema>
        <Folder id="kml_ft_cb_2017_us_county_500k">
        </Folder>
    </Document>
</kml>"""

// Maps a two-digit state FIPS code (the STATEFP value of a county) to the name used in
// the output file name. Besides the 50 states and DC it also lists Puerto Rico and the
// island territories: without an entry, a lookup yields null and the counties of every
// unlisted code would end up together in "stCounties-null.kml".
def stateFIPToName = [
    '01':'Alabama', '02':'Alaska', '04':'Arizona', '05':'Arkansas', '06':'California',
    '08':'Colorado', '09':'Connecticut', '10':'Delaware', '11':'District of Columbia',
    '12':'Florida', '13':'Georgia', '15':'Hawaii', '16':'Idaho', '17':'Illinois',
    '18':'Indiana', '19':'Iowa', '20':'Kansas', '21':'Kentucky', '22':'Louisiana',
    '23':'Maine', '24':'Maryland', '25':'Massachusetts', '26':'Michigan', '27':'Minnesota',
    '28':'Mississippi', '29':'Missouri', '30':'Montana', '31':'Nebraska', '32':'Nevada',
    '33':'New Hampshire', '34':'New Jersey', '35':'New Mexico', '36':'New York',
    '37':'North Carolina', '38':'North Dakota', '39':'Ohio', '40':'Oklahoma', '41':'Oregon',
    '42':'Pennsylvania', '44':'Rhode Island', '45':'South Carolina', '46':'South Dakota',
    '47':'Tennessee', '48':'Texas', '49':'Utah', '50':'Vermont', '51':'Virginia',
    '53':'Washington', '54':'West Virginia', '55':'Wisconsin', '56':'Wyoming',
    // territories and Puerto Rico
    '60':'American Samoa', '66':'Guam', '69':'Northern Mariana Islands',
    '72':'Puerto Rico', '78':'U.S. Virgin Islands'
]

// Make sure the output folder exists before any per-state file is written.
// (Deliberately no 'def': outFolderFile stays a script-binding variable, as before.)
outFolderFile = new File(outFolder)
if (!outFolderFile.exists()) {
    println "create the folder for output"
    // mkdirs() signals failure through its return value instead of throwing, so check
    // it and stop with a clear message rather than failing later on the first write.
    if (!outFolderFile.mkdirs() && !outFolderFile.isDirectory()) {
        throw new IOException("could not create output folder: " + outFolderFile.absolutePath)
    }
} else if (!outFolderFile.isDirectory()) {
    // Something that is not a directory already occupies the output path.
    throw new IOException("output path exists but is not a directory: " + outFolderFile.absolutePath)
}

// Style definitions are of no use in the split output, so drop them.
kml.Document.Style.replaceNode {}
// Next, drop the schema column headers of the extended-data fields we discard.
def unwantedHeaders = ['LSAD', 'ALAND', 'AWATER', 'GEOID']
def headersToDrop = kml.Document.Schema.SimpleField.findAll { field ->
    field.@name.text() in unwantedHeaders
}
headersToDrop.each { header -> header.replaceNode {} }

// Counters reported once the loop has finished.
def iter = 0        // number of county placemarks visited
def polyCount = 0   // counties described by a single Polygon
def multiCount = 0  // counties described by a MultiGeometry
// Document currently being assembled and the file it will be written to.
// This assumes that counties are listed in order by stateFIPS -> countyFIPS.
def currentSplitKml = null
def currentSplitKmlFile = null
// FIPS code of the state currently being assembled. Tracking the code itself (instead
// of testing whether the output file exists) keeps files left over from an earlier run
// from being mistaken for "the state in progress", which used to dereference a null
// document or append counties to the wrong state's document.
def currentStateFip = null
// States already started, used only to warn when the ordering assumption is violated.
def startedStateFips = [] as Set
// Extended-data fields stripped from every county.
def unwantedFields = ['LSAD', 'ALAND', 'AWATER', 'GEOID']

kml.Document.Folder.Placemark.each { placemark ->
    placemark.description.replaceNode {} // remove description node
    placemark.styleUrl.replaceNode {}    // remove styleurl node

    // remove the extended data we don't want
    placemark.ExtendedData.SchemaData.SimpleData.findAll { field ->
        field.@name.text() in unwantedFields
    }.each { dead ->
        dead.replaceNode {}
    }

    // strip the rendering hints from the geometry and count its kind
    if (placemark.Polygon.size() == 1) {
        placemark.Polygon.extrude.replaceNode {}
        placemark.Polygon.tessellate.replaceNode {}
        placemark.Polygon.altitudeMode.replaceNode {}
        polyCount++
    } else if (placemark.MultiGeometry.size() == 1) {
        placemark.MultiGeometry.Polygon.each { polygon ->
            polygon.extrude.replaceNode {}
            polygon.tessellate.replaceNode {}
            polygon.altitudeMode.replaceNode {}
        }
        multiCount++
    }
    iter++

    // see what state this county is in (placemarks without STATEFP are skipped)
    placemark.ExtendedData.SchemaData.SimpleData.findAll { it.@name.text().equals('STATEFP') }.each { st ->
        def stateFip = st.text()
        if (stateFip != currentStateFip) {
            println "there is no file for state " + stateFip + " creating one"

            // a new state begins: write out the one assembled so far
            if (currentSplitKml != null) {
                println "writing current kml split out to file:: " + currentSplitKmlFile
                currentSplitKmlFile.withWriter('utf-8') { writer ->
                    writer.writeLine XmlUtil.serialize(currentSplitKml)
                }
            }
            if (!startedStateFips.add(stateFip)) {
                println "WARNING: state " + stateFip + " is not contiguous in the input; its earlier output will be overwritten"
            }
            // fall back to the raw code so unknown FIPS values never share a "null" file
            def stateName = stateFIPToName[stateFip] ?: ("FIPS-" + stateFip)
            // No placeholder file is written here: the file is created in full when
            // this state's document is flushed.
            currentSplitKml = new XmlSlurper(false, false).parseText(baseSplitKml)
            currentSplitKmlFile = new File(outFolder + "stCounties-" + stateName + ".kml")
            currentStateFip = stateFip
        }
        // add the county to the document of the state in progress
        currentSplitKml.Document.Folder.appendNode(placemark)
    }
}

// Report what was processed.
def banner = "================================================="
println banner
println "there are ${iter} counties in this file"
println "${polyCount} polys, ${multiCount} multi-polys"
println banner

// A state's document is only written out when the next state begins, so the
// last one still has to be saved here.
println "saving last split file"
if (currentSplitKml != null) {
    currentSplitKmlFile.withWriter('utf-8') { out ->
        out.writeLine(XmlUtil.serialize(currentSplitKml))
    }
}
	import groovy.xml.*
	/**
	* Removes some additional metadata I don't want and splits the
	* US county KML file into separate files, one for each state.
	* Get the census data from:
	* https://www.census.gov/geo/maps-data/data/kml/kml_counties.html
	* The higher the ratio, the lower the resolution
	*
	* This data can be easily imported into a Google MyMap
	*
	* @author len0rd
	* @since 2018-08-02
	*/


	// Input: the nationwide county KML downloaded from the Census Bureau.
	def inFile = "allUS.kml"
	// Output: folder that receives one KML file per state (trailing slash required,
	// because file names are built by plain string concatenation).
	def outFolder = 'split/'
	// The (false, false) arguments switch off validation and namespace awareness, so
	// element names stay plain (Document, Placemark, ...) and no odd prefixed tags are
	// printed when the tree is serialized again.
	// Parse from a File rather than a bare String: XmlSlurper.parse(String) interprets its
	// argument as a URI, whereas inFile is meant to be an ordinary filesystem path.
	def kml = new XmlSlurper(false, false).parse(new File(inFile))
	println "parsed!"

	// Skeleton of every per-state output document. It is parsed afresh each time a new
	// state starts, and that state's county Placemarks are appended to the empty <Folder>.
	// The <Schema> only declares the five extended-data fields this script keeps
	// (STATEFP, COUNTYFP, COUNTYNS, AFFGEOID, NAME).
	def baseSplitKml = """<kml
	xmlns:gx="http://www.google.com/kml/ext/2.2"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns="http://www.opengis.net/kml/2.2">
	<Document>
	<name>cb_2017_us_county_500k</name>
	<visibility>1</visibility>
	<Schema name="cb_2017_us_county_500k" id="kml_schema_ft_cb_2017_us_county_500k">
	<SimpleField type="xsd:string" name="STATEFP">
	<displayName>STATEFP</displayName>
	</SimpleField>
	<SimpleField type="xsd:string" name="COUNTYFP">
	<displayName>COUNTYFP</displayName>
	</SimpleField>
	<SimpleField type="xsd:string" name="COUNTYNS">
	<displayName>COUNTYNS</displayName>
	</SimpleField>
	<SimpleField type="xsd:string" name="AFFGEOID">
	<displayName>AFFGEOID</displayName>
	</SimpleField>
	<SimpleField type="xsd:string" name="NAME">
	<displayName>NAME</displayName>
	</SimpleField>
	</Schema>
	<Folder id="kml_ft_cb_2017_us_county_500k">
	</Folder>
	</Document>
	</kml>"""

	// Maps a two-digit state FIPS code (the STATEFP value of a county) to the name used in
	// the output file name. Besides the 50 states and DC it also lists Puerto Rico and the
	// island territories: without an entry, a lookup yields null and the counties of every
	// unlisted code would end up together in "stCounties-null.kml".
	def stateFIPToName = [
	    '01':'Alabama', '02':'Alaska', '04':'Arizona', '05':'Arkansas', '06':'California',
	    '08':'Colorado', '09':'Connecticut', '10':'Delaware', '11':'District of Columbia',
	    '12':'Florida', '13':'Georgia', '15':'Hawaii', '16':'Idaho', '17':'Illinois',
	    '18':'Indiana', '19':'Iowa', '20':'Kansas', '21':'Kentucky', '22':'Louisiana',
	    '23':'Maine', '24':'Maryland', '25':'Massachusetts', '26':'Michigan', '27':'Minnesota',
	    '28':'Mississippi', '29':'Missouri', '30':'Montana', '31':'Nebraska', '32':'Nevada',
	    '33':'New Hampshire', '34':'New Jersey', '35':'New Mexico', '36':'New York',
	    '37':'North Carolina', '38':'North Dakota', '39':'Ohio', '40':'Oklahoma', '41':'Oregon',
	    '42':'Pennsylvania', '44':'Rhode Island', '45':'South Carolina', '46':'South Dakota',
	    '47':'Tennessee', '48':'Texas', '49':'Utah', '50':'Vermont', '51':'Virginia',
	    '53':'Washington', '54':'West Virginia', '55':'Wisconsin', '56':'Wyoming',
	    // territories and Puerto Rico
	    '60':'American Samoa', '66':'Guam', '69':'Northern Mariana Islands',
	    '72':'Puerto Rico', '78':'U.S. Virgin Islands'
	]

	// Make sure the output folder exists before any per-state file is written.
	// (Deliberately no 'def': outFolderFile stays a script-binding variable, as before.)
	outFolderFile = new File(outFolder)
	if (!outFolderFile.exists()) {
	    println "create the folder for output"
	    // mkdirs() signals failure through its return value instead of throwing, so check
	    // it and stop with a clear message rather than failing later on the first write.
	    if (!outFolderFile.mkdirs() && !outFolderFile.isDirectory()) {
	        throw new IOException("could not create output folder: " + outFolderFile.absolutePath)
	    }
	} else if (!outFolderFile.isDirectory()) {
	    // Something that is not a directory already occupies the output path.
	    throw new IOException("output path exists but is not a directory: " + outFolderFile.absolutePath)
	}

	// Style definitions are of no use in the split output, so drop them.
	kml.Document.Style.replaceNode {}
	// Next, drop the schema column headers of the extended-data fields we discard.
	// (The original condition had its "or" operators garbled into "\|\|", which does not
	// parse; a membership test expresses the same four-way check.)
	def unwantedHeaders = ['LSAD', 'ALAND', 'AWATER', 'GEOID']
	def headersToDrop = kml.Document.Schema.SimpleField.findAll { field ->
	    field.@name.text() in unwantedHeaders
	}
	headersToDrop.each { header -> header.replaceNode {} }

	// Counters reported once the loop has finished.
	def iter = 0        // number of county placemarks visited
	def polyCount = 0   // counties described by a single Polygon
	def multiCount = 0  // counties described by a MultiGeometry
	// Document currently being assembled and the file it will be written to.
	// This assumes that counties are listed in order by stateFIPS -> countyFIPS.
	def currentSplitKml = null
	def currentSplitKmlFile = null
	// FIPS code of the state currently being assembled. Tracking the code itself (instead
	// of testing whether the output file exists) keeps files left over from an earlier run
	// from being mistaken for "the state in progress", which used to dereference a null
	// document or append counties to the wrong state's document.
	def currentStateFip = null
	// States already started, used only to warn when the ordering assumption is violated.
	def startedStateFips = [] as Set
	// Extended-data fields stripped from every county.
	def unwantedFields = ['LSAD', 'ALAND', 'AWATER', 'GEOID']

	kml.Document.Folder.Placemark.each { placemark ->
	    placemark.description.replaceNode {} // remove description node
	    placemark.styleUrl.replaceNode {}    // remove styleurl node

	    // remove the extended data we don't want (the original condition had its "or"
	    // operators garbled into "\|\|", which does not parse)
	    placemark.ExtendedData.SchemaData.SimpleData.findAll { field ->
	        field.@name.text() in unwantedFields
	    }.each { dead ->
	        dead.replaceNode {}
	    }

	    // strip the rendering hints from the geometry and count its kind
	    if (placemark.Polygon.size() == 1) {
	        placemark.Polygon.extrude.replaceNode {}
	        placemark.Polygon.tessellate.replaceNode {}
	        placemark.Polygon.altitudeMode.replaceNode {}
	        polyCount++
	    } else if (placemark.MultiGeometry.size() == 1) {
	        placemark.MultiGeometry.Polygon.each { polygon ->
	            polygon.extrude.replaceNode {}
	            polygon.tessellate.replaceNode {}
	            polygon.altitudeMode.replaceNode {}
	        }
	        multiCount++
	    }
	    iter++

	    // see what state this county is in (placemarks without STATEFP are skipped)
	    placemark.ExtendedData.SchemaData.SimpleData.findAll { it.@name.text().equals('STATEFP') }.each { st ->
	        def stateFip = st.text()
	        if (stateFip != currentStateFip) {
	            println "there is no file for state " + stateFip + " creating one"

	            // a new state begins: write out the one assembled so far
	            if (currentSplitKml != null) {
	                println "writing current kml split out to file:: " + currentSplitKmlFile
	                currentSplitKmlFile.withWriter('utf-8') { writer ->
	                    writer.writeLine XmlUtil.serialize(currentSplitKml)
	                }
	            }
	            if (!startedStateFips.add(stateFip)) {
	                println "WARNING: state " + stateFip + " is not contiguous in the input; its earlier output will be overwritten"
	            }
	            // fall back to the raw code so unknown FIPS values never share a "null" file
	            def stateName = stateFIPToName[stateFip] ?: ("FIPS-" + stateFip)
	            // No placeholder file is written here: the file is created in full when
	            // this state's document is flushed.
	            currentSplitKml = new XmlSlurper(false, false).parseText(baseSplitKml)
	            currentSplitKmlFile = new File(outFolder + "stCounties-" + stateName + ".kml")
	            currentStateFip = stateFip
	        }
	        // add the county to the document of the state in progress
	        currentSplitKml.Document.Folder.appendNode(placemark)
	    }
	}

	// Report what was processed.
	def banner = "================================================="
	println banner
	println "there are ${iter} counties in this file"
	println "${polyCount} polys, ${multiCount} multi-polys"
	println banner

	// A state's document is only written out when the next state begins, so the
	// last one still has to be saved here.
	println "saving last split file"
	if (currentSplitKml != null) {
	    currentSplitKmlFile.withWriter('utf-8') { out ->
	        out.writeLine(XmlUtil.serialize(currentSplitKml))
	    }
	}