Skip to content

Instantly share code, notes, and snippets.

@stuzero
Last active September 2, 2021 02:31
Show Gist options
  • Save stuzero/073a714c6caac7757459372f781c84d2 to your computer and use it in GitHub Desktop.
Save stuzero/073a714c6caac7757459372f781c84d2 to your computer and use it in GitHub Desktop.
US County Shapefile loaded in Apache Sedona (GeoSpark)
// ./spark-shell \
// --packages org.apache.sedona:sedona-core-3.0_2.12:1.0.1-incubating,org.apache.sedona:sedona-sql-3.0_2.12:1.0.1-incubating,org.locationtech.jts:jts-core:1.18.2,org.datasyslab:geotools-wrapper:geotools-24.1 \
// --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
// --conf spark.kryo.registrator=org.apache.sedona.core.serde.SedonaKryoRegistrator
import org.apache.sedona.core.formatMapper.shapefileParser.ShapefileReader
import org.apache.sedona.core.spatialRDD.SpatialRDD
import org.apache.sedona.sql.utils.{Adapter, SedonaSQLRegistrator}
import org.locationtech.jts.geom.{Coordinate, Geometry, GeometryFactory}
// Load the US county shapefile into a Spark DataFrame with a typed schema.
// Intended to be pasted into a spark-shell session, where `spark` and `sc`
// are already bound.

// Register Sedona's SQL functions (ST_Point is used below) on this session.
SedonaSQLRegistrator.registerAll(spark)

// Must be set before the shapefile is read.
// NOTE(review): presumably controls how the .dbf attribute text is decoded — confirm against Sedona docs.
System.setProperty("sedona.global.charset", "utf8")

// Directory that contains the shapefile's component files.
val shapefileInputLocation = "/home/ubuntu/geo/tl_2020_us_county"

// Read the shapefile into a SpatialRDD, then expose it as a DataFrame / temp view.
val countyRDD = ShapefileReader.readToGeometryRDD(sc, shapefileInputLocation)
val rawSpatialDf = Adapter.toDf(countyRDD, spark)
rawSpatialDf.createOrReplaceTempView("rawSpatialDf")

// ALAND / AWATER are cast to BIGINT rather than INT: per the Census TIGER/Line
// documentation they are areas in square meters, and large counties exceed the
// 32-bit range. With ANSI mode off, Spark turns an out-of-range cast into NULL,
// so an INT cast would silently drop those values.
// ST_Point takes (x, y), i.e. (longitude, latitude), for the interior point.
val transformSQL =
  """SELECT geometry AS GEOM,
    |       STATEFP, COUNTYFP, COUNTYNS, GEOID, NAME, NAMELSAD, LSAD, CLASSFP, MTFCC,
    |       CSAFP, CBSAFP, METDIVFP, FUNCSTAT,
    |       CAST(ALAND AS BIGINT) AS ALAND,
    |       CAST(AWATER AS BIGINT) AS AWATER,
    |       ST_Point(CAST(INTPTLON AS decimal(24,20)), CAST(INTPTLAT AS decimal(24,20))) AS INTPTLATLONG
    |FROM rawSpatialDf""".stripMargin

val countyDf = spark.sql(transformSQL)
countyDf.printSchema()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment