Created
July 25, 2021 16:04
-
-
Save saisgit/afe0890b3bbfca5bbc71d2d9d9c4ae2a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Assume the JSON string below is stored in a file:
{
  "type": "struct",
  "fields": [
    {
      "name": "id",
      "type": "integer",
      "nullable": true,
      "metadata": {}
    },
    {
      "name": "first_name",
      "type": "string",
      "nullable": true,
      "metadata": {}
    },
    {
      "name": "last_name",
      "type": "string",
      "nullable": true,
      "metadata": {}
    },
    {
      "name": "city",
      "type": "string",
      "nullable": true,
      "metadata": {}
    },
    {
      "name": "country",
      "type": "string",
      "nullable": true,
      "metadata": {}
    },
    {
      "name": "phone",
      "type": "string",
      "nullable": true,
      "metadata": {}
    }
  ]
}
*/
import org.apache.spark.sql.types._ | |
// Stand-in for the schema JSON that would normally be loaded from a file.
// The literal is assembled from one fragment per field purely for readability;
// the concatenation yields the same compact single-line JSON document.
val jsonString: String =
  """{"type":"struct","fields":[""" +
    """{"name":"id","type":"integer","nullable":true,"metadata":{}},""" +
    """{"name":"first_name","type":"string","nullable":true,"metadata":{}},""" +
    """{"name":"last_name","type":"string","nullable":true,"metadata":{}},""" +
    """{"name":"city","type":"string","nullable":true,"metadata":{}},""" +
    """{"name":"country","type":"string","nullable":true,"metadata":{}},""" +
    """{"name":"phone","type":"string","nullable":true,"metadata":{}}""" +
    """]}"""
// Convert the JSON string to a Spark StructType using Spark's built-in
// DataType.fromJson parser.
// DataType.fromJson returns the general DataType, so the result is narrowed
// with a pattern match rather than an unchecked asInstanceOf cast: if the JSON
// describes something other than a struct, this fails with a descriptive
// IllegalArgumentException instead of an opaque ClassCastException.
val jsonToStruct: StructType = DataType.fromJson(jsonString) match {
  case struct: StructType => struct
  case other =>
    throw new IllegalArgumentException(
      s"Expected the schema JSON to describe a struct, but it describes: ${other.typeName}"
    )
}
// Print the parsed schema as a tree.
// printTreeString() is a side-effecting method declared with an empty parameter
// list, so it is called with explicit parentheses (omitting them is deprecated
// auto-application in Scala 2.13 and a compile error in Scala 3).
jsonToStruct.printTreeString()
// Use the parsed schema to read the CSV file.
// CSV file definition: location, whether the first row is a header, and the
// field separator. All three are passed to the reader below so they can be
// changed in one place.
val filePath: String = "/FileStore/tables/Customer.csv"
val header: String = "true"
val sep: String = ";"
// NOTE(review): `spark` is not defined in this snippet; it is assumed to be the
// SparkSession supplied by the surrounding environment (e.g. a notebook).
val csvDF3 = (
  spark.read
    .option("header", header) // use the `header` setting rather than a duplicated literal
    .option("sep", sep)
    .schema(jsonToStruct) // apply the StructType built from the JSON schema
    .csv(filePath)
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment