Last active
November 8, 2019 06:24
-
-
Save lakshay-arora/ca3633e9894082683435d51e17bca132 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyspark.sql.types as tp

# Define the schema explicitly so each CSV column is read with a fixed type.
# Every column is declared nullable.
# NOTE(review): Isboundary and Iswicket are declared BinaryType (raw bytes),
# while the sibling flag Isball is BooleanType -- confirm against the data
# whether a boolean or integer type was intended for these two columns.
my_schema = tp.StructType([
    tp.StructField(name="Batsman", dataType=tp.IntegerType(), nullable=True),
    tp.StructField(name="Batsman_Name", dataType=tp.StringType(), nullable=True),
    tp.StructField(name="Bowler", dataType=tp.IntegerType(), nullable=True),
    tp.StructField(name="Bowler_Name", dataType=tp.StringType(), nullable=True),
    tp.StructField(name="Commentary", dataType=tp.StringType(), nullable=True),
    tp.StructField(name="Detail", dataType=tp.StringType(), nullable=True),
    tp.StructField(name="Dismissed", dataType=tp.IntegerType(), nullable=True),
    tp.StructField(name="Id", dataType=tp.IntegerType(), nullable=True),
    tp.StructField(name="Isball", dataType=tp.BooleanType(), nullable=True),
    tp.StructField(name="Isboundary", dataType=tp.BinaryType(), nullable=True),
    tp.StructField(name="Iswicket", dataType=tp.BinaryType(), nullable=True),
    tp.StructField(name="Over", dataType=tp.DoubleType(), nullable=True),
    tp.StructField(name="Runs", dataType=tp.IntegerType(), nullable=True),
    tp.StructField(name="Timestamp", dataType=tp.TimestampType(), nullable=True),
])

# Read the data again with the defined schema; header=True marks the first
# line of the file as column names rather than data.
# NOTE(review): `spark` is not defined in this snippet -- presumably an active
# SparkSession created earlier in the session; confirm before running
# standalone.
my_data = spark.read.csv("ind-ban-comment.csv", schema=my_schema, header=True)

# Print the schema of the loaded DataFrame.
my_data.printSchema()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment