# karpanGit/pyspark, generate dataframe from dictionary with and without a schema.py

## pyspark, generate dataframe from dictionary with and without a schema.py
# Build a DataFrame directly from a list of dicts, passing no schema so that
# Spark infers the column types from the records themselves.
# The second record has no 'two' key; it shows up as null in the output below.
df = spark.createDataFrame([{'one': 1, 'two': [1,2,3]}, {'one': 101}])

# Inferred schema:
df.printSchema()
# root
#  |-- one: long (nullable = true)
#  |-- two: array (nullable = true)
#  |    |-- element: long (containsNull = true)

# Resulting rows:
df.show()
# |one|      two|
# +---+---------+
# |  1|[1, 2, 3]|
# |101|     null|
# +---+---------+

# create dataframe from dictionary, with a schema
import pyspark.sql.types as T
df = [{'one': 1, 'two': [1,2,3]}, {'one': 101}]
# The type classes are only in scope through the `T` alias imported above, so
# every one of them must be qualified (a bare LongType/ArrayType is a NameError).
schema = T.StructType([
    T.StructField('one', T.LongType()),
    T.StructField('two', T.ArrayType(T.LongType())),
])
# Passing schema= makes Spark use the declared types instead of inferring them.
df = spark.createDataFrame(df, schema=schema)
df.printSchema()
# root
#  |-- one: long (nullable = true)
#  |-- two: array (nullable = true)
#  |    |-- element: long (containsNull = true)
df.show()
# |one|      two|
# +---+---------+
# |  1|[1, 2, 3]|
# |101|     null|
# +---+---------+
	# create dataframe from dictionary, without a schema
# No schema is passed, so Spark infers the column types from the records.
df = [{'one': 1, 'two': [1,2,3]}, {'one': 101}]
df = spark.createDataFrame(df)
df.printSchema()
# root
#  |-- one: long (nullable = true)
#  |-- two: array (nullable = true)
#  |    |-- element: long (containsNull = true)
df.show()
# |one|      two|
# +---+---------+
# |  1|[1, 2, 3]|
# |101|     null|
# +---+---------+

	# create dataframe from dictionary, with a schema
import pyspark.sql.types as T
df = [{'one': 1, 'two': [1,2,3]}, {'one': 101}]
# The type classes are only in scope through the `T` alias imported above, so
# every one of them must be qualified (a bare LongType/ArrayType is a NameError).
schema = T.StructType([
    T.StructField('one', T.LongType()),
    T.StructField('two', T.ArrayType(T.LongType())),
])
# Passing schema= makes Spark use the declared types instead of inferring them.
df = spark.createDataFrame(df, schema=schema)
df.printSchema()
# root
#  |-- one: long (nullable = true)
#  |-- two: array (nullable = true)
#  |    |-- element: long (containsNull = true)
df.show()
# |one|      two|
# +---+---------+
# |  1|[1, 2, 3]|
# |101|     null|
# +---+---------+