ayee/pyspark-split-dataframe-column-literal.py

## pyspark-split-dataframe-column-literal.py
from pyspark.sql.functions import split
# Demo data: each 'eventtype' value packs two fields as "<thing>:<ranking>".
# `sc` is not defined in this snippet; it is expected to already be in scope
# (e.g. the SparkContext provided by the pyspark shell).
rows = [[1, 'Foo:10'], [2, 'Bar:11'], [3, 'Car:12']]
df = sc.parallelize(rows).toDF(['Event', 'eventtype'])

# Build the split expression once and reuse it for both derived columns.
# split() interprets its pattern as a regular expression; ':' has no special
# meaning there, so this splits on the literal colon.
parts = split(df.eventtype, ':')
df = (
    df.withColumn('Thing', parts.getItem(0))      # text before the colon
      .withColumn('Ranking', parts.getItem(1))    # text after the colon (still a string)
)
df.collect()

# [Row(Event=1, eventtype=u'Foo:10', Thing=u'Foo', Ranking=u'10'),
#  Row(Event=2, eventtype=u'Bar:11', Thing=u'Bar', Ranking=u'11'),
#  Row(Event=3, eventtype=u'Car:12', Thing=u'Car', Ranking=u'12')]
	from pyspark.sql.functions import split
	# Demo data: each 'eventtype' value packs two fields as "<thing>:<ranking>".
	# `sc` is not defined in this snippet; it is expected to already be in scope
	# (e.g. the SparkContext provided by the pyspark shell).
	rows = [[1, 'Foo:10'], [2, 'Bar:11'], [3, 'Car:12']]
	df = sc.parallelize(rows).toDF(['Event', 'eventtype'])
	# Build the split expression once and reuse it for both derived columns.
	# split() interprets its pattern as a regular expression; ':' has no special
	# meaning there, so this splits on the literal colon.
	parts = split(df.eventtype, ':')
	df = (
		df.withColumn('Thing', parts.getItem(0))      # text before the colon
		  .withColumn('Ranking', parts.getItem(1))    # text after the colon (still a string)
	)
	df.collect()

	# [Row(Event=1, eventtype=u'Foo:10', Thing=u'Foo', Ranking=u'10'),
	# Row(Event=2, eventtype=u'Bar:11', Thing=u'Bar', Ranking=u'11'),
	# Row(Event=3, eventtype=u'Car:12', Thing=u'Car', Ranking=u'12')]