Created
August 19, 2016 11:57
-
-
Save oluies/3b225b3a96fbcacd50f85c350b2c83df to your computer and use it in GitHub Desktop.
Spark: transform timestamp text to timestamp and extract some parts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Spark shell example: turn a text column into a timestamp and pull out
// the day-of-month, month and year parts.
// Assumes a Spark shell style session where `spark.implicits._` is already in
// scope (needed for `toDF` and the `$"col"` syntax) -- TODO confirm for other setups.

// Two sample rows: one well-formed timestamp string and one garbage value that
// cannot match the pattern.
val df = Seq((1L, "03JUN2015 19.28.00"), (2L, "#$@#@#")).toDF("id", "dts")

// Import every function used below explicitly so the snippet does not depend on
// the shell's automatic wildcard import of `functions._`.
import org.apache.spark.sql.functions.{dayofmonth, month, unix_timestamp, year}

// The sample carries a four-digit year, so the pattern uses `yyyy`; the previous
// two-digit `yy` only worked through lenient legacy parsing.
// NOTE(review): the unparseable row is expected to produce null in `ts` and in the
// derived columns (non-ANSI mode) -- confirm against the Spark version in use.
df.withColumn("ts", unix_timestamp($"dts", "ddMMMyyyy HH.mm.ss").cast("timestamp"))
  .withColumn("dom", dayofmonth($"ts"))
  .withColumn("month", month($"ts"))
  .withColumn("year", year($"ts"))
  .show(2, truncate = false)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
from pyspark.sql.functions import from_unixtime, unix_timestamp
from pyspark.sql.types import TimestampType
# Parse the text column `datetime` with the given pattern, convert the parsed
# value back through from_unixtime, cast it to TimestampType and expose it
# under the same column name.
# NOTE(review): unix_timestamp presumably yields whole seconds, so the
# `.SSSSSS` fraction in the pattern is likely dropped -- confirm.
df.select(
    from_unixtime(unix_timestamp(df["datetime"], "yy-MMM-dd h.mm.ss.SSSSSS aa"))
    .cast(TimestampType())
    .alias("datetime")
)