Skip to content

Instantly share code, notes, and snippets.

@oluies
Created December 1, 2017 08:32
Show Gist options
  • Save oluies/cc18361f2fb7f97ec179dbaed0e32bd7 to your computer and use it in GitHub Desktop.
Save oluies/cc18361f2fb7f97ec179dbaed0e32bd7 to your computer and use it in GitHub Desktop.
spark-read-from-string
// Inline sample data: a pipe-delimited table with one header row followed by
// data rows, kept in the source so it can be parsed without reading a file.
//
// Every line of the literal starts with `|`, which `stripMargin` treats as the
// margin marker and removes; the remaining `|` characters are the field
// separators. The literal begins with a line break right after the opening
// quotes, so the resulting string starts with an empty line.
//
// Cells are padded with spaces before the separator, and the rows end with a
// trailing `|`.
// NOTE(review): the final data row does not end with `|`, unlike the rows
// above it, and its dataEventId (20016300) has one more digit than the
// 2001600 used by the neighbouring rows - possibly copy/paste typos; confirm.
val csv:String = """
|rowKeyLineage |eventTimestamp|dataEventId|dataDictId |sourceKey |transport|eventType |recordId|processingTime|application|entity |product|lifecycle |lineDate |dayOfMonth|dayOfYear|
|1488327264000:2001200:MUREX:DK_LEI_6000:BOND:CONTRACT |1488327264000 |2001200 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6000|BOND |CONTRACT |2017-03-01 01:14:24|1 |60 |
|1488327264000:2001200:MUREX:DK_LEI_6100:REPO:CONTRACT |1488327264000 |2001200 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6100|REPO |CONTRACT |2017-03-01 01:14:24|1 |60 |
|1488327264000:2001200:MUREX:DK_LEI_6200:BOND:CONTRACT |1488327264000 |2001200 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6200|BOND |CONTRACT |2017-03-01 01:14:24|1 |60 |
|1488327264000:2001200:MUREX:DK_LEI_6200:REPO:CONTRACT |1488327264000 |2001200 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6200|REPO |CONTRACT |2017-03-01 01:14:24|1 |60 |
|1488327264000:2001200:MUREX:DK_LEI_6300:BOND:CONTRACT |1488327264000 |2001200 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6300|BOND |CONTRACT |2017-03-01 01:14:24|1 |60 |
|1488327264000:2001200:MUREX:NO_LEI_2000:REPO:CONTRACT |1488327264000 |2001200 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |MUREX |NO_LEI_2000|REPO |CONTRACT |2017-03-01 01:14:24|1 |60 |
|1488327264000:2001400:MUREX:DK_LEI_6000:BOND:RECONCILIATION |1488327264000 |2001400 |MUREXRECONCILIATION_DELTA|20170301MUREXRECONCILIATION.csv|FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6000|BOND |RECONCILIATION|2017-03-01 01:14:24|1 |60 |
|1488327264000:2001400:MUREX:DK_LEI_6100:REPO:RECONCILIATION |1488327264000 |2001400 |MUREXRECONCILIATION_DELTA|20170301MUREXRECONCILIATION.csv|FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6100|REPO |RECONCILIATION|2017-03-01 01:14:24|1 |60 |
|1488327264000:2001400:MUREX:DK_LEI_6200:BOND:RECONCILIATION |1488327264000 |2001400 |MUREXRECONCILIATION_DELTA|20170301MUREXRECONCILIATION.csv|FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6200|BOND |RECONCILIATION|2017-03-01 01:14:24|1 |60 |
|1488327264000:2001400:MUREX:DK_LEI_6200:REPO:RECONCILIATION |1488327264000 |2001400 |MUREXRECONCILIATION_DELTA|20170301MUREXRECONCILIATION.csv|FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6200|REPO |RECONCILIATION|2017-03-01 01:14:24|1 |60 |
|1488327264000:2001400:MUREX:DK_LEI_6300:BOND:RECONCILIATION |1488327264000 |2001400 |MUREXRECONCILIATION_DELTA|20170301MUREXRECONCILIATION.csv|FTP |DISTRIBUTE| |1512074938523 |MUREX |DK_LEI_6300|BOND |RECONCILIATION|2017-03-01 01:14:24|1 |60 |
|1488327264000:2001400:MUREX:NO_LEI_2000:REPO:RECONCILIATION |1488327264000 |2001400 |MUREXRECONCILIATION_DELTA|20170301MUREXRECONCILIATION.csv|FTP |DISTRIBUTE| |1512074938523 |MUREX |NO_LEI_2000|REPO |RECONCILIATION|2017-03-01 01:14:24|1 |60 |
|1488328128000:2001300:TRS:DK_LEI_6000:BOND:CONTRACT |1488328128000 |2001300 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |RECEIVE | |1512074938523 |TRS |DK_LEI_6000|BOND |CONTRACT |2017-03-01 01:28:48|1 |60 |
|1488328128000:2001300:TRS:DK_LEI_6100:REPO:CONTRACT |1488328128000 |2001300 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |RECEIVE | |1512074938523 |TRS |DK_LEI_6100|REPO |CONTRACT |2017-03-01 01:28:48|1 |60 |
|1488328128000:2001300:TRS:DK_LEI_6200:BOND:CONTRACT |1488328128000 |2001300 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |RECEIVE | |1512074938523 |TRS |DK_LEI_6200|BOND |CONTRACT |2017-03-01 01:28:48|1 |60 |
|1488328128000:2001300:TRS:DK_LEI_6200:REPO:CONTRACT |1488328128000 |2001300 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |RECEIVE | |1512074938523 |TRS |DK_LEI_6200|REPO |CONTRACT |2017-03-01 01:28:48|1 |60 |
|1488328128000:2001300:TRS:DK_LEI_6300:BOND:CONTRACT |1488328128000 |2001300 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |RECEIVE | |1512074938523 |TRS |DK_LEI_6300|BOND |CONTRACT |2017-03-01 01:28:48|1 |60 |
|1488328128000:2001300:TRS:NO_LEI_2000:REPO:CONTRACT |1488328128000 |2001300 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |RECEIVE | |1512074938523 |TRS |NO_LEI_2000|REPO |CONTRACT |2017-03-01 01:28:48|1 |60 |
|1488328128000:2001500:CALYPSO:DK_LEI_6000:BOND:RECONCILIATION|1488328128000 |2001500 |MUREXRECONCILIATION_DELTA|20170301MUREXRECONCILIATION.csv|FTP |RECEIVE | |1512074938523 |CALYPSO |DK_LEI_6000|BOND |RECONCILIATION|2017-03-01 01:28:48|1 |60 |
|1488328128000:2001500:CALYPSO:DK_LEI_6100:REPO:RECONCILIATION|1488328128000 |2001500 |MUREXRECONCILIATION_DELTA|20170301MUREXRECONCILIATION.csv|FTP |RECEIVE | |1512074938523 |CALYPSO |DK_LEI_6100|REPO |RECONCILIATION|2017-03-01 01:28:48|1 |60 |
|1488328128000:2001500:CALYPSO:DK_LEI_6200:BOND:RECONCILIATION|1488328128000 |2001500 |MUREXRECONCILIATION_DELTA|20170301MUREXRECONCILIATION.csv|FTP |RECEIVE | |1512074938523 |CALYPSO |DK_LEI_6200|BOND |RECONCILIATION|2017-03-01 01:28:48|1 |60 |
|1488328128000:2001500:CALYPSO:DK_LEI_6200:REPO:RECONCILIATION|1488328128000 |2001500 |MUREXRECONCILIATION_DELTA|20170301MUREXRECONCILIATION.csv|FTP |RECEIVE | |1512074938523 |CALYPSO |DK_LEI_6200|REPO |RECONCILIATION|2017-03-01 01:28:48|1 |60 |
|1488328128000:2001500:CALYPSO:DK_LEI_6300:BOND:RECONCILIATION|1488328128000 |2001500 |MUREXRECONCILIATION_DELTA|20170301MUREXRECONCILIATION.csv|FTP |RECEIVE | |1512074938523 |CALYPSO |DK_LEI_6300|BOND |RECONCILIATION|2017-03-01 01:28:48|1 |60 |
|1488328128000:2001500:CALYPSO:NO_LEI_2000:REPO:RECONCILIATION|1488328128000 |2001500 |MUREXRECONCILIATION_DELTA|20170301MUREXRECONCILIATION.csv|FTP |RECEIVE | |1512074938523 |CALYPSO |NO_LEI_2000|REPO |RECONCILIATION|2017-03-01 01:28:48|1 |60 |
|1488329700000:2001600:TRS:DK_LEI_6000:BOND:CONTRACT |1488329700000 |2001600 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |TRS |DK_LEI_6000|BOND |CONTRACT |2017-03-01 01:55:00|1 |60 |
|1488329700000:2001600:TRS:DK_LEI_6200:BOND:CONTRACT |1488329700000 |2001600 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |TRS |DK_LEI_6200|BOND |CONTRACT |2017-03-01 01:55:00|1 |60 |
|1488329700000:2001600:TRS:DK_LEI_6200:REPO:CONTRACT |1488329700000 |2001600 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |TRS |DK_LEI_6200|REPO |CONTRACT |2017-03-01 01:55:00|1 |60 |
|1488329700000:2001600:TRS:DK_LEI_6300:BOND:CONTRACT |1488329700000 |2001600 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |TRS |DK_LEI_6300|BOND |CONTRACT |2017-03-01 01:55:00|1 |60 |
|1488329700000:2001600:TRS:NO_LEI_2000:REPO:CONTRACT |1488329700000 |2001600 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv |FTP |DISTRIBUTE| |1512074938523 |TRS |NO_LEI_2000|REPO |CONTRACT |2017-03-01 01:55:00|1 |60 |
|1488329700000:20016300:TRS:DK_LEI_6100:REPO:CONTRACT |1488329700000 |20016300 |MUREXCONTRACT_DELTA |20170301MUREXCONTRACT.csv|FTP |DISTRIBUTE| |1512074938523 |TRS |DK_LEI_6100|REPO |CONTRACT |2017-03-01 01:55:00|1 |60
""".stripMargin
import spark.implicits._
// Break the in-memory CSV text into individual lines (LF or CRLF line endings)
// and distribute them as a Dataset of strings, which is what the
// `spark.read...csv(csvds)` call below consumes.
val csvds = {
  val csvLines = csv.split("\\r?\\n").toList
  sc.parallelize(csvLines).toDS
}
// Parse the in-memory, pipe-delimited lines held in `csvds` into a DataFrame.
// The first non-empty line is used as the header and column types are
// inferred from the data rather than supplied via an explicit schema.
val csvdsDF: DataFrame = spark.read
// .schema(events.schema)
.option("header", "true")
.option("inferSchema", "true")
.option("delimiter", "|")
// The sample table pads cells with spaces for alignment. Spark keeps that
// whitespace by default when reading, which leaves column names such as
// "rowKeyLineage " (with a trailing blank) and stops padded numeric cells
// like "60 " from being inferred as numbers. Trim both sides of every field.
.option("ignoreLeadingWhiteSpace", "true")
.option("ignoreTrailingWhiteSpace", "true")
// PERMISSIVE keeps rows whose field count differs from the header instead of
// failing the read; missing fields become null.
// NOTE(review): the trailing `|` on the header/data lines yields one extra,
// always-empty last column - drop it downstream if it is not wanted.
.option("mode","PERMISSIVE")
.csv(csvds)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment