@erikerlandson
Last active July 30, 2017 20:41
Stack dump from trying to write a TDigestUDT column to Parquet.
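The REPL transcript below starts mid-session, so its setup is not shown. A minimal sketch of the spark-shell setup it appears to assume (package paths are inferred from the type names printed in the session, so treat them as assumptions rather than the exact commands that were run):

// Hypothetical setup for the session below; spark-shell already provides `spark` and `sc`.
import spark.implicits._                              // enables .toDF and the $"x" column syntax
import org.isarnproject.sketches.udaf._               // assumed home of tdigestUDAF (TDigestUDAF is printed from this package)
import org.apache.spark.isarnproject.sketches.udt._   // TDigestUDT / TDigestSQL types referenced in the error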
scala> val data = sc.parallelize(Seq(1,2,3,4,5)).toDF("x")
data: org.apache.spark.sql.DataFrame = [x: int]
scala> val udaf = tdigestUDAF[Double].maxDiscrete(10)
udaf: org.isarnproject.sketches.udaf.TDigestUDAF[Double] = TDigestUDAF(0.5,10)
scala> val agg = data.agg(udaf($"x").alias("tdigest"))
agg: org.apache.spark.sql.DataFrame = [tdigest: tdigest]
scala> agg.show()
+--------------------+
| tdigest|
+--------------------+
|TDigestSQL(TDiges...|
+--------------------+
scala> agg.write.parquet("/tmp/agg.parquet")
2017-07-30 13:32:13 ERROR Utils:91 - Aborting task
java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
2017-07-30 13:32:13 WARN FileOutputCommitter:569 - Could not delete file:/tmp/agg.parquet/_temporary/0/_temporary/attempt_20170730133213_0012_m_000000_0
2017-07-30 13:32:13 ERROR FileFormatWriter:70 - Job job_20170730133213_0012 aborted.
2017-07-30 13:32:13 ERROR Executor:91 - Exception in task 0.0 in stage 12.0 (TID 52)
org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
... 8 more
2017-07-30 13:32:13 WARN TaskSetManager:66 - Lost task 0.0 in stage 12.0 (TID 52, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
... 8 more
2017-07-30 13:32:13 ERROR TaskSetManager:70 - Task 0 in stage 12.0 failed 1 times; aborting job
2017-07-30 13:32:13 ERROR FileFormatWriter:91 - Aborting job null.
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 12.0 failed 1 times, most recent failure: Lost task 0.0 in stage 12.0 (TID 52, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
... 8 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:188)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:145)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
at org.apache.spark.sql.execution.datasources.DataSource.writeInFileFormat(DataSource.scala:438)
at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:474)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:610)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:233)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:217)
at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:509)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:37)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:42)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:44)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:46)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:48)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:50)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:52)
at $line29.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:54)
at $line29.$read$$iw$$iw$$iw$$iw.<init>(<console>:56)
at $line29.$read$$iw$$iw$$iw.<init>(<console>:58)
at $line29.$read$$iw$$iw.<init>(<console>:60)
at $line29.$read$$iw.<init>(<console>:62)
at $line29.$read.<init>(<console>:64)
at $line29.$read$.<init>(<console>:68)
at $line29.$read$.<clinit>(<console>)
at $line29.$eval$.$print$lzycompute(<console>:7)
at $line29.$eval$.$print(<console>:6)
at $line29.$eval.$print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047)
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638)
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637)
at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565)
at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:807)
at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:681)
at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:395)
at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:415)
at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply$mcZ$sp(ILoop.scala:923)
at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
at scala.reflect.internal.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:97)
at scala.tools.nsc.interpreter.ILoop.process(ILoop.scala:909)
at org.apache.spark.repl.Main$.doMain(Main.scala:70)
at org.apache.spark.repl.Main$.main(Main.scala:53)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:783)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
... 8 more
org.apache.spark.SparkException: Job aborted.
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:215)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:145)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
at org.apache.spark.sql.execution.datasources.DataSource.writeInFileFormat(DataSource.scala:438)
at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:474)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:610)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:233)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:217)
at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:509)
... 52 elided
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 12.0 failed 1 times, most recent failure: Lost task 0.0 in stage 12.0 (TID 52, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
... 8 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:188)
... 86 more
Caused by: org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
... 8 more
scala>
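The repeated IllegalArgumentException comes from ParquetWriteSupport.init re-parsing the write schema from its JSON string. The frames (StructType$.fromString -> Try.getOrElse -> LegacyTypeStringParser.parse) suggest that DataType.fromJson throws for the TDigestUDT field, so the JSON text falls through to the legacy string parser, which expects a type name such as `TimestampType` and fails at the first `{`. A rough paraphrase of that fallback, reconstructed from the stack frames (these are Spark-internal classes, so treat this as a sketch of the call path rather than verbatim Spark source):

// Paraphrase of org.apache.spark.sql.types.StructType.fromString (Spark-internal),
// as implied by the frames StructType.scala:414 / Try.scala:79 / LegacyTypeStringParser.scala:90.
def fromString(raw: String): StructType = {
  // First attempt: parse the schema JSON; this is what apparently throws on the UDT entry.
  val parsed = scala.util.Try(DataType.fromJson(raw))
    // Fallback: the legacy parser understands "TimestampType"-style names, not JSON,
    // hence the "`TimestampType' expected but `{' found" message above.
    .getOrElse(LegacyTypeStringParser.parse(raw))
  parsed match {
    case t: StructType => t
    case _ => throw new RuntimeException(s"Failed parsing StructType: $raw")
  }
}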