stack-dump from trying to write TDigestUDT to parquet
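Spark-shell transcript (apparently Spark 2.2.x, judging by the source line numbers in the trace): a five-row DataFrame is aggregated with tdigestUDAF from isarn-sketches-spark, the aggregate displays fine, and the failure occurs on the write to parquet.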
scala> val data = sc.parallelize(Seq(1,2,3,4,5)).toDF("x")
data: org.apache.spark.sql.DataFrame = [x: int]

scala> val udaf = tdigestUDAF[Double].maxDiscrete(10)
udaf: org.isarnproject.sketches.udaf.TDigestUDAF[Double] = TDigestUDAF(0.5,10)

scala> val agg = data.agg(udaf($"x").alias("tdigest"))
agg: org.apache.spark.sql.DataFrame = [tdigest: tdigest]

scala> agg.show()
+--------------------+
|             tdigest|
+--------------------+
|TDigestSQL(TDiges...|
+--------------------+

scala> agg.write.parquet("/tmp/agg.parquet")
2017-07-30 13:32:13 ERROR Utils:91 - Aborting task
java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
2017-07-30 13:32:13 WARN FileOutputCommitter:569 - Could not delete file:/tmp/agg.parquet/_temporary/0/_temporary/attempt_20170730133213_0012_m_000000_0
2017-07-30 13:32:13 ERROR FileFormatWriter:70 - Job job_20170730133213_0012 aborted.
2017-07-30 13:32:13 ERROR Executor:91 - Exception in task 0.0 in stage 12.0 (TID 52)
org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
... 8 more
2017-07-30 13:32:13 WARN TaskSetManager:66 - Lost task 0.0 in stage 12.0 (TID 52, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
... 8 more
2017-07-30 13:32:13 ERROR TaskSetManager:70 - Task 0 in stage 12.0 failed 1 times; aborting job
2017-07-30 13:32:13 ERROR FileFormatWriter:91 - Aborting job null.
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 12.0 failed 1 times, most recent failure: Lost task 0.0 in stage 12.0 (TID 52, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
... 8 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:188)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:145)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
at org.apache.spark.sql.execution.datasources.DataSource.writeInFileFormat(DataSource.scala:438)
at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:474)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:610)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:233)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:217)
at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:509)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:37)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:42)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:44)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:46)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:48)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:50)
at $line29.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:52)
at $line29.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:54)
at $line29.$read$$iw$$iw$$iw$$iw.<init>(<console>:56)
at $line29.$read$$iw$$iw$$iw.<init>(<console>:58)
at $line29.$read$$iw$$iw.<init>(<console>:60)
at $line29.$read$$iw.<init>(<console>:62)
at $line29.$read.<init>(<console>:64)
at $line29.$read$.<init>(<console>:68)
at $line29.$read$.<clinit>(<console>)
at $line29.$eval$.$print$lzycompute(<console>:7)
at $line29.$eval$.$print(<console>:6)
at $line29.$eval.$print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047)
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638)
at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637)
at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565)
at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:807)
at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:681)
at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:395)
at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:415)
at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply$mcZ$sp(ILoop.scala:923)
at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
at scala.reflect.internal.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:97)
at scala.tools.nsc.interpreter.ILoop.process(ILoop.scala:909)
at org.apache.spark.repl.Main$.doMain(Main.scala:70)
at org.apache.spark.repl.Main$.main(Main.scala:53)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:783)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
... 8 more
org.apache.spark.SparkException: Job aborted.
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:215)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:173)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:145)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
at org.apache.spark.sql.execution.datasources.DataSource.writeInFileFormat(DataSource.scala:438)
at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:474)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:48)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:610)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:233)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:217)
at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:509)
... 52 elided
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 12.0 failed 1 times, most recent failure: Lost task 0.0 in stage 12.0 (TID 52, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
... 8 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:188)
... 86 more
Caused by: org.apache.spark.SparkException: Task failed while writing rows
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:272)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:191)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$apply$mcV$sp$1.apply(FileFormatWriter.scala:190)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: Unsupported dataType: {"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}, [1.1] failure: `TimestampType' expected but `{' found
{"type":"struct","fields":[{"name":"tdigest","type":{"type":"udt","class":"org.apache.spark.isarnproject.sketches.udt.TDigestUDT$","pyClass":"isarnproject.sketches.udt.tdigest.TDigestUDT","sqlType":{"type":"struct","fields":[{"name":"delta","type":"double","nullable":false,"metadata":{}},{"name":"maxDiscrete","type":"integer","nullable":false,"metadata":{}},{"name":"nclusters","type":"integer","nullable":false,"metadata":{}},{"name":"clustX","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}},{"name":"clustM","type":{"type":"array","elementType":"double","containsNull":false},"nullable":false,"metadata":{}}]}},"nullable":true,"metadata":{}}]}
^
at org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser$.parse(LegacyTypeStringParser.scala:90)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at org.apache.spark.sql.types.StructType$$anonfun$7.apply(StructType.scala:414)
at scala.util.Try.getOrElse(Try.scala:79)
at org.apache.spark.sql.types.StructType$.fromString(StructType.scala:414)
at org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport.init(ParquetWriteSupport.scala:80)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:341)
at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:302)
at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:37)
at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$$anon$1.newInstance(ParquetFileFormat.scala:151)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.newOutputWriter(FileFormatWriter.scala:305)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:314)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:258)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:256)
at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1375)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:261)
... 8 more

scala>
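
Reading the trace: ParquetWriteSupport.init (ParquetWriteSupport.scala:80) round-trips the write schema as a string through StructType.fromString, and the scala.util.Try.getOrElse frame above it shows that fromString attempts the JSON reader first, falling back to LegacyTypeStringParser only when that throws. The legacy parser has no rule for JSON, which is why the error surfaces as "`TimestampType' expected but `{' found". The snippet below is a hypothetical sketch of that fallback path using public Spark APIs, not code from Spark or isarn-sketches-spark; parseWriteSchema is an illustrative name.

import scala.util.Try
import org.apache.spark.sql.types.{DataType, StructType}

// Hypothetical sketch of the parse path implied by the stack frames above:
// the JSON parse of the UDT schema must already have failed before the
// legacy parser ever saw the string.
def parseWriteSchema(raw: String): StructType =
  Try(DataType.fromJson(raw)).toOption match {
    case Some(s: StructType) => s  // normal path: schema JSON parses cleanly
    case _ =>
      // Spark's real fallback is LegacyTypeStringParser.parse(raw), which
      // cannot parse JSON and produces the misleading message in the log.
      throw new IllegalArgumentException(s"Unsupported dataType: $raw")
  }

One plausible reading (an assumption, not confirmed by the log) is that DataType.fromJson failed to instantiate the UDT class named in the schema JSON, org.apache.spark.isarnproject.sketches.udt.TDigestUDT$, whose trailing $ marks a Scala object rather than an instantiable class, forcing the doomed legacy-parser fallback.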