Skip to content

Instantly share code, notes, and snippets.

@JoshRosen
Created June 15, 2016 03:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JoshRosen/bbcf320d658a9e5380401a4c82a3541a to your computer and use it in GitHub Desktop.
Save JoshRosen/bbcf320d658a9e5380401a4c82a3541a to your computer and use it in GitHub Desktop.
<!DOCTYPE html>
<html>
<head>
<meta name="databricks-html-version" content="1">
<title>Casting structs - Databricks</title>
<meta charset="utf-8">
<meta name="google" content="notranslate">
<meta http-equiv="Content-Language" content="en">
<meta http-equiv="Content-Type" content="text/html; charset=UTF8">
<link rel="stylesheet"
href="https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700">
<link rel="stylesheet" type="text/css" href="https://databricks-staging-cloudfront.staging.cloud.databricks.com/static/201606091012490700-e779236b24019d23ede28f616f85835925f76882/lib/css/bootstrap.min.css">
<link rel="stylesheet" type="text/css" href="https://databricks-staging-cloudfront.staging.cloud.databricks.com/static/201606091012490700-e779236b24019d23ede28f616f85835925f76882/lib/jquery-ui-bundle/jquery-ui.min.css">
<link rel="stylesheet" type="text/css" href="https://databricks-staging-cloudfront.staging.cloud.databricks.com/static/201606091012490700-e779236b24019d23ede28f616f85835925f76882/css/main.css">
<link rel="stylesheet" href="https://databricks-staging-cloudfront.staging.cloud.databricks.com/static/201606091012490700-e779236b24019d23ede28f616f85835925f76882/css/print.css" media="print">
<link rel="icon" type="image/png" href="https://databricks-staging-cloudfront.staging.cloud.databricks.com/static/201606091012490700-e779236b24019d23ede28f616f85835925f76882/img/favicon.ico"/>
<script>window.settings = {"enableAutoCompleteAsYouType":[],"devTierName":"Community Edition","workspaceFeaturedLinks":[{"linkURI":"https://docs.cloud.databricks.com/docs/latest/databricks_guide/index.html","displayName":"Databricks Guide","icon":"question"},{"linkURI":"https://docs.cloud.databricks.com/docs/latest/sample_applications/index.html","displayName":"Application Examples","icon":"code"},{"linkURI":"https://docs.cloud.databricks.com/docs/latest/courses/index.html","displayName":"Training","icon":"graduation-cap"}],"dbcForumURL":"http://forums.databricks.com/","nodeInfo":{"node_types":[{"spark_heap_memory":23800,"instance_type_id":"r3.2xlarge","node_type_id":"memory-optimized","description":"Memory Optimized","container_memory_mb":28000,"memory_mb":30720,"num_cores":4.0},{"spark_heap_memory":9702,"instance_type_id":"c3.4xlarge","node_type_id":"compute-optimized","description":"Compute Optimized","container_memory_mb":12128,"memory_mb":15360,"num_cores":8.0},{"spark_heap_memory":19814,"instance_type_id":"r3.xlarge","node_type_id":"r3.xlarge","description":"r3.xlarge","container_memory_mb":24768,"memory_mb":31232,"num_cores":4.0},{"spark_heap_memory":44800,"instance_type_id":"r3.2xlarge","node_type_id":"r3.2xlarge","description":"r3.2xlarge","container_memory_mb":56000,"memory_mb":62464,"num_cores":8.0},{"spark_heap_memory":94771,"instance_type_id":"r3.4xlarge","node_type_id":"r3.4xlarge","description":"r3.4xlarge","container_memory_mb":118464,"memory_mb":124928,"num_cores":16.0},{"spark_heap_memory":194713,"instance_type_id":"r3.8xlarge","node_type_id":"r3.8xlarge","description":"r3.8xlarge","container_memory_mb":243392,"memory_mb":249856,"num_cores":32.0},{"spark_heap_memory":7116,"instance_type_id":"c3.2xlarge","node_type_id":"c3.2xlarge","description":"c3.2xlarge","container_memory_mb":8896,"memory_mb":15360,"num_cores":8.0},{"spark_heap_memory":19404,"instance_type_id":"c3.4xlarge","node_type_id":"c3.4xlarge","description":"c3.4xlarge","container_memory_mb":24256,"memory_mb":30720,"num_cores":16.0},{"spark_heap_memory":43980,"instance_type_id":"c3.8xlarge","node_type_id":"c3.8xlarge","description":"c3.8xlarge","container_memory_mb":54976,"memory_mb":61440,"num_cores":32.0},{"spark_heap_memory":7116,"instance_type_id":"c4.2xlarge","node_type_id":"c4.2xlarge","description":"c4.2xlarge","container_memory_mb":8896,"memory_mb":15360,"num_cores":8.0},{"spark_heap_memory":19404,"instance_type_id":"c4.4xlarge","node_type_id":"c4.4xlarge","description":"c4.4xlarge","container_memory_mb":24256,"memory_mb":30720,"num_cores":16.0},{"spark_heap_memory":43980,"instance_type_id":"c4.8xlarge","node_type_id":"c4.8xlarge","description":"c4.8xlarge","container_memory_mb":54976,"memory_mb":61440,"num_cores":36.0},{"spark_heap_memory":194713,"instance_type_id":"i2.8xlarge","node_type_id":"i2.8xlarge","description":"i2.8xlarge","container_memory_mb":243392,"memory_mb":249856,"num_cores":32.0}],"default_node_type_id":"memory-optimized"},"enableThirdPartyApplicationsUI":true,"enableClusterAcls":true,"notebookRevisionVisibilityHorizon":0,"enableTableHandler":true,"isAdmin":true,"enableLargeResultDownload":true,"zoneInfos":[{"id":"us-west-2a","isDefault":true},{"id":"us-west-2b","isDefault":false}],"enablePublishNotebooks":false,"enableJobAclsConfig":true,"enableFullTextSearch":true,"enableElasticSparkUI":true,"clusters":true,"allowRunOnPendingClusters":true,"applications":true,"fileStoreBase":"FileStore","configurableSparkOptionsSpec":[{"keyPattern":"spark\\.kryo(\\.[^\\.]+)+","valuePattern":".*","keyPatternDisplay":"spark.kryo.*","valuePatternDisplay":"*","description":"Configuration options for Kryo serialization"},{"keyPattern":"spark\\.io\\.compression\\.codec","valuePattern":"(lzf|snappy|org\\.apache\\.spark\\.io\\.LZFCompressionCodec|org\\.apache\\.spark\\.io\\.SnappyCompressionCodec)","keyPatternDisplay":"spark.io.compression.codec","valuePatternDisplay":"snappy|lzf","description":"The codec used to compress internal data such as RDD partitions, broadcast variables and shuffle outputs."},{"keyPattern":"spark\\.serializer","valuePattern":"(org\\.apache\\.spark\\.serializer\\.JavaSerializer|org\\.apache\\.spark\\.serializer\\.KryoSerializer)","keyPatternDisplay":"spark.serializer","valuePatternDisplay":"org.apache.spark.serializer.JavaSerializer|org.apache.spark.serializer.KryoSerializer","description":"Class to use for serializing objects that will be sent over the network or need to be cached in serialized form."},{"keyPattern":"spark\\.rdd\\.compress","valuePattern":"(true|false)","keyPatternDisplay":"spark.rdd.compress","valuePatternDisplay":"true|false","description":"Whether to compress serialized RDD partitions (e.g. for StorageLevel.MEMORY_ONLY_SER). Can save substantial space at the cost of some extra CPU time."},{"keyPattern":"spark\\.speculation","valuePattern":"(true|false)","keyPatternDisplay":"spark.speculation","valuePatternDisplay":"true|false","description":"Whether to use speculation (recommended off for streaming)"},{"keyPattern":"spark\\.es(\\.[^\\.]+)+","valuePattern":".*","keyPatternDisplay":"spark.es.*","valuePatternDisplay":"*","description":"Configuration options for ElasticSearch"},{"keyPattern":"es(\\.([^\\.]+))+","valuePattern":".*","keyPatternDisplay":"es.*","valuePatternDisplay":"*","description":"Configuration options for ElasticSearch"},{"keyPattern":"spark\\.(storage|shuffle)\\.memoryFraction","valuePattern":"0?\\.0*([1-9])([0-9])*","keyPatternDisplay":"spark.(storage|shuffle).memoryFraction","valuePatternDisplay":"(0.0,1.0)","description":"Fraction of Java heap to use for Spark's shuffle or storage"},{"keyPattern":"spark\\.streaming\\.backpressure\\.enabled","valuePattern":"(true|false)","keyPatternDisplay":"spark.streaming.backpressure.enabled","valuePatternDisplay":"true|false","description":"Enables or disables Spark Streaming's internal backpressure mechanism (since 1.5). This enables the Spark Streaming to control the receiving rate based on the current batch scheduling delays and processing times so that the system receives only as fast as the system can process. Internally, this dynamically sets the maximum receiving rate of receivers. This rate is upper bounded by the values `spark.streaming.receiver.maxRate` and `spark.streaming.kafka.maxRatePerPartition` if they are set."},{"keyPattern":"spark\\.streaming\\.receiver\\.maxRate","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.receiver.maxRate","valuePatternDisplay":"numeric","description":"Maximum rate (number of records per second) at which each receiver will receive data. Effectively, each stream will consume at most this number of records per second. Setting this configuration to 0 or a negative number will put no limit on the rate. See the deployment guide in the Spark Streaming programing guide for mode details."},{"keyPattern":"spark\\.streaming\\.kafka\\.maxRatePerPartition","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.kafka.maxRatePerPartition","valuePatternDisplay":"numeric","description":"Maximum rate (number of records per second) at which data will be read from each Kafka partition when using the Kafka direct stream API introduced in Spark 1.3. See the Kafka Integration guide for more details."},{"keyPattern":"spark\\.streaming\\.kafka\\.maxRetries","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.kafka.maxRetries","valuePatternDisplay":"numeric","description":"Maximum number of consecutive retries the driver will make in order to find the latest offsets on the leader of each partition (a default value of 1 means that the driver will make a maximum of 2 attempts). Only applies to the Kafka direct stream API introduced in Spark 1.3."},{"keyPattern":"spark\\.streaming\\.ui\\.retainedBatches","valuePattern":"^([0-9]{1,})$","keyPatternDisplay":"spark.streaming.ui.retainedBatches","valuePatternDisplay":"numeric","description":"How many batches the Spark Streaming UI and status APIs remember before garbage collecting."},{"keyPattern":"\\.*","valuePattern":"\\.*","keyPatternDisplay":"","valuePatternDisplay":"","description":""},{"keyPattern":"spark\\.driver\\.memory","valuePattern":"([1-9][0-9]*)(m|mb|g|gb|M|MB|G|GB|Mb|Gb)","keyPatternDisplay":"spark.driver.memory","valuePatternDisplay":"Positive memory quantity in MB or GB, e.g. 500mb or 10gb","description":"Size of the heap memory allocated to the driver."},{"keyPattern":"spark\\.executor\\.memory","valuePattern":"([1-9][0-9]*)(m|mb|g|gb|M|MB|G|GB|Mb|Gb)","keyPatternDisplay":"spark.executor.memory","valuePatternDisplay":"Positive memory quantity in MB or GB, e.g. 500mb or 10gb","description":"Size of the heap memory allocated to the executors."}],"enableReactNotebookComments":true,"enableResetPassword":true,"enableJobsSparkUpgrade":true,"sparkVersions":[{"key":"1.6.x-ubuntu15.10","displayName":"Spark 1.6.x (Hadoop 1)","packageLabel":"spark-1.6.1-hadoop1-jenkins-ip-10-30-10-144-Ue779236b24-S2368283920-2016-06-09-18:33:43.873585","upgradable":true,"deprecated":false,"customerVisible":false},{"key":"1.4.x-ubuntu15.10","displayName":"Spark 1.4.1 (Hadoop 1)","packageLabel":"spark-1.4-jenkins-ip-10-30-10-144-Ue779236b24-S9c254ab12a-2016-06-09-18:33:43.873585","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"master","displayName":"Spark master (dev)","packageLabel":"","upgradable":true,"deprecated":false,"customerVisible":false},{"key":"1.6.x-ubuntu15.10-hadoop1","displayName":"Spark 1.6.x (Hadoop 1)","packageLabel":"spark-1.6.1-hadoop1-jenkins-ip-10-30-10-144-Ue779236b24-S2368283920-2016-06-09-18:33:43.873585","upgradable":true,"deprecated":false,"customerVisible":false},{"key":"1.6.1-ubuntu15.10-hadoop1","displayName":"Spark 1.6.1 (Hadoop 1)","packageLabel":"spark-1.6.1-hadoop1-jenkins-ip-10-30-10-144-Ue779236b24-S2368283920-2016-06-09-18:33:43.873585","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.6.1-ubuntu15.10-hadoop2","displayName":"Spark 1.6.1 (Hadoop 2)","packageLabel":"spark-1.6.1-hadoop2-jenkins-ip-10-30-10-144-Ue779236b24-S2368283920-2016-06-09-18:33:43.873585","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.5.x-ubuntu15.10","displayName":"Spark 1.5.2 (Hadoop 1)","packageLabel":"spark-1.5-jenkins-ip-10-30-10-144-Ue779236b24-S9ca52d000d-2016-06-09-18:33:43.873585","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.3.x-ubuntu15.10","displayName":"Spark 1.3.0 (Hadoop 1)","packageLabel":"spark-1.3-jenkins-ip-10-30-10-144-Ue779236b24-Sa2ee4664b2-2016-06-09-18:33:43.873585","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"2.0.x-ubuntu15.10","displayName":"Spark 2.0 (apache/branch-2.0 preview)","packageLabel":"spark-image-1e6991b9c933c2b5cf5faf9168e75d372c63dc1901a77cd495f1879852eef79a","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.6.0-ubuntu15.10","displayName":"Spark 1.6.0 (Hadoop 1)","packageLabel":"spark-1.6.0-jenkins-ip-10-30-10-144-Ue779236b24-Sf90f83597b-2016-06-09-18:33:43.873585","upgradable":true,"deprecated":false,"customerVisible":true},{"key":"1.6.x-ubuntu15.10-hadoop2","displayName":"Spark 1.6.x (Hadoop 2)","packageLabel":"spark-1.6.1-hadoop2-jenkins-ip-10-30-10-144-Ue779236b24-S2368283920-2016-06-09-18:33:43.873585","upgradable":true,"deprecated":false,"customerVisible":false}],"enableRestrictedClusterCreation":false,"enableFeedback":true,"enableClusterAutoScaling":false,"defaultNumWorkers":8,"serverContinuationTimeoutMillis":10000,"driverStderrFilePrefix":"stderr","enableNotebookRefresh":true,"driverStdoutFilePrefix":"stdout","enableSparkDocsSearch":true,"sparkHistoryServerEnabled":true,"sanitizeMarkdownHtml":true,"enableIPythonImportExport":true,"enableNotebookHistoryDiffing":true,"branch":"2.21.1","accountsLimit":-1,"enableNotebookGitBranching":true,"local":false,"enableStrongPassword":false,"displayDefaultContainerMemoryGB":30,"deploymentMode":"staging","useSpotForWorkers":true,"enableUserInviteWorkflow":false,"enableStaticNotebooks":true,"enableCssTransitions":true,"showHomepageFeaturedLinks":true,"pricingURL":"https://databricks.com/product/pricing","enableClusterAclsConfig":true,"notifyLastLogin":true,"enableNotebookGitVersioning":true,"files":"files/","enableDriverLogsUI":true,"disableLegacyDashboards":true,"enableWorkspaceAclsConfig":true,"dropzoneMaxFileSize":4096,"enableNewDashboardViews":true,"driverLog4jFilePrefix":"log4j","enableSingleSignOn":true,"enableMavenLibraries":true,"displayRowLimit":1000,"defaultSparkVersion":{"key":"1.6.1-ubuntu15.10-hadoop1","displayName":"Spark 1.6.1 (Hadoop 1)","packageLabel":"spark-1.6.1-hadoop1-jenkins-ip-10-30-10-144-Ue779236b24-S2368283920-2016-06-09-18:33:43.873585","upgradable":true,"deprecated":false,"customerVisible":true},"enableMountAclsConfig":false,"enableClusterAclsByTier":true,"disallowAddingAdmins":false,"enableSparkConfUI":true,"featureTier":"UNKNOWN_TIER","enableOrgSwitcherUI":false,"clustersLimit":-1,"enableJdbcImport":true,"logfiles":"logfiles/","enableWebappSharding":false,"enableClusterDeltaUpdates":true,"enableSingleSignOnLogin":true,"useFixedStaticNotebookVersionForDevelopment":false,"enableMountAcls":false,"requireEmailUserName":true,"enableDashboardViews":false,"dbcFeedbackURL":"http://feedback.databricks.com/forums/263785-product-feedback","enableMountAclService":true,"enableWorkspaceAclService":true,"enableWorkspaceAcls":true,"gitHash":"e779236b24019d23ede28f616f85835925f76882","showWorkspaceFeaturedLinks":true,"signupUrl":"https://databricks.com/try-databricks","allowFeedbackForumAccess":true,"enableImportFromUrl":true,"enableMiniClusters":false,"showDevTierBetaVersion":false,"enableDebugUI":true,"allowNonAdminUsers":true,"enableSingleSignOnByTier":true,"staticNotebookResourceUrl":"https://databricks-staging-cloudfront.staging.cloud.databricks.com/static/201606091012490700-e779236b24019d23ede28f616f85835925f76882/","enableSparkPackages":true,"dynamicSparkVersions":true,"enableNotebookHistoryUI":true,"showDebugCounters":true,"enableFolderHtmlExport":true,"enableSparkVersionsUI":true,"homepageFeaturedLinks":[{"linkURI":"https://docs.cloud.databricks.com/docs/latest/featured_notebooks/A%20Gentle%20Introduction%20to%20Apache%20Spark%20on%20Databricks.html","displayName":"Introduction to Apache Spark on Databricks","icon":"img/home/Python_icon.svg"},{"linkURI":"https://docs.cloud.databricks.com/docs/latest/featured_notebooks/Quick%20Start%20DataFrames.html","displayName":"Quick Start DataFrames","icon":"img/home/Scala_icon.svg"},{"linkURI":"https://docs.cloud.databricks.com/docs/latest/featured_notebooks/GSW%20Passing%20Analysis%20(new).html","displayName":"GSW Passing Analysis (new)","icon":"img/home/Python_icon.svg"}],"upgradeURL":"","notebookLoadingBackground":"#fff","enableServerAutoComplete":true,"enableStaticHtmlImport":true,"enableTerminal":true,"defaultMemoryPerContainerMB":28000,"enablePresenceUI":true,"accounts":true,"useFramedStaticNotebooks":true,"enableNewProgressReportUI":true,"defaultCoresPerContainer":4};</script>
<script>var __DATABRICKS_NOTEBOOK_MODEL = {"version":"NotebookV1","origId":3051441,"name":"Casting structs","language":"scala","commands":[{"version":"CommandV1","origId":3051443,"guid":"513b0935-cc67-4947-9e3e-9a0bf00bc0a1","subtype":"command","commandType":"auto","position":1.0,"command":"// Create temp test.json file on DBFS\n// Example JSON with one JSON object per line\ndbutils.fs.put(\n\"/tmp/test.json\",\n\"\"\"{\"string\":\"string1\",\"int\":1,\"array\":[1,2,3],\"dict\": {\"key\": \"value1\"}}\n{\"string\":\"string2\",\"int\":2,\"array\":[2,4,6],\"dict\": {\"key\": \"value2\"}}\n{\"string\":\"string3\",\"int\":3,\"array\":[3,6,9],\"dict\": {\"key\": \"value3\", \"extra_key\": \"extra_value3\"}}\n\"\"\",\ntrue)","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">Wrote 242 bytes.\nres0: Boolean = true\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":null,"error":null,"workflows":[],"startTime":1.465959629344E12,"submitTime":1.465959628764E12,"finishTime":1.465959635762E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"afe1cd4a-0064-4292-acf6-253361907430"},{"version":"CommandV1","origId":3051444,"guid":"89f1ad52-3020-4c09-81fb-f1326833bcc0","subtype":"command","commandType":"auto","position":2.0,"command":"val testJsonData = sqlContext.read.json(\"/tmp/test.json\")","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">testJsonData: org.apache.spark.sql.DataFrame = [array: array&lt;bigint&gt;, dict: struct&lt;extra_key:string,key:string&gt;, int: bigint, string: string]\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":"<div class=\"ansiout\">&lt;console&gt;:24: error: value read is not a member of org.apache.spark.sql.hive.HiveContext\n val testJsonData = sqlContext.read.json(&quot;/tmp/test.json&quot;)\n ^\n</div>","error":null,"workflows":[],"startTime":1.465959635889E12,"submitTime":1.465959629129E12,"finishTime":1.465959638979E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"","showCommandTitle":false,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"6a015e47-e023-4a24-adce-9ac6997560a9"},{"version":"CommandV1","origId":3051449,"guid":"f2c2c5a7-652f-4a1c-a493-f48addc23b4e","subtype":"command","commandType":"auto","position":2.03125,"command":"testJsonData.selectExpr(\"cast(dict as string)\").collect()","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">res1: Array[org.apache.spark.sql.Row] = Array([[null,value1]], [[null,value2]], [[extra_value3,value3]])\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":"org.apache.spark.sql.AnalysisException: cannot resolve 'cast(dict as double)' due to data type mismatch: cannot cast StructType(StructField(extra_key,StringType,true), StructField(key,StringType,true)) to DoubleType;","error":"<div class=\"ansiout\">\tat org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)\n\tat org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:65)\n\tat org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:57)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:335)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:335)\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:334)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:332)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:332)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:281)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:727)\n\tat scala.collection.AbstractIterator.foreach(Iterator.scala:1157)\n\tat scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)\n\tat scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)\n\tat scala.collection.AbstractIterator.to(Iterator.scala:1157)\n\tat scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)\n\tat scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)\n\tat scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)\n\tat scala.collection.AbstractIterator.toArray(Iterator.scala:1157)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:321)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:332)\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionUp$1(QueryPlan.scala:108)\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2(QueryPlan.scala:118)\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2$1.apply(QueryPlan.scala:122)\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)\n\tat scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)\n\tat scala.collection.TraversableLike$class.map(TraversableLike.scala:244)\n\tat scala.collection.AbstractTraversable.map(Traversable.scala:105)\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2(QueryPlan.scala:122)\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2.apply(QueryPlan.scala:127)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:727)\n\tat scala.collection.AbstractIterator.foreach(Iterator.scala:1157)\n\tat scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)\n\tat scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)\n\tat scala.collection.AbstractIterator.to(Iterator.scala:1157)\n\tat scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)\n\tat scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)\n\tat scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)\n\tat scala.collection.AbstractIterator.toArray(Iterator.scala:1157)\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:127)\n\tat org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:57)\n\tat org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:121)\n\tat org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:44)\n\tat org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34)\n\tat org.apache.spark.sql.DataFrame.&lt;init&gt;(DataFrame.scala:133)\n\tat org.apache.spark.sql.DataFrame.org$apache$spark$sql$DataFrame$$withPlan(DataFrame.scala:2126)\n\tat org.apache.spark.sql.DataFrame.select(DataFrame.scala:707)\n\tat org.apache.spark.sql.DataFrame.selectExpr(DataFrame.scala:740)</div>","workflows":[],"startTime":1.465959516268E12,"submitTime":1.465959515806E12,"finishTime":1.465959517598E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"Spark 1.4.1","showCommandTitle":true,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"927ac560-374f-467d-b34f-70ff3ad7b673"},{"version":"CommandV1","origId":3051451,"guid":"ca6ba1a5-02eb-482a-b831-525987fec597","subtype":"command","commandType":"auto","position":2.2734375,"command":"testJsonData.selectExpr(\"cast(dict as string)\").collect()","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">res1: Array[org.apache.spark.sql.Row] = Array([[null,value1]], [[null,value2]], [[extra_value3,value3]])\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":"<div class=\"ansiout\">&lt;console&gt;:26: error: not found: value testJsonData\n testJsonData.selectExpr(&quot;cast(dict as string)&quot;).collect()\n ^\n</div>","error":null,"workflows":[],"startTime":1.46595963901E12,"submitTime":1.465959631452E12,"finishTime":1.465959640485E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"Spark 1.5.2","showCommandTitle":true,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"06c1ddcd-ce88-401e-9b95-26581d3fa508"},{"version":"CommandV1","origId":3051445,"guid":"3394dcdf-326d-4f45-a7a1-3a30974dd323","subtype":"command","commandType":"auto","position":3.0,"command":"testJsonData.selectExpr(\"cast(dict as string)\").collect()","commandVersion":0,"state":"finished","results":{"type":"html","data":"<div class=\"ansiout\">res8: Array[org.apache.spark.sql.Row] = Array([[1,0,1800000006,3165756c6176]], [[1,0,1800000006,3265756c6176]], [[0,180000000c,2800000006,61765f6172747865,3365756c,3365756c6176]])\n</div>","arguments":{},"addedWidgets":{},"removedWidgets":[]},"errorSummary":"org.apache.spark.sql.AnalysisException: cannot resolve 'cast(dict as double)' due to data type mismatch: cannot cast StructType(StructField(extra_key,StringType,true), StructField(key,StringType,true)) to DoubleType;","error":"<div class=\"ansiout\">\tat org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)\n\tat org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:65)\n\tat org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:57)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:335)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:335)\n\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:334)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:332)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:332)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:281)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:727)\n\tat scala.collection.AbstractIterator.foreach(Iterator.scala:1157)\n\tat scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)\n\tat scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)\n\tat scala.collection.AbstractIterator.to(Iterator.scala:1157)\n\tat scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)\n\tat scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)\n\tat scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)\n\tat scala.collection.AbstractIterator.toArray(Iterator.scala:1157)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:321)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:332)\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionUp$1(QueryPlan.scala:108)\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2(QueryPlan.scala:118)\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2$1.apply(QueryPlan.scala:122)\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)\n\tat scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)\n\tat scala.collection.TraversableLike$class.map(TraversableLike.scala:244)\n\tat scala.collection.AbstractTraversable.map(Traversable.scala:105)\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$2(QueryPlan.scala:122)\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$2.apply(QueryPlan.scala:127)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat scala.collection.Iterator$class.foreach(Iterator.scala:727)\n\tat scala.collection.AbstractIterator.foreach(Iterator.scala:1157)\n\tat scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)\n\tat scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)\n\tat scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)\n\tat scala.collection.AbstractIterator.to(Iterator.scala:1157)\n\tat scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)\n\tat scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)\n\tat scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)\n\tat scala.collection.AbstractIterator.toArray(Iterator.scala:1157)\n\tat org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:127)\n\tat org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:57)\n\tat org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50)\n\tat org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:121)\n\tat org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50)\n\tat org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:44)\n\tat org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34)\n\tat org.apache.spark.sql.DataFrame.&lt;init&gt;(DataFrame.scala:133)\n\tat org.apache.spark.sql.DataFrame.org$apache$spark$sql$DataFrame$$withPlan(DataFrame.scala:2126)\n\tat org.apache.spark.sql.DataFrame.select(DataFrame.scala:707)\n\tat org.apache.spark.sql.DataFrame.selectExpr(DataFrame.scala:740)</div>","workflows":[],"startTime":1.465959317191E12,"submitTime":1.465959315987E12,"finishTime":1.465959317494E12,"collapsed":false,"bindings":{},"inputWidgets":{},"displayType":"table","width":"auto","height":"auto","xColumns":null,"yColumns":null,"pivotColumns":null,"pivotAggregation":null,"customPlotOptions":{},"commentThread":[],"commentsVisible":false,"parentHierarchy":[],"diffInserts":[],"diffDeletes":[],"globalVars":{},"latestUser":"a user","commandTitle":"Spark 1.6.1","showCommandTitle":true,"hideCommandCode":false,"hideCommandResult":false,"iPythonMetadata":null,"nuid":"60c69543-e650-4a97-af48-816f2f517f9a"}],"dashboards":[],"guid":"ac507a4f-c85a-4127-b70a-e77efeca587b","globalVars":{},"iPythonMetadata":null,"inputWidgets":{}};</script>
<script
src="https://databricks-staging-cloudfront.staging.cloud.databricks.com/static/201606091012490700-e779236b24019d23ede28f616f85835925f76882/js/notebook-main.js"
onerror="window.mainJsLoadError = true;"></script>
</head>
<body>
<script>
if (window.mainJsLoadError) {
var u = 'https://databricks-staging-cloudfront.staging.cloud.databricks.com/static/201606091012490700-e779236b24019d23ede28f616f85835925f76882/js/notebook-main.js';
var b = document.getElementsByTagName('body')[0];
var c = document.createElement('div');
c.innerHTML = ('<h1>Network Error</h1>' +
'<p><b>Please check your network connection and try again.</b></p>' +
'<p>Could not load a required resource: ' + u + '</p>');
c.style.margin = '30px';
c.style.padding = '20px 50px';
c.style.backgroundColor = '#f5f5f5';
c.style.borderRadius = '5px';
b.appendChild(c);
}
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment