Created
May 10, 2023 09:42
-
-
Save drorata/2d2e4f145996337e042d71f3d101f14a to your computer and use it in GitHub Desktop.
Spark comparing columns
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "9ecca3b8-3c7d-44fd-ad67-cc4e75b816d5", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"from pyspark.sql import functions as F\n", | |
"from pyspark.sql.types import IntegerType, DoubleType, StringType, StructField, StructType" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "1aecdf9d-08e9-4093-9980-5469250678b7", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"<style scoped>\n", | |
" .table-result-container {\n", | |
" max-height: 300px;\n", | |
" overflow: auto;\n", | |
" }\n", | |
" table, th, td {\n", | |
" border: 1px solid black;\n", | |
" border-collapse: collapse;\n", | |
" }\n", | |
" th, td {\n", | |
" padding: 5px;\n", | |
" }\n", | |
" th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>ints</th><th>doubles</th><th>strings</th></tr></thead><tbody><tr><td>1</td><td>1.0</td><td>1</td></tr><tr><td>1</td><td>1.2</td><td>1.2</td></tr><tr><td>1</td><td>1.2</td><td>1.3</td></tr><tr><td>1</td><td>1.2</td><td>some string</td></tr></tbody></table></div>" | |
] | |
}, | |
"metadata": { | |
"application/vnd.databricks.v1+output": { | |
"addedWidgets": {}, | |
"aggData": [], | |
"aggError": "", | |
"aggOverflow": false, | |
"aggSchema": [], | |
"aggSeriesLimitReached": false, | |
"aggType": "", | |
"arguments": {}, | |
"columnCustomDisplayInfos": {}, | |
"data": [ | |
[ | |
1, | |
1.0, | |
"1" | |
], | |
[ | |
1, | |
1.2, | |
"1.2" | |
], | |
[ | |
1, | |
1.2, | |
"1.3" | |
], | |
[ | |
1, | |
1.2, | |
"some string" | |
] | |
], | |
"datasetInfos": [], | |
"dbfsResultPath": null, | |
"isJsonSchema": true, | |
"metadata": {}, | |
"overflow": false, | |
"plotOptions": { | |
"customPlotOptions": {}, | |
"displayType": "table", | |
"pivotAggregation": null, | |
"pivotColumns": null, | |
"xColumns": null, | |
"yColumns": null | |
}, | |
"removedWidgets": [], | |
"schema": [ | |
{ | |
"metadata": "{}", | |
"name": "ints", | |
"type": "\"integer\"" | |
}, | |
{ | |
"metadata": "{}", | |
"name": "doubles", | |
"type": "\"double\"" | |
}, | |
{ | |
"metadata": "{}", | |
"name": "strings", | |
"type": "\"string\"" | |
} | |
], | |
"type": "table" | |
} | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"cols = StructType([\n", | |
" StructField('ints', IntegerType()),\n", | |
" StructField('doubles', DoubleType()),\n", | |
" StructField('strings', StringType()),\n", | |
"])\n", | |
"data = [\n", | |
" [1, 1.0, \"1\"],\n", | |
" [1, 1.2, \"1.2\"],\n", | |
" [1, 1.2, \"1.3\"],\n", | |
" [1, 1.2, \"some string\"]\n", | |
"]\n", | |
"df = spark.createDataFrame(data, cols)\n", | |
"display(df)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": {}, | |
"inputWidgets": {}, | |
"nuid": "5c1d0d54-18f5-4e4a-9ddf-383ce5308ef1", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"source": [ | |
"# INTs vs DOUBLEs ✅" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "1fb7e2b9-1150-423a-8b50-d86ceb6e59ac", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"+----+-------+-----------+-------------+\n|ints|doubles| strings|int = doubles|\n+----+-------+-----------+-------------+\n| 1| 1.0| 1| true|\n| 1| 1.2| 1.2| false|\n| 1| 1.2| 1.3| false|\n| 1| 1.2|some string| false|\n+----+-------+-----------+-------------+\n\n" | |
] | |
} | |
], | |
"source": [ | |
"df.withColumn(\"int = doubles\", F.col(\"ints\") == F.col(\"doubles\")).show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "7e7fff70-bb82-46e6-9b7c-debf1f227243", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"== Parsed Logical Plan ==\n'Project [ints#92, doubles#93, strings#94, ('ints = 'doubles) AS int = doubles#188]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Analyzed Logical Plan ==\nints: int, doubles: double, strings: string, int = doubles: boolean\nProject [ints#92, doubles#93, strings#94, (cast(ints#92 as double) = doubles#93) AS int = doubles#188]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Optimized Logical Plan ==\nProject [ints#92, doubles#93, strings#94, (cast(ints#92 as double) = doubles#93) AS int = doubles#188]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Physical Plan ==\n*(1) Project [ints#92, doubles#93, strings#94, (cast(ints#92 as double) = doubles#93) AS int = doubles#188]\n+- *(1) Scan ExistingRDD[ints#92,doubles#93,strings#94]\n\n" | |
] | |
} | |
], | |
"source": [ | |
"df.withColumn(\"int = doubles\", F.col(\"ints\") == F.col(\"doubles\")).explain(True)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": {}, | |
"inputWidgets": {}, | |
"nuid": "8e14b7c4-4194-4a1d-8c58-e6970f443169", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"source": [ | |
"# INTs vs STRINGs ❌" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "eec13726-8878-4857-9f17-a4ae02faf7d8", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"+----+-------+-----------+------------+\n|ints|doubles| strings|int = string|\n+----+-------+-----------+------------+\n| 1| 1.0| 1| true|\n| 1| 1.2| 1.2| true|\n| 1| 1.2| 1.3| true|\n| 1| 1.2|some string| null|\n+----+-------+-----------+------------+\n\n" | |
] | |
} | |
], | |
"source": [ | |
"df.withColumn(\"int = string\", F.col(\"ints\") == F.col(\"strings\")).show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "5a88bbc2-e457-4512-bb90-aeb3950ecc73", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"== Parsed Logical Plan ==\n'Project [ints#92, doubles#93, strings#94, ('ints = 'strings) AS int = string#169]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Analyzed Logical Plan ==\nints: int, doubles: double, strings: string, int = string: boolean\nProject [ints#92, doubles#93, strings#94, (ints#92 = cast(strings#94 as int)) AS int = string#169]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Optimized Logical Plan ==\nProject [ints#92, doubles#93, strings#94, (ints#92 = cast(strings#94 as int)) AS int = string#169]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Physical Plan ==\n*(1) Project [ints#92, doubles#93, strings#94, (ints#92 = cast(strings#94 as int)) AS int = string#169]\n+- *(1) Scan ExistingRDD[ints#92,doubles#93,strings#94]\n\n" | |
] | |
} | |
], | |
"source": [ | |
"df.withColumn(\"int = string\", F.col(\"ints\") == F.col(\"strings\")).explain(True)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": {}, | |
"inputWidgets": {}, | |
"nuid": "d62e98b8-3675-4cb8-9049-3bbfbd5a05a7", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"source": [ | |
"# DOUBLEs vs STRINGs 🔱" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "af02f4f6-7e6a-489b-b6ed-e22175089449", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"+----+-------+-----------+---------------+\n|ints|doubles| strings|double = string|\n+----+-------+-----------+---------------+\n| 1| 1.0| 1| true|\n| 1| 1.2| 1.2| true|\n| 1| 1.2| 1.3| false|\n| 1| 1.2|some string| null|\n+----+-------+-----------+---------------+\n\n" | |
] | |
} | |
], | |
"source": [ | |
"df.withColumn(\"double = string\", F.col(\"doubles\") == F.col(\"strings\")).show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "19201770-fc3b-49c0-ae35-01a3847c41f0", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"== Parsed Logical Plan ==\n'Project [ints#92, doubles#93, strings#94, ('doubles = 'strings) AS double = string#207]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Analyzed Logical Plan ==\nints: int, doubles: double, strings: string, double = string: boolean\nProject [ints#92, doubles#93, strings#94, (doubles#93 = cast(strings#94 as double)) AS double = string#207]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Optimized Logical Plan ==\nProject [ints#92, doubles#93, strings#94, (doubles#93 = cast(strings#94 as double)) AS double = string#207]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Physical Plan ==\n*(1) Project [ints#92, doubles#93, strings#94, (doubles#93 = cast(strings#94 as double)) AS double = string#207]\n+- *(1) Scan ExistingRDD[ints#92,doubles#93,strings#94]\n\n" | |
] | |
} | |
], | |
"source": [ | |
"df.withColumn(\"double = string\", F.col(\"doubles\") == F.col(\"strings\")).explain(True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "7b193033-54c6-4b74-a7a3-3a590feef323", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"<style scoped>\n", | |
" .table-result-container {\n", | |
" max-height: 300px;\n", | |
" overflow: auto;\n", | |
" }\n", | |
" table, th, td {\n", | |
" border: 1px solid black;\n", | |
" border-collapse: collapse;\n", | |
" }\n", | |
" th, td {\n", | |
" padding: 5px;\n", | |
" }\n", | |
" th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>ints</th><th>values</th></tr></thead><tbody><tr><td>1</td><td>foo</td></tr><tr><td>1</td><td>bar</td></tr><tr><td>1</td><td>goo</td></tr><tr><td>1</td><td>loo</td></tr></tbody></table></div>" | |
] | |
}, | |
"metadata": { | |
"application/vnd.databricks.v1+output": { | |
"addedWidgets": {}, | |
"aggData": [], | |
"aggError": "", | |
"aggOverflow": false, | |
"aggSchema": [], | |
"aggSeriesLimitReached": false, | |
"aggType": "", | |
"arguments": {}, | |
"columnCustomDisplayInfos": {}, | |
"data": [ | |
[ | |
1, | |
"foo" | |
], | |
[ | |
1, | |
"bar" | |
], | |
[ | |
1, | |
"goo" | |
], | |
[ | |
1, | |
"loo" | |
] | |
], | |
"datasetInfos": [], | |
"dbfsResultPath": null, | |
"isJsonSchema": true, | |
"metadata": {}, | |
"overflow": false, | |
"plotOptions": { | |
"customPlotOptions": {}, | |
"displayType": "table", | |
"pivotAggregation": null, | |
"pivotColumns": null, | |
"xColumns": null, | |
"yColumns": null | |
}, | |
"removedWidgets": [], | |
"schema": [ | |
{ | |
"metadata": "{}", | |
"name": "ints", | |
"type": "\"integer\"" | |
}, | |
{ | |
"metadata": "{}", | |
"name": "values", | |
"type": "\"string\"" | |
} | |
], | |
"type": "table" | |
} | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"cols = StructType([\n", | |
" StructField('ints', IntegerType()),\n", | |
" StructField('values', StringType()),\n", | |
"])\n", | |
"data = [\n", | |
" [1, \"foo\"],\n", | |
" [1, \"bar\"],\n", | |
" [1, \"goo\"],\n", | |
" [1, \"loo\"]\n", | |
"]\n", | |
"df1 = spark.createDataFrame(data, cols)\n", | |
"display(df1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "31c3a02d-9823-4b73-852c-55aa52620b9b", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"<style scoped>\n", | |
" .table-result-container {\n", | |
" max-height: 300px;\n", | |
" overflow: auto;\n", | |
" }\n", | |
" table, th, td {\n", | |
" border: 1px solid black;\n", | |
" border-collapse: collapse;\n", | |
" }\n", | |
" th, td {\n", | |
" padding: 5px;\n", | |
" }\n", | |
" th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>strings</th><th>values2</th></tr></thead><tbody><tr><td>1.0</td><td>foo2</td></tr><tr><td>1.2</td><td>bar2</td></tr><tr><td>1.3</td><td>goo2</td></tr><tr><td>some string</td><td>loo2</td></tr></tbody></table></div>" | |
] | |
}, | |
"metadata": { | |
"application/vnd.databricks.v1+output": { | |
"addedWidgets": {}, | |
"aggData": [], | |
"aggError": "", | |
"aggOverflow": false, | |
"aggSchema": [], | |
"aggSeriesLimitReached": false, | |
"aggType": "", | |
"arguments": {}, | |
"columnCustomDisplayInfos": {}, | |
"data": [ | |
[ | |
"1.0", | |
"foo2" | |
], | |
[ | |
"1.2", | |
"bar2" | |
], | |
[ | |
"1.3", | |
"goo2" | |
], | |
[ | |
"some string", | |
"loo2" | |
] | |
], | |
"datasetInfos": [], | |
"dbfsResultPath": null, | |
"isJsonSchema": true, | |
"metadata": {}, | |
"overflow": false, | |
"plotOptions": { | |
"customPlotOptions": {}, | |
"displayType": "table", | |
"pivotAggregation": null, | |
"pivotColumns": null, | |
"xColumns": null, | |
"yColumns": null | |
}, | |
"removedWidgets": [], | |
"schema": [ | |
{ | |
"metadata": "{}", | |
"name": "strings", | |
"type": "\"string\"" | |
}, | |
{ | |
"metadata": "{}", | |
"name": "values2", | |
"type": "\"string\"" | |
} | |
], | |
"type": "table" | |
} | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"cols = StructType([\n", | |
" StructField('strings', StringType()),\n", | |
" StructField('values2', StringType()),\n", | |
"])\n", | |
"data = [\n", | |
" [\"1.0\", \"foo2\"],\n", | |
" [\"1.2\", \"bar2\"],\n", | |
" [\"1.3\", \"goo2\"],\n", | |
" [\"some string\", \"loo2\"]\n", | |
"]\n", | |
"df2 = spark.createDataFrame(data, cols)\n", | |
"display(df2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "a1c5d327-eaca-4be9-918d-444d2142663b", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"+----+------+-------+-------+\n|ints|values|strings|values2|\n+----+------+-------+-------+\n| 1| foo| 1.3| goo2|\n| 1| foo| 1.2| bar2|\n| 1| foo| 1.0| foo2|\n| 1| bar| 1.3| goo2|\n| 1| bar| 1.2| bar2|\n| 1| bar| 1.0| foo2|\n| 1| goo| 1.3| goo2|\n| 1| goo| 1.2| bar2|\n| 1| goo| 1.0| foo2|\n| 1| loo| 1.3| goo2|\n| 1| loo| 1.2| bar2|\n| 1| loo| 1.0| foo2|\n+----+------+-------+-------+\n\n" | |
] | |
} | |
], | |
"source": [ | |
"df1.join(df2, df1.ints == df2.strings, how=\"left\").show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "2fb24f20-6cfd-4ba0-9590-6c3130eb310a", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.databricks.v1+bamboolib_hint": "{\"pd.DataFrames\": [], \"version\": \"0.0.1\"}", | |
"text/plain": [] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "1b77d904-163e-44e4-abb7-406101b7cef2", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>ints</th>\n", | |
" <th>doubles</th>\n", | |
" <th>strings</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>1.2</td>\n", | |
" <td>1.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>1.2</td>\n", | |
" <td>1.3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>1.2</td>\n", | |
" <td>some string</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
] | |
}, | |
"metadata": { | |
"application/vnd.databricks.v1+output": { | |
"addedWidgets": {}, | |
"arguments": {}, | |
"data": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>ints</th>\n <th>doubles</th>\n <th>strings</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>1.0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>1.2</td>\n <td>1.2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1</td>\n <td>1.2</td>\n <td>1.3</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1</td>\n <td>1.2</td>\n <td>some string</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"datasetInfos": [], | |
"metadata": {}, | |
"removedWidgets": [], | |
"textData": null, | |
"type": "htmlSandbox" | |
} | |
}, | |
"output_type": "display_data" | |
} | |
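], | |
"source": [ | |
"pdf = pd.DataFrame(\n", | |
"    data=[\n", | |
"        [1, 1.0, \"1\"],\n", | |
"        [1, 1.2, \"1.2\"],\n", | |
"        [1, 1.2, \"1.3\"],\n", | |
"        [1, 1.2, \"some string\"],\n", | |
"    ],\n", | |
"    columns=[\"ints\", \"doubles\", \"strings\"],\n", | |
")\n", | |
"pdf" | |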
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "167f1cbf-6eea-4c83-83e3-9ae0f1683378", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Out[39]: ints int64\ndoubles float64\nstrings object\ndtype: object" | |
] | |
} | |
], | |
"source": [ | |
"pdf.dtypes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "3680ba03-5d5d-4794-a5ab-a780a2490ce2", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Out[36]: 0 True\n1 False\n2 False\n3 False\ndtype: bool" | |
] | |
} | |
], | |
"source": [ | |
"pdf.ints == pdf.doubles" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "9bcf4eaf-0606-43d7-909c-7309ff514c71", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Out[37]: 0 False\n1 False\n2 False\n3 False\ndtype: bool" | |
] | |
} | |
], | |
"source": [ | |
"pdf.ints == pdf.strings" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "40eae0b6-f550-48eb-8310-f8fd5d68f3ed", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Out[38]: 0 False\n1 False\n2 False\n3 False\ndtype: bool" | |
] | |
} | |
], | |
"source": [ | |
"pdf.doubles == pdf.strings" | |
] | |
} | |
], | |
"metadata": { | |
"application/vnd.databricks.v1+notebook": { | |
"dashboards": [], | |
"language": "python", | |
"notebookMetadata": { | |
"mostRecentlyExecutedCommandWithImplicitDF": { | |
"commandId": 1011127907182093, | |
"dataframes": [ | |
"_sqldf" | |
] | |
}, | |
"pythonIndentUnit": 4 | |
}, | |
"notebookName": "experimenting with comparing types", | |
"notebookOrigID": 2974607440111775, | |
"widgets": {} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment