Skip to content

Instantly share code, notes, and snippets.

@drorata
Created May 10, 2023 09:42
Show Gist options
  • Save drorata/2d2e4f145996337e042d71f3d101f14a to your computer and use it in GitHub Desktop.
Save drorata/2d2e4f145996337e042d71f3d101f14a to your computer and use it in GitHub Desktop.
Spark comparing columns
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "9ecca3b8-3c7d-44fd-ad67-cc4e75b816d5",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"from pyspark.sql import functions as F\n",
"from pyspark.sql.types import IntegerType, DoubleType, StringType, StructType"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "1aecdf9d-08e9-4093-9980-5469250678b7",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/html": [
"<style scoped>\n",
" .table-result-container {\n",
" max-height: 300px;\n",
" overflow: auto;\n",
" }\n",
" table, th, td {\n",
" border: 1px solid black;\n",
" border-collapse: collapse;\n",
" }\n",
" th, td {\n",
" padding: 5px;\n",
" }\n",
" th {\n",
" text-align: left;\n",
" }\n",
"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>ints</th><th>doubles</th><th>strings</th></tr></thead><tbody><tr><td>1</td><td>1.0</td><td>1</td></tr><tr><td>1</td><td>1.2</td><td>1.2</td></tr><tr><td>1</td><td>1.2</td><td>1.3</td></tr><tr><td>1</td><td>1.2</td><td>some string</td></tr></tbody></table></div>"
]
},
"metadata": {
"application/vnd.databricks.v1+output": {
"addedWidgets": {},
"aggData": [],
"aggError": "",
"aggOverflow": false,
"aggSchema": [],
"aggSeriesLimitReached": false,
"aggType": "",
"arguments": {},
"columnCustomDisplayInfos": {},
"data": [
[
1,
1.0,
"1"
],
[
1,
1.2,
"1.2"
],
[
1,
1.2,
"1.3"
],
[
1,
1.2,
"some string"
]
],
"datasetInfos": [],
"dbfsResultPath": null,
"isJsonSchema": true,
"metadata": {},
"overflow": false,
"plotOptions": {
"customPlotOptions": {},
"displayType": "table",
"pivotAggregation": null,
"pivotColumns": null,
"xColumns": null,
"yColumns": null
},
"removedWidgets": [],
"schema": [
{
"metadata": "{}",
"name": "ints",
"type": "\"integer\""
},
{
"metadata": "{}",
"name": "doubles",
"type": "\"double\""
},
{
"metadata": "{}",
"name": "strings",
"type": "\"string\""
}
],
"type": "table"
}
},
"output_type": "display_data"
}
],
"source": [
"cols = StructType([\n",
" StructField('ints', IntegerType()),\n",
" StructField('doubles', DoubleType()),\n",
" StructField('strings', StringType()),\n",
"])\n",
"data = [\n",
" [1, 1.0, \"1\"],\n",
" [1, 1.2, \"1.2\"],\n",
" [1, 1.2, \"1.3\"],\n",
" [1, 1.2, \"some string\"]\n",
"]\n",
"df = spark.createDataFrame(data, cols)\n",
"display(df)"
]
},
{
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "5c1d0d54-18f5-4e4a-9ddf-383ce5308ef1",
"showTitle": false,
"title": ""
}
},
"source": [
"# INTs vs DOUBLEs ✅"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "1fb7e2b9-1150-423a-8b50-d86ceb6e59ac",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"+----+-------+-----------+-------------+\n|ints|doubles| strings|int = doubles|\n+----+-------+-----------+-------------+\n| 1| 1.0| 1| true|\n| 1| 1.2| 1.2| false|\n| 1| 1.2| 1.3| false|\n| 1| 1.2|some string| false|\n+----+-------+-----------+-------------+\n\n"
]
}
],
"source": [
"df.withColumn(\"int = doubles\", F.col(\"ints\") == F.col(\"doubles\")).show()"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "7e7fff70-bb82-46e6-9b7c-debf1f227243",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"== Parsed Logical Plan ==\n'Project [ints#92, doubles#93, strings#94, ('ints = 'doubles) AS int = doubles#188]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Analyzed Logical Plan ==\nints: int, doubles: double, strings: string, int = doubles: boolean\nProject [ints#92, doubles#93, strings#94, (cast(ints#92 as double) = doubles#93) AS int = doubles#188]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Optimized Logical Plan ==\nProject [ints#92, doubles#93, strings#94, (cast(ints#92 as double) = doubles#93) AS int = doubles#188]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Physical Plan ==\n*(1) Project [ints#92, doubles#93, strings#94, (cast(ints#92 as double) = doubles#93) AS int = doubles#188]\n+- *(1) Scan ExistingRDD[ints#92,doubles#93,strings#94]\n\n"
]
}
],
"source": [
"df.withColumn(\"int = doubles\", F.col(\"ints\") == F.col(\"doubles\")).explain(True)"
]
},
{
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "8e14b7c4-4194-4a1d-8c58-e6970f443169",
"showTitle": false,
"title": ""
}
},
"source": [
"# INTs vs STRINGs ❌"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "eec13726-8878-4857-9f17-a4ae02faf7d8",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"+----+-------+-----------+------------+\n|ints|doubles| strings|int = string|\n+----+-------+-----------+------------+\n| 1| 1.0| 1| true|\n| 1| 1.2| 1.2| true|\n| 1| 1.2| 1.3| true|\n| 1| 1.2|some string| null|\n+----+-------+-----------+------------+\n\n"
]
}
],
"source": [
"df.withColumn(\"int = string\", F.col(\"ints\") == F.col(\"strings\")).show()"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "5a88bbc2-e457-4512-bb90-aeb3950ecc73",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"== Parsed Logical Plan ==\n'Project [ints#92, doubles#93, strings#94, ('ints = 'strings) AS int = string#169]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Analyzed Logical Plan ==\nints: int, doubles: double, strings: string, int = string: boolean\nProject [ints#92, doubles#93, strings#94, (ints#92 = cast(strings#94 as int)) AS int = string#169]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Optimized Logical Plan ==\nProject [ints#92, doubles#93, strings#94, (ints#92 = cast(strings#94 as int)) AS int = string#169]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Physical Plan ==\n*(1) Project [ints#92, doubles#93, strings#94, (ints#92 = cast(strings#94 as int)) AS int = string#169]\n+- *(1) Scan ExistingRDD[ints#92,doubles#93,strings#94]\n\n"
]
}
],
"source": [
"df.withColumn(\"int = string\", F.col(\"ints\") == F.col(\"strings\")).explain(True)"
]
},
{
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "d62e98b8-3675-4cb8-9049-3bbfbd5a05a7",
"showTitle": false,
"title": ""
}
},
"source": [
"# DOUBLEs vs STRINGs 🔱"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "af02f4f6-7e6a-489b-b6ed-e22175089449",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"+----+-------+-----------+---------------+\n|ints|doubles| strings|double = string|\n+----+-------+-----------+---------------+\n| 1| 1.0| 1| true|\n| 1| 1.2| 1.2| true|\n| 1| 1.2| 1.3| false|\n| 1| 1.2|some string| null|\n+----+-------+-----------+---------------+\n\n"
]
}
],
"source": [
"df.withColumn(\"double = string\", F.col(\"doubles\") == F.col(\"strings\")).show()"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "19201770-fc3b-49c0-ae35-01a3847c41f0",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"== Parsed Logical Plan ==\n'Project [ints#92, doubles#93, strings#94, ('doubles = 'strings) AS double = string#207]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Analyzed Logical Plan ==\nints: int, doubles: double, strings: string, double = string: boolean\nProject [ints#92, doubles#93, strings#94, (doubles#93 = cast(strings#94 as double)) AS double = string#207]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Optimized Logical Plan ==\nProject [ints#92, doubles#93, strings#94, (doubles#93 = cast(strings#94 as double)) AS double = string#207]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Physical Plan ==\n*(1) Project [ints#92, doubles#93, strings#94, (doubles#93 = cast(strings#94 as double)) AS double = string#207]\n+- *(1) Scan ExistingRDD[ints#92,doubles#93,strings#94]\n\n"
]
}
],
"source": [
"df.withColumn(\"double = string\", F.col(\"doubles\") == F.col(\"strings\")).explain(True)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "7b193033-54c6-4b74-a7a3-3a590feef323",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/html": [
"<style scoped>\n",
" .table-result-container {\n",
" max-height: 300px;\n",
" overflow: auto;\n",
" }\n",
" table, th, td {\n",
" border: 1px solid black;\n",
" border-collapse: collapse;\n",
" }\n",
" th, td {\n",
" padding: 5px;\n",
" }\n",
" th {\n",
" text-align: left;\n",
" }\n",
"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>ints</th><th>values</th></tr></thead><tbody><tr><td>1</td><td>foo</td></tr><tr><td>1</td><td>bar</td></tr><tr><td>1</td><td>goo</td></tr><tr><td>1</td><td>loo</td></tr></tbody></table></div>"
]
},
"metadata": {
"application/vnd.databricks.v1+output": {
"addedWidgets": {},
"aggData": [],
"aggError": "",
"aggOverflow": false,
"aggSchema": [],
"aggSeriesLimitReached": false,
"aggType": "",
"arguments": {},
"columnCustomDisplayInfos": {},
"data": [
[
1,
"foo"
],
[
1,
"bar"
],
[
1,
"goo"
],
[
1,
"loo"
]
],
"datasetInfos": [],
"dbfsResultPath": null,
"isJsonSchema": true,
"metadata": {},
"overflow": false,
"plotOptions": {
"customPlotOptions": {},
"displayType": "table",
"pivotAggregation": null,
"pivotColumns": null,
"xColumns": null,
"yColumns": null
},
"removedWidgets": [],
"schema": [
{
"metadata": "{}",
"name": "ints",
"type": "\"integer\""
},
{
"metadata": "{}",
"name": "values",
"type": "\"string\""
}
],
"type": "table"
}
},
"output_type": "display_data"
}
],
"source": [
"cols = StructType([\n",
" StructField('ints', IntegerType()),\n",
" StructField('values', StringType()),\n",
"])\n",
"data = [\n",
" [1, \"foo\"],\n",
" [1, \"bar\"],\n",
" [1, \"goo\"],\n",
" [1, \"loo\"]\n",
"]\n",
"df1 = spark.createDataFrame(data, cols)\n",
"display(df1)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "31c3a02d-9823-4b73-852c-55aa52620b9b",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/html": [
"<style scoped>\n",
" .table-result-container {\n",
" max-height: 300px;\n",
" overflow: auto;\n",
" }\n",
" table, th, td {\n",
" border: 1px solid black;\n",
" border-collapse: collapse;\n",
" }\n",
" th, td {\n",
" padding: 5px;\n",
" }\n",
" th {\n",
" text-align: left;\n",
" }\n",
"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>strings</th><th>values2</th></tr></thead><tbody><tr><td>1.0</td><td>foo2</td></tr><tr><td>1.2</td><td>bar2</td></tr><tr><td>1.3</td><td>goo2</td></tr><tr><td>some string</td><td>loo2</td></tr></tbody></table></div>"
]
},
"metadata": {
"application/vnd.databricks.v1+output": {
"addedWidgets": {},
"aggData": [],
"aggError": "",
"aggOverflow": false,
"aggSchema": [],
"aggSeriesLimitReached": false,
"aggType": "",
"arguments": {},
"columnCustomDisplayInfos": {},
"data": [
[
"1.0",
"foo2"
],
[
"1.2",
"bar2"
],
[
"1.3",
"goo2"
],
[
"some string",
"loo2"
]
],
"datasetInfos": [],
"dbfsResultPath": null,
"isJsonSchema": true,
"metadata": {},
"overflow": false,
"plotOptions": {
"customPlotOptions": {},
"displayType": "table",
"pivotAggregation": null,
"pivotColumns": null,
"xColumns": null,
"yColumns": null
},
"removedWidgets": [],
"schema": [
{
"metadata": "{}",
"name": "strings",
"type": "\"string\""
},
{
"metadata": "{}",
"name": "values2",
"type": "\"string\""
}
],
"type": "table"
}
},
"output_type": "display_data"
}
],
"source": [
"cols = StructType([\n",
" StructField('strings', StringType()),\n",
" StructField('values2', StringType()),\n",
"])\n",
"data = [\n",
" [\"1.0\", \"foo2\"],\n",
" [\"1.2\", \"bar2\"],\n",
" [\"1.3\", \"goo2\"],\n",
" [\"some string\", \"loo2\"]\n",
"]\n",
"df2 = spark.createDataFrame(data, cols)\n",
"display(df2)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "a1c5d327-eaca-4be9-918d-444d2142663b",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"+----+------+-------+-------+\n|ints|values|strings|values2|\n+----+------+-------+-------+\n| 1| foo| 1.3| goo2|\n| 1| foo| 1.2| bar2|\n| 1| foo| 1.0| foo2|\n| 1| bar| 1.3| goo2|\n| 1| bar| 1.2| bar2|\n| 1| bar| 1.0| foo2|\n| 1| goo| 1.3| goo2|\n| 1| goo| 1.2| bar2|\n| 1| goo| 1.0| foo2|\n| 1| loo| 1.3| goo2|\n| 1| loo| 1.2| bar2|\n| 1| loo| 1.0| foo2|\n+----+------+-------+-------+\n\n"
]
}
],
"source": [
"df1.join(df2, df1.ints == df2.strings, how=\"left\").show()"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "2fb24f20-6cfd-4ba0-9590-6c3130eb310a",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.databricks.v1+bamboolib_hint": "{\"pd.DataFrames\": [], \"version\": \"0.0.1\"}",
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "1b77d904-163e-44e4-abb7-406101b7cef2",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ints</th>\n",
" <th>doubles</th>\n",
" <th>strings</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1.2</td>\n",
" <td>1.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>1.2</td>\n",
" <td>1.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1.2</td>\n",
" <td>some string</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"metadata": {
"application/vnd.databricks.v1+output": {
"addedWidgets": {},
"arguments": {},
"data": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>ints</th>\n <th>doubles</th>\n <th>strings</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>1.0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>1.2</td>\n <td>1.2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1</td>\n <td>1.2</td>\n <td>1.3</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1</td>\n <td>1.2</td>\n <td>some string</td>\n </tr>\n </tbody>\n</table>\n</div>",
"datasetInfos": [],
"metadata": {},
"removedWidgets": [],
"textData": null,
"type": "htmlSandbox"
}
},
"output_type": "display_data"
}
],
"source": [
"pdf = pd.DataFrame(\n",
" data=data,\n",
" columns=[\"ints\", \"doubles\", \"strings\"]\n",
"pdf"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "167f1cbf-6eea-4c83-83e3-9ae0f1683378",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"Out[39]: ints int64\ndoubles float64\nstrings object\ndtype: object"
]
}
],
"source": [
"pdf.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "3680ba03-5d5d-4794-a5ab-a780a2490ce2",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"Out[36]: 0 True\n1 False\n2 False\n3 False\ndtype: bool"
]
}
],
"source": [
"pdf.ints == pdf.doubles"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "9bcf4eaf-0606-43d7-909c-7309ff514c71",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"Out[37]: 0 False\n1 False\n2 False\n3 False\ndtype: bool"
]
}
],
"source": [
"pdf.ints == pdf.strings"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "40eae0b6-f550-48eb-8310-f8fd5d68f3ed",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"Out[38]: 0 False\n1 False\n2 False\n3 False\ndtype: bool"
]
}
],
"source": [
"pdf.doubles == pdf.strings"
]
}
],
"metadata": {
"application/vnd.databricks.v1+notebook": {
"dashboards": [],
"language": "python",
"notebookMetadata": {
"mostRecentlyExecutedCommandWithImplicitDF": {
"commandId": 1011127907182093,
"dataframes": [
"_sqldf"
]
},
"pythonIndentUnit": 4
},
"notebookName": "experimenting with comparing types",
"notebookOrigID": 2974607440111775,
"widgets": {}
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment