drorata/notebook.ipynb

## notebook.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "9ecca3b8-3c7d-44fd-ad67-cc4e75b816d5",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "from pyspark.sql import functions as F\n",
    "from pyspark.sql.types import IntegerType, DoubleType, StringType, StructType"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "1aecdf9d-08e9-4093-9980-5469250678b7",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "text/html": [
       "<style scoped>\n",
       "  .table-result-container {\n",
       "    max-height: 300px;\n",
       "    overflow: auto;\n",
       "  }\n",
       "  table, th, td {\n",
       "    border: 1px solid black;\n",
       "    border-collapse: collapse;\n",
       "  }\n",
       "  th, td {\n",
       "    padding: 5px;\n",
       "  }\n",
       "  th {\n",
       "    text-align: left;\n",
       "  }\n",
       "</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>ints</th><th>doubles</th><th>strings</th></tr></thead><tbody><tr><td>1</td><td>1.0</td><td>1</td></tr><tr><td>1</td><td>1.2</td><td>1.2</td></tr><tr><td>1</td><td>1.2</td><td>1.3</td></tr><tr><td>1</td><td>1.2</td><td>some string</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {
      "application/vnd.databricks.v1+output": {
       "addedWidgets": {},
       "aggData": [],
       "aggError": "",
       "aggOverflow": false,
       "aggSchema": [],
       "aggSeriesLimitReached": false,
       "aggType": "",
       "arguments": {},
       "columnCustomDisplayInfos": {},
       "data": [
        [
         1,
         1.0,
         "1"
        ],
        [
         1,
         1.2,
         "1.2"
        ],
        [
         1,
         1.2,
         "1.3"
        ],
        [
         1,
         1.2,
         "some string"
        ]
       ],
       "datasetInfos": [],
       "dbfsResultPath": null,
       "isJsonSchema": true,
       "metadata": {},
       "overflow": false,
       "plotOptions": {
        "customPlotOptions": {},
        "displayType": "table",
        "pivotAggregation": null,
        "pivotColumns": null,
        "xColumns": null,
        "yColumns": null
       },
       "removedWidgets": [],
       "schema": [
        {
         "metadata": "{}",
         "name": "ints",
         "type": "\"integer\""
        },
        {
         "metadata": "{}",
         "name": "doubles",
         "type": "\"double\""
        },
        {
         "metadata": "{}",
         "name": "strings",
         "type": "\"string\""
        }
       ],
       "type": "table"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "cols = StructType([\n",
    "    StructField('ints', IntegerType()),\n",
    "    StructField('doubles', DoubleType()),\n",
    "    StructField('strings', StringType()),\n",
    "])\n",
    "data = [\n",
    "    [1, 1.0, \"1\"],\n",
    "    [1, 1.2, \"1.2\"],\n",
    "    [1, 1.2, \"1.3\"],\n",
    "    [1, 1.2, \"some string\"]\n",
    "]\n",
    "df = spark.createDataFrame(data, cols)\n",
    "display(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "5c1d0d54-18f5-4e4a-9ddf-383ce5308ef1",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "# INTs vs DOUBLEs ✅"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "1fb7e2b9-1150-423a-8b50-d86ceb6e59ac",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----+-------+-----------+-------------+\n|ints|doubles|    strings|int = doubles|\n+----+-------+-----------+-------------+\n|   1|    1.0|          1|         true|\n|   1|    1.2|        1.2|        false|\n|   1|    1.2|        1.3|        false|\n|   1|    1.2|some string|        false|\n+----+-------+-----------+-------------+\n\n"
     ]
    }
   ],
   "source": [
    "df.withColumn(\"int = doubles\", F.col(\"ints\") == F.col(\"doubles\")).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "7e7fff70-bb82-46e6-9b7c-debf1f227243",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "== Parsed Logical Plan ==\n'Project [ints#92, doubles#93, strings#94, ('ints = 'doubles) AS int = doubles#188]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Analyzed Logical Plan ==\nints: int, doubles: double, strings: string, int = doubles: boolean\nProject [ints#92, doubles#93, strings#94, (cast(ints#92 as double) = doubles#93) AS int = doubles#188]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Optimized Logical Plan ==\nProject [ints#92, doubles#93, strings#94, (cast(ints#92 as double) = doubles#93) AS int = doubles#188]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Physical Plan ==\n*(1) Project [ints#92, doubles#93, strings#94, (cast(ints#92 as double) = doubles#93) AS int = doubles#188]\n+- *(1) Scan ExistingRDD[ints#92,doubles#93,strings#94]\n\n"
     ]
    }
   ],
   "source": [
    "df.withColumn(\"int = doubles\", F.col(\"ints\") == F.col(\"doubles\")).explain(True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "8e14b7c4-4194-4a1d-8c58-e6970f443169",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "# INTs vs STRINGs ❌"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "eec13726-8878-4857-9f17-a4ae02faf7d8",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----+-------+-----------+------------+\n|ints|doubles|    strings|int = string|\n+----+-------+-----------+------------+\n|   1|    1.0|          1|        true|\n|   1|    1.2|        1.2|        true|\n|   1|    1.2|        1.3|        true|\n|   1|    1.2|some string|        null|\n+----+-------+-----------+------------+\n\n"
     ]
    }
   ],
   "source": [
    "df.withColumn(\"int = string\", F.col(\"ints\") == F.col(\"strings\")).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "5a88bbc2-e457-4512-bb90-aeb3950ecc73",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "== Parsed Logical Plan ==\n'Project [ints#92, doubles#93, strings#94, ('ints = 'strings) AS int = string#169]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Analyzed Logical Plan ==\nints: int, doubles: double, strings: string, int = string: boolean\nProject [ints#92, doubles#93, strings#94, (ints#92 = cast(strings#94 as int)) AS int = string#169]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Optimized Logical Plan ==\nProject [ints#92, doubles#93, strings#94, (ints#92 = cast(strings#94 as int)) AS int = string#169]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Physical Plan ==\n*(1) Project [ints#92, doubles#93, strings#94, (ints#92 = cast(strings#94 as int)) AS int = string#169]\n+- *(1) Scan ExistingRDD[ints#92,doubles#93,strings#94]\n\n"
     ]
    }
   ],
   "source": [
    "df.withColumn(\"int = string\", F.col(\"ints\") == F.col(\"strings\")).explain(True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "d62e98b8-3675-4cb8-9049-3bbfbd5a05a7",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "# DOUBLEs vs STRINGs 🔱"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "af02f4f6-7e6a-489b-b6ed-e22175089449",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----+-------+-----------+---------------+\n|ints|doubles|    strings|double = string|\n+----+-------+-----------+---------------+\n|   1|    1.0|          1|           true|\n|   1|    1.2|        1.2|           true|\n|   1|    1.2|        1.3|          false|\n|   1|    1.2|some string|           null|\n+----+-------+-----------+---------------+\n\n"
     ]
    }
   ],
   "source": [
    "df.withColumn(\"double = string\", F.col(\"doubles\") == F.col(\"strings\")).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "19201770-fc3b-49c0-ae35-01a3847c41f0",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "== Parsed Logical Plan ==\n'Project [ints#92, doubles#93, strings#94, ('doubles = 'strings) AS double = string#207]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Analyzed Logical Plan ==\nints: int, doubles: double, strings: string, double = string: boolean\nProject [ints#92, doubles#93, strings#94, (doubles#93 = cast(strings#94 as double)) AS double = string#207]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Optimized Logical Plan ==\nProject [ints#92, doubles#93, strings#94, (doubles#93 = cast(strings#94 as double)) AS double = string#207]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Physical Plan ==\n*(1) Project [ints#92, doubles#93, strings#94, (doubles#93 = cast(strings#94 as double)) AS double = string#207]\n+- *(1) Scan ExistingRDD[ints#92,doubles#93,strings#94]\n\n"
     ]
    }
   ],
   "source": [
    "df.withColumn(\"double = string\", F.col(\"doubles\") == F.col(\"strings\")).explain(True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "7b193033-54c6-4b74-a7a3-3a590feef323",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "text/html": [
       "<style scoped>\n",
       "  .table-result-container {\n",
       "    max-height: 300px;\n",
       "    overflow: auto;\n",
       "  }\n",
       "  table, th, td {\n",
       "    border: 1px solid black;\n",
       "    border-collapse: collapse;\n",
       "  }\n",
       "  th, td {\n",
       "    padding: 5px;\n",
       "  }\n",
       "  th {\n",
       "    text-align: left;\n",
       "  }\n",
       "</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>ints</th><th>values</th></tr></thead><tbody><tr><td>1</td><td>foo</td></tr><tr><td>1</td><td>bar</td></tr><tr><td>1</td><td>goo</td></tr><tr><td>1</td><td>loo</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {
      "application/vnd.databricks.v1+output": {
       "addedWidgets": {},
       "aggData": [],
       "aggError": "",
       "aggOverflow": false,
       "aggSchema": [],
       "aggSeriesLimitReached": false,
       "aggType": "",
       "arguments": {},
       "columnCustomDisplayInfos": {},
       "data": [
        [
         1,
         "foo"
        ],
        [
         1,
         "bar"
        ],
        [
         1,
         "goo"
        ],
        [
         1,
         "loo"
        ]
       ],
       "datasetInfos": [],
       "dbfsResultPath": null,
       "isJsonSchema": true,
       "metadata": {},
       "overflow": false,
       "plotOptions": {
        "customPlotOptions": {},
        "displayType": "table",
        "pivotAggregation": null,
        "pivotColumns": null,
        "xColumns": null,
        "yColumns": null
       },
       "removedWidgets": [],
       "schema": [
        {
         "metadata": "{}",
         "name": "ints",
         "type": "\"integer\""
        },
        {
         "metadata": "{}",
         "name": "values",
         "type": "\"string\""
        }
       ],
       "type": "table"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "cols = StructType([\n",
    "    StructField('ints', IntegerType()),\n",
    "    StructField('values', StringType()),\n",
    "])\n",
    "data = [\n",
    "    [1, \"foo\"],\n",
    "    [1, \"bar\"],\n",
    "    [1, \"goo\"],\n",
    "    [1, \"loo\"]\n",
    "]\n",
    "df1 = spark.createDataFrame(data, cols)\n",
    "display(df1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "31c3a02d-9823-4b73-852c-55aa52620b9b",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "text/html": [
       "<style scoped>\n",
       "  .table-result-container {\n",
       "    max-height: 300px;\n",
       "    overflow: auto;\n",
       "  }\n",
       "  table, th, td {\n",
       "    border: 1px solid black;\n",
       "    border-collapse: collapse;\n",
       "  }\n",
       "  th, td {\n",
       "    padding: 5px;\n",
       "  }\n",
       "  th {\n",
       "    text-align: left;\n",
       "  }\n",
       "</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>strings</th><th>values2</th></tr></thead><tbody><tr><td>1.0</td><td>foo2</td></tr><tr><td>1.2</td><td>bar2</td></tr><tr><td>1.3</td><td>goo2</td></tr><tr><td>some string</td><td>loo2</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {
      "application/vnd.databricks.v1+output": {
       "addedWidgets": {},
       "aggData": [],
       "aggError": "",
       "aggOverflow": false,
       "aggSchema": [],
       "aggSeriesLimitReached": false,
       "aggType": "",
       "arguments": {},
       "columnCustomDisplayInfos": {},
       "data": [
        [
         "1.0",
         "foo2"
        ],
        [
         "1.2",
         "bar2"
        ],
        [
         "1.3",
         "goo2"
        ],
        [
         "some string",
         "loo2"
        ]
       ],
       "datasetInfos": [],
       "dbfsResultPath": null,
       "isJsonSchema": true,
       "metadata": {},
       "overflow": false,
       "plotOptions": {
        "customPlotOptions": {},
        "displayType": "table",
        "pivotAggregation": null,
        "pivotColumns": null,
        "xColumns": null,
        "yColumns": null
       },
       "removedWidgets": [],
       "schema": [
        {
         "metadata": "{}",
         "name": "strings",
         "type": "\"string\""
        },
        {
         "metadata": "{}",
         "name": "values2",
         "type": "\"string\""
        }
       ],
       "type": "table"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "cols = StructType([\n",
    "    StructField('strings', StringType()),\n",
    "    StructField('values2', StringType()),\n",
    "])\n",
    "data = [\n",
    "    [\"1.0\", \"foo2\"],\n",
    "    [\"1.2\", \"bar2\"],\n",
    "    [\"1.3\", \"goo2\"],\n",
    "    [\"some string\", \"loo2\"]\n",
    "]\n",
    "df2 = spark.createDataFrame(data, cols)\n",
    "display(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "a1c5d327-eaca-4be9-918d-444d2142663b",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----+------+-------+-------+\n|ints|values|strings|values2|\n+----+------+-------+-------+\n|   1|   foo|    1.3|   goo2|\n|   1|   foo|    1.2|   bar2|\n|   1|   foo|    1.0|   foo2|\n|   1|   bar|    1.3|   goo2|\n|   1|   bar|    1.2|   bar2|\n|   1|   bar|    1.0|   foo2|\n|   1|   goo|    1.3|   goo2|\n|   1|   goo|    1.2|   bar2|\n|   1|   goo|    1.0|   foo2|\n|   1|   loo|    1.3|   goo2|\n|   1|   loo|    1.2|   bar2|\n|   1|   loo|    1.0|   foo2|\n+----+------+-------+-------+\n\n"
     ]
    }
   ],
   "source": [
    "df1.join(df2, df1.ints == df2.strings, how=\"left\").show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "2fb24f20-6cfd-4ba0-9590-6c3130eb310a",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "application/vnd.databricks.v1+bamboolib_hint": "{\"pd.DataFrames\": [], \"version\": \"0.0.1\"}",
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "1b77d904-163e-44e4-abb7-406101b7cef2",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ints</th>\n",
       "      <th>doubles</th>\n",
       "      <th>strings</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1.2</td>\n",
       "      <td>1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1.2</td>\n",
       "      <td>1.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1.2</td>\n",
       "      <td>some string</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "metadata": {
      "application/vnd.databricks.v1+output": {
       "addedWidgets": {},
       "arguments": {},
       "data": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>ints</th>\n      <th>doubles</th>\n      <th>strings</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>1.0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>1.2</td>\n      <td>1.2</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>1.2</td>\n      <td>1.3</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>1.2</td>\n      <td>some string</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
       "datasetInfos": [],
       "metadata": {},
       "removedWidgets": [],
       "textData": null,
       "type": "htmlSandbox"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "pdf = pd.DataFrame(\n",
    "    data=data,\n",
    "    columns=[\"ints\", \"doubles\", \"strings\"]\n",
    "pdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "167f1cbf-6eea-4c83-83e3-9ae0f1683378",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Out[39]: ints         int64\ndoubles    float64\nstrings     object\ndtype: object"
     ]
    }
   ],
   "source": [
    "pdf.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "3680ba03-5d5d-4794-a5ab-a780a2490ce2",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Out[36]: 0     True\n1    False\n2    False\n3    False\ndtype: bool"
     ]
    }
   ],
   "source": [
    "pdf.ints == pdf.doubles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "9bcf4eaf-0606-43d7-909c-7309ff514c71",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Out[37]: 0    False\n1    False\n2    False\n3    False\ndtype: bool"
     ]
    }
   ],
   "source": [
    "pdf.ints == pdf.strings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "40eae0b6-f550-48eb-8310-f8fd5d68f3ed",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Out[38]: 0    False\n1    False\n2    False\n3    False\ndtype: bool"
     ]
    }
   ],
   "source": [
    "pdf.doubles == pdf.strings"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "mostRecentlyExecutedCommandWithImplicitDF": {
     "commandId": 1011127907182093,
     "dataframes": [
      "_sqldf"
     ]
    },
    "pythonIndentUnit": 4
   },
   "notebookName": "experimenting with comparing types",
   "notebookOrigID": 2974607440111775,
   "widgets": {}
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "9ecca3b8-3c7d-44fd-ad67-cc4e75b816d5",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [],
	"source": [
	"from pyspark.sql import functions as F\n",
	"from pyspark.sql.types import IntegerType, DoubleType, StringType, StructType"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "1aecdf9d-08e9-4093-9980-5469250678b7",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"text/html": [
	"<style scoped>\n",
	" .table-result-container {\n",
	" max-height: 300px;\n",
	" overflow: auto;\n",
	" }\n",
	" table, th, td {\n",
	" border: 1px solid black;\n",
	" border-collapse: collapse;\n",
	" }\n",
	" th, td {\n",
	" padding: 5px;\n",
	" }\n",
	" th {\n",
	" text-align: left;\n",
	" }\n",
	"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>ints</th><th>doubles</th><th>strings</th></tr></thead><tbody><tr><td>1</td><td>1.0</td><td>1</td></tr><tr><td>1</td><td>1.2</td><td>1.2</td></tr><tr><td>1</td><td>1.2</td><td>1.3</td></tr><tr><td>1</td><td>1.2</td><td>some string</td></tr></tbody></table></div>"
	]
	},
	"metadata": {
	"application/vnd.databricks.v1+output": {
	"addedWidgets": {},
	"aggData": [],
	"aggError": "",
	"aggOverflow": false,
	"aggSchema": [],
	"aggSeriesLimitReached": false,
	"aggType": "",
	"arguments": {},
	"columnCustomDisplayInfos": {},
	"data": [
	[
	1,
	1.0,
	"1"
	],
	[
	1,
	1.2,
	"1.2"
	],
	[
	1,
	1.2,
	"1.3"
	],
	[
	1,
	1.2,
	"some string"
	]
	],
	"datasetInfos": [],
	"dbfsResultPath": null,
	"isJsonSchema": true,
	"metadata": {},
	"overflow": false,
	"plotOptions": {
	"customPlotOptions": {},
	"displayType": "table",
	"pivotAggregation": null,
	"pivotColumns": null,
	"xColumns": null,
	"yColumns": null
	},
	"removedWidgets": [],
	"schema": [
	{
	"metadata": "{}",
	"name": "ints",
	"type": "\"integer\""
	},
	{
	"metadata": "{}",
	"name": "doubles",
	"type": "\"double\""
	},
	{
	"metadata": "{}",
	"name": "strings",
	"type": "\"string\""
	}
	],
	"type": "table"
	}
	},
	"output_type": "display_data"
	}
	],
	"source": [
	"cols = StructType([\n",
	" StructField('ints', IntegerType()),\n",
	" StructField('doubles', DoubleType()),\n",
	" StructField('strings', StringType()),\n",
	"])\n",
	"data = [\n",
	" [1, 1.0, \"1\"],\n",
	" [1, 1.2, \"1.2\"],\n",
	" [1, 1.2, \"1.3\"],\n",
	" [1, 1.2, \"some string\"]\n",
	"]\n",
	"df = spark.createDataFrame(data, cols)\n",
	"display(df)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {},
	"inputWidgets": {},
	"nuid": "5c1d0d54-18f5-4e4a-9ddf-383ce5308ef1",
	"showTitle": false,
	"title": ""
	}
	},
	"source": [
	"# INTs vs DOUBLEs ✅"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "1fb7e2b9-1150-423a-8b50-d86ceb6e59ac",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"+----+-------+-----------+-------------+\n\|ints\|doubles\| strings\|int = doubles\|\n+----+-------+-----------+-------------+\n\| 1\| 1.0\| 1\| true\|\n\| 1\| 1.2\| 1.2\| false\|\n\| 1\| 1.2\| 1.3\| false\|\n\| 1\| 1.2\|some string\| false\|\n+----+-------+-----------+-------------+\n\n"
	]
	}
	],
	"source": [
	"df.withColumn(\"int = doubles\", F.col(\"ints\") == F.col(\"doubles\")).show()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "7e7fff70-bb82-46e6-9b7c-debf1f227243",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"== Parsed Logical Plan ==\n'Project [ints#92, doubles#93, strings#94, ('ints = 'doubles) AS int = doubles#188]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Analyzed Logical Plan ==\nints: int, doubles: double, strings: string, int = doubles: boolean\nProject [ints#92, doubles#93, strings#94, (cast(ints#92 as double) = doubles#93) AS int = doubles#188]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Optimized Logical Plan ==\nProject [ints#92, doubles#93, strings#94, (cast(ints#92 as double) = doubles#93) AS int = doubles#188]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Physical Plan ==\n(1) Project [ints#92, doubles#93, strings#94, (cast(ints#92 as double) = doubles#93) AS int = doubles#188]\n+- (1) Scan ExistingRDD[ints#92,doubles#93,strings#94]\n\n"
	]
	}
	],
	"source": [
	"df.withColumn(\"int = doubles\", F.col(\"ints\") == F.col(\"doubles\")).explain(True)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {},
	"inputWidgets": {},
	"nuid": "8e14b7c4-4194-4a1d-8c58-e6970f443169",
	"showTitle": false,
	"title": ""
	}
	},
	"source": [
	"# INTs vs STRINGs ❌"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "eec13726-8878-4857-9f17-a4ae02faf7d8",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"+----+-------+-----------+------------+\n\|ints\|doubles\| strings\|int = string\|\n+----+-------+-----------+------------+\n\| 1\| 1.0\| 1\| true\|\n\| 1\| 1.2\| 1.2\| true\|\n\| 1\| 1.2\| 1.3\| true\|\n\| 1\| 1.2\|some string\| null\|\n+----+-------+-----------+------------+\n\n"
	]
	}
	],
	"source": [
	"df.withColumn(\"int = string\", F.col(\"ints\") == F.col(\"strings\")).show()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "5a88bbc2-e457-4512-bb90-aeb3950ecc73",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"== Parsed Logical Plan ==\n'Project [ints#92, doubles#93, strings#94, ('ints = 'strings) AS int = string#169]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Analyzed Logical Plan ==\nints: int, doubles: double, strings: string, int = string: boolean\nProject [ints#92, doubles#93, strings#94, (ints#92 = cast(strings#94 as int)) AS int = string#169]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Optimized Logical Plan ==\nProject [ints#92, doubles#93, strings#94, (ints#92 = cast(strings#94 as int)) AS int = string#169]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Physical Plan ==\n(1) Project [ints#92, doubles#93, strings#94, (ints#92 = cast(strings#94 as int)) AS int = string#169]\n+- (1) Scan ExistingRDD[ints#92,doubles#93,strings#94]\n\n"
	]
	}
	],
	"source": [
	"df.withColumn(\"int = string\", F.col(\"ints\") == F.col(\"strings\")).explain(True)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {},
	"inputWidgets": {},
	"nuid": "d62e98b8-3675-4cb8-9049-3bbfbd5a05a7",
	"showTitle": false,
	"title": ""
	}
	},
	"source": [
	"# DOUBLEs vs STRINGs 🔱"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "af02f4f6-7e6a-489b-b6ed-e22175089449",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"+----+-------+-----------+---------------+\n\|ints\|doubles\| strings\|double = string\|\n+----+-------+-----------+---------------+\n\| 1\| 1.0\| 1\| true\|\n\| 1\| 1.2\| 1.2\| true\|\n\| 1\| 1.2\| 1.3\| false\|\n\| 1\| 1.2\|some string\| null\|\n+----+-------+-----------+---------------+\n\n"
	]
	}
	],
	"source": [
	"df.withColumn(\"double = string\", F.col(\"doubles\") == F.col(\"strings\")).show()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "19201770-fc3b-49c0-ae35-01a3847c41f0",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"== Parsed Logical Plan ==\n'Project [ints#92, doubles#93, strings#94, ('doubles = 'strings) AS double = string#207]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Analyzed Logical Plan ==\nints: int, doubles: double, strings: string, double = string: boolean\nProject [ints#92, doubles#93, strings#94, (doubles#93 = cast(strings#94 as double)) AS double = string#207]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Optimized Logical Plan ==\nProject [ints#92, doubles#93, strings#94, (doubles#93 = cast(strings#94 as double)) AS double = string#207]\n+- LogicalRDD [ints#92, doubles#93, strings#94], false\n\n== Physical Plan ==\n(1) Project [ints#92, doubles#93, strings#94, (doubles#93 = cast(strings#94 as double)) AS double = string#207]\n+- (1) Scan ExistingRDD[ints#92,doubles#93,strings#94]\n\n"
	]
	}
	],
	"source": [
	"df.withColumn(\"double = string\", F.col(\"doubles\") == F.col(\"strings\")).explain(True)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "7b193033-54c6-4b74-a7a3-3a590feef323",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"text/html": [
	"<style scoped>\n",
	" .table-result-container {\n",
	" max-height: 300px;\n",
	" overflow: auto;\n",
	" }\n",
	" table, th, td {\n",
	" border: 1px solid black;\n",
	" border-collapse: collapse;\n",
	" }\n",
	" th, td {\n",
	" padding: 5px;\n",
	" }\n",
	" th {\n",
	" text-align: left;\n",
	" }\n",
	"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>ints</th><th>values</th></tr></thead><tbody><tr><td>1</td><td>foo</td></tr><tr><td>1</td><td>bar</td></tr><tr><td>1</td><td>goo</td></tr><tr><td>1</td><td>loo</td></tr></tbody></table></div>"
	]
	},
	"metadata": {
	"application/vnd.databricks.v1+output": {
	"addedWidgets": {},
	"aggData": [],
	"aggError": "",
	"aggOverflow": false,
	"aggSchema": [],
	"aggSeriesLimitReached": false,
	"aggType": "",
	"arguments": {},
	"columnCustomDisplayInfos": {},
	"data": [
	[
	1,
	"foo"
	],
	[
	1,
	"bar"
	],
	[
	1,
	"goo"
	],
	[
	1,
	"loo"
	]
	],
	"datasetInfos": [],
	"dbfsResultPath": null,
	"isJsonSchema": true,
	"metadata": {},
	"overflow": false,
	"plotOptions": {
	"customPlotOptions": {},
	"displayType": "table",
	"pivotAggregation": null,
	"pivotColumns": null,
	"xColumns": null,
	"yColumns": null
	},
	"removedWidgets": [],
	"schema": [
	{
	"metadata": "{}",
	"name": "ints",
	"type": "\"integer\""
	},
	{
	"metadata": "{}",
	"name": "values",
	"type": "\"string\""
	}
	],
	"type": "table"
	}
	},
	"output_type": "display_data"
	}
	],
	"source": [
	"cols = StructType([\n",
	" StructField('ints', IntegerType()),\n",
	" StructField('values', StringType()),\n",
	"])\n",
	"data = [\n",
	" [1, \"foo\"],\n",
	" [1, \"bar\"],\n",
	" [1, \"goo\"],\n",
	" [1, \"loo\"]\n",
	"]\n",
	"df1 = spark.createDataFrame(data, cols)\n",
	"display(df1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "31c3a02d-9823-4b73-852c-55aa52620b9b",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"text/html": [
	"<style scoped>\n",
	" .table-result-container {\n",
	" max-height: 300px;\n",
	" overflow: auto;\n",
	" }\n",
	" table, th, td {\n",
	" border: 1px solid black;\n",
	" border-collapse: collapse;\n",
	" }\n",
	" th, td {\n",
	" padding: 5px;\n",
	" }\n",
	" th {\n",
	" text-align: left;\n",
	" }\n",
	"</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>strings</th><th>values2</th></tr></thead><tbody><tr><td>1.0</td><td>foo2</td></tr><tr><td>1.2</td><td>bar2</td></tr><tr><td>1.3</td><td>goo2</td></tr><tr><td>some string</td><td>loo2</td></tr></tbody></table></div>"
	]
	},
	"metadata": {
	"application/vnd.databricks.v1+output": {
	"addedWidgets": {},
	"aggData": [],
	"aggError": "",
	"aggOverflow": false,
	"aggSchema": [],
	"aggSeriesLimitReached": false,
	"aggType": "",
	"arguments": {},
	"columnCustomDisplayInfos": {},
	"data": [
	[
	"1.0",
	"foo2"
	],
	[
	"1.2",
	"bar2"
	],
	[
	"1.3",
	"goo2"
	],
	[
	"some string",
	"loo2"
	]
	],
	"datasetInfos": [],
	"dbfsResultPath": null,
	"isJsonSchema": true,
	"metadata": {},
	"overflow": false,
	"plotOptions": {
	"customPlotOptions": {},
	"displayType": "table",
	"pivotAggregation": null,
	"pivotColumns": null,
	"xColumns": null,
	"yColumns": null
	},
	"removedWidgets": [],
	"schema": [
	{
	"metadata": "{}",
	"name": "strings",
	"type": "\"string\""
	},
	{
	"metadata": "{}",
	"name": "values2",
	"type": "\"string\""
	}
	],
	"type": "table"
	}
	},
	"output_type": "display_data"
	}
	],
	"source": [
	"cols = StructType([\n",
	" StructField('strings', StringType()),\n",
	" StructField('values2', StringType()),\n",
	"])\n",
	"data = [\n",
	" [\"1.0\", \"foo2\"],\n",
	" [\"1.2\", \"bar2\"],\n",
	" [\"1.3\", \"goo2\"],\n",
	" [\"some string\", \"loo2\"]\n",
	"]\n",
	"df2 = spark.createDataFrame(data, cols)\n",
	"display(df2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "a1c5d327-eaca-4be9-918d-444d2142663b",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"+----+------+-------+-------+\n\|ints\|values\|strings\|values2\|\n+----+------+-------+-------+\n\| 1\| foo\| 1.3\| goo2\|\n\| 1\| foo\| 1.2\| bar2\|\n\| 1\| foo\| 1.0\| foo2\|\n\| 1\| bar\| 1.3\| goo2\|\n\| 1\| bar\| 1.2\| bar2\|\n\| 1\| bar\| 1.0\| foo2\|\n\| 1\| goo\| 1.3\| goo2\|\n\| 1\| goo\| 1.2\| bar2\|\n\| 1\| goo\| 1.0\| foo2\|\n\| 1\| loo\| 1.3\| goo2\|\n\| 1\| loo\| 1.2\| bar2\|\n\| 1\| loo\| 1.0\| foo2\|\n+----+------+-------+-------+\n\n"
	]
	}
	],
	"source": [
	"df1.join(df2, df1.ints == df2.strings, how=\"left\").show()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "2fb24f20-6cfd-4ba0-9590-6c3130eb310a",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"application/vnd.databricks.v1+bamboolib_hint": "{\"pd.DataFrames\": [], \"version\": \"0.0.1\"}",
	"text/plain": []
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"import pandas as pd"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "1b77d904-163e-44e4-abb7-406101b7cef2",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "display_data",
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>ints</th>\n",
	" <th>doubles</th>\n",
	" <th>strings</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>1.0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>1</td>\n",
	" <td>1.2</td>\n",
	" <td>1.2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>1</td>\n",
	" <td>1.2</td>\n",
	" <td>1.3</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>1</td>\n",
	" <td>1.2</td>\n",
	" <td>some string</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	]
	},
	"metadata": {
	"application/vnd.databricks.v1+output": {
	"addedWidgets": {},
	"arguments": {},
	"data": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>ints</th>\n <th>doubles</th>\n <th>strings</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>1.0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>1.2</td>\n <td>1.2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1</td>\n <td>1.2</td>\n <td>1.3</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1</td>\n <td>1.2</td>\n <td>some string</td>\n </tr>\n </tbody>\n</table>\n</div>",
	"datasetInfos": [],
	"metadata": {},
	"removedWidgets": [],
	"textData": null,
	"type": "htmlSandbox"
	}
	},
	"output_type": "display_data"
	}
	],
	"source": [
	"pdf = pd.DataFrame(\n",
	" data=data,\n",
	" columns=[\"ints\", \"doubles\", \"strings\"]\n",
	"pdf"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "167f1cbf-6eea-4c83-83e3-9ae0f1683378",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Out[39]: ints int64\ndoubles float64\nstrings object\ndtype: object"
	]
	}
	],
	"source": [
	"pdf.dtypes"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "3680ba03-5d5d-4794-a5ab-a780a2490ce2",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Out[36]: 0 True\n1 False\n2 False\n3 False\ndtype: bool"
	]
	}
	],
	"source": [
	"pdf.ints == pdf.doubles"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "9bcf4eaf-0606-43d7-909c-7309ff514c71",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Out[37]: 0 False\n1 False\n2 False\n3 False\ndtype: bool"
	]
	}
	],
	"source": [
	"pdf.ints == pdf.strings"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 0,
	"metadata": {
	"application/vnd.databricks.v1+cell": {
	"cellMetadata": {
	"byteLimit": 2048000,
	"rowLimit": 10000
	},
	"inputWidgets": {},
	"nuid": "40eae0b6-f550-48eb-8310-f8fd5d68f3ed",
	"showTitle": false,
	"title": ""
	}
	},
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Out[38]: 0 False\n1 False\n2 False\n3 False\ndtype: bool"
	]
	}
	],
	"source": [
	"pdf.doubles == pdf.strings"
	]
	}
	],
	"metadata": {
	"application/vnd.databricks.v1+notebook": {
	"dashboards": [],
	"language": "python",
	"notebookMetadata": {
	"mostRecentlyExecutedCommandWithImplicitDF": {
	"commandId": 1011127907182093,
	"dataframes": [
	"_sqldf"
	]
	},
	"pythonIndentUnit": 4
	},
	"notebookName": "experimenting with comparing types",
	"notebookOrigID": 2974607440111775,
	"widgets": {}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}