Last active
June 26, 2024 03:23
-
-
Save orellabac/c21308663e754736983602a49986f4c6 to your computer and use it in GitHub Desktop.
Sample Notebook that can be used to use Snowpark In a Databricks Notebook
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "897eb1dd-619b-4948-a277-9440ce33a4e9", | |
"showTitle": true, | |
"title": "Register MAGIC cell" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# use this extensions if you want to have %%sql magics that run your code in snowpark\n", | |
"try:\n", | |
" from IPython.core.magic import register_cell_magic\n", | |
" def sql(line, cell):\n", | |
" import IPython\n", | |
" import re\n", | |
" user_ns = IPython.get_ipython().user_ns\n", | |
" if \"session\" in user_ns:\n", | |
" session = user_ns['session']\n", | |
" #if you have installed jinja you can use it\n", | |
" #from jinja2 import Template\n", | |
" #t = Template(cell)\n", | |
" #res = t.render(user_ns)\n", | |
" res = cell\n", | |
" name = None\n", | |
" if line and line.strip():\n", | |
" name = line.strip().split(\" \")[0]\n", | |
" # If there are several statements only last will be returned\n", | |
" # also we will remove all ; at the end to avoid issues with empty statements\n", | |
" res = re.sub(r';+$', '', res)\n", | |
" for cursor in session.connection.execute_string(res):\n", | |
" df = session.sql(f\"SELECT * FROM TABLE(RESULT_SCAN('{cursor.sfqid}'))\")\n", | |
" # to avoid needed to do a count on display\n", | |
" setattr(df,\"_cached_rowcount\",cursor.rowcount)\n", | |
" if name:\n", | |
" user_ns[name] = df\n", | |
" else:\n", | |
" user_ns[\"__df\"] = df\n", | |
" return df\n", | |
" else:\n", | |
" return \"No session was found. You can setup one by running: session = Session.builder.getOrCreate()\"\n", | |
" register_cell_magic(sql)\n", | |
"except:\n", | |
" print(\"error with magic\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "595b9679-ffc6-414f-a278-f14155b12ab1", | |
"showTitle": true, | |
"title": "ExtendDataFrame" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# use this extensions for better integration with notebooks\n", | |
"from snowflake.snowpark import DataFrame\n", | |
"import pandas as pd\n", | |
"if not hasattr(DataFrame,\"___extended\"):\n", | |
" \n", | |
" DataFrame.___extended = True\n", | |
"\n", | |
" def _repr_html_(self):\n", | |
" import IPython\n", | |
" rows_limit = getattr(DataFrame,'__rows_limit',50)\n", | |
" if 'display' in globals():\n", | |
" display = globals()['display']\n", | |
" elif 'display' in IPython.get_ipython().user_ns:\n", | |
" display = IPython.get_ipython().user_ns['display']\n", | |
" else:\n", | |
" from IPython.display import display\n", | |
" try:\n", | |
" count = self._cached_rowcount if hasattr(self,\"_cached_rowcount\") else self.count()\n", | |
" self.count()\n", | |
" if count == 0:\n", | |
" return \"No rows to display\"\n", | |
" elif count == 1:\n", | |
" df = pd.DataFrame.from_records([x.as_dict() for x in self.collect()])\n", | |
" elif count > rows_limit:\n", | |
" print(f\"There are {count} rows. Showing only {rows_limit}. Change DataFrame.__rows_limit value to display more rows\")\n", | |
" df = self.limit(rows_limit).to_pandas()\n", | |
" else:\n", | |
" df = self.to_pandas()\n", | |
" display(df)\n", | |
" return \"\"\n", | |
" except Exception as ex:\n", | |
" return str(ex)\n", | |
"\n", | |
" setattr(DataFrame,'_repr_html_',_repr_html_)\n", | |
" setattr(DataFrame,'__rows_limit',50)\n", | |
"\n", | |
" def _ipython_key_completions_(self):\n", | |
" \"\"\"Returns the names of columns in this :class:`DataFrame`.\n", | |
" \"\"\"\n", | |
" return self.columns\n", | |
" setattr(DataFrame,'_ipython_key_completions_',_ipython_key_completions_)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "2feac9dd-cb67-4f01-a229-a77952467997", | |
"showTitle": true, | |
"title": "ExtendSession" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"from snowflake.snowpark import Session\n", | |
"\n", | |
"if not hasattr(Session,\"___extended\"):\n", | |
" setattr(Session,\"___extended\",True)\n", | |
" def __repr__(self):\n", | |
" from snowflake.snowpark import Session\n", | |
" from snowflake.snowpark.version import VERSION\n", | |
" snowflake_environment = self.sql('select current_version()').collect()\n", | |
" snowpark_version = VERSION\n", | |
" # Current Environment Details\n", | |
" print( \n", | |
" f'User : {self.get_current_user()}\\n' +\\\n", | |
" f'Role : {self.get_current_role()}\\n' +\\\n", | |
" f'Warehouse : {self.get_current_warehouse()}\\n' +\\\n", | |
" f'Database : {self.get_current_database()}\\n' +\\\n", | |
" f'Schema : {self.get_current_schema()}\\n' +\\\n", | |
" f'Snowflake version : {snowflake_environment[0][0]}\\n' +\\\n", | |
" f'Snowpark for Python version : {snowpark_version[0]}.{snowpark_version[1]}.{snowpark_version[2]}\\n')\n", | |
" \n", | |
" # this method is added for IPython compatibility\n", | |
" setattr(Session,\"__repr__\",__repr__)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "e04de2ab-42a6-41be-b00d-8fafcbf42582", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# as a recommended practice gather this from secrets\n", | |
"\n", | |
"import os\n", | |
"scope = \"sf_secrets_demo\"\n", | |
"os.environ[\"SNOWFLAKE_CONNECTIONS\"] = f\"\"\"\n", | |
"\n", | |
"default_connection_name = \"default\"\n", | |
"[connections]\n", | |
"\n", | |
"[default]\n", | |
"account = \"{dbutils.secrets.get(scope, \"sf-account\")}\"\n", | |
"user = \"{dbutils.secrets.get(scope, \"sf-user\")}\"\n", | |
"password = \"{dbutils.secrets.get(scope, \"sf-password\")}\"\n", | |
"role = \"{dbutils.secrets.get(scope, \"sf-role\")}\"\n", | |
"database = \"{dbutils.secrets.get(scope, \"sf-database\")}\"\n", | |
"schema = \"{dbutils.secrets.get(scope, \"sf-schema\")}\"\n", | |
"warehouse = \"{dbutils.secrets.get(scope, \"sf-warehouse\")}\"\n", | |
"\n", | |
"\"\"\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "bb0a76fe-ac3a-4f38-9c9f-da064ee86b6f", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"from snowflake.snowpark.session import Session\n", | |
"import snowflake.snowpark.functions as f\n", | |
"from snowflake.snowpark.functions import lower, col, current_timestamp, regexp_replace, lit, concat, udf\n", | |
"\n", | |
"\n", | |
"# create snowpark session\n", | |
"notebook_name = dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()\n", | |
"session = Session.builder.app_name(notebook_name).getOrCreate()\n", | |
"session.sql_simplifier_enabled = True\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": {}, | |
"inputWidgets": {}, | |
"nuid": "458d052c-8479-4c37-a25b-49b2aa509d20", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"source": [ | |
"![Snowpark Loaded](https://developers.snowflake.com/wp-content/uploads/2022/06/Snowpark@2x.png)" | |
] | |
} | |
], | |
"metadata": { | |
"application/vnd.databricks.v1+notebook": { | |
"dashboards": [], | |
"environmentMetadata": null, | |
"language": "python", | |
"notebookMetadata": { | |
"mostRecentlyExecutedCommandWithImplicitDF": { | |
"commandId": 3715992145330473, | |
"dataframes": [ | |
"_sqldf" | |
] | |
}, | |
"pythonIndentUnit": 4 | |
}, | |
"notebookName": "initialize_snowpark_python", | |
"widgets": {} | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment