Skip to content

Instantly share code, notes, and snippets.

@orellabac
Last active June 26, 2024 03:23
Show Gist options
  • Save orellabac/c21308663e754736983602a49986f4c6 to your computer and use it in GitHub Desktop.
Save orellabac/c21308663e754736983602a49986f4c6 to your computer and use it in GitHub Desktop.
Sample Notebook that can be used to use Snowpark In a Databricks Notebook
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "897eb1dd-619b-4948-a277-9440ce33a4e9",
"showTitle": true,
"title": "Register MAGIC cell"
}
},
"outputs": [],
"source": [
"# use this extensions if you want to have %%sql magics that run your code in snowpark\n",
"try:\n",
" from IPython.core.magic import register_cell_magic\n",
" def sql(line, cell):\n",
" import IPython\n",
" import re\n",
" user_ns = IPython.get_ipython().user_ns\n",
" if \"session\" in user_ns:\n",
" session = user_ns['session']\n",
" #if you have installed jinja you can use it\n",
" #from jinja2 import Template\n",
" #t = Template(cell)\n",
" #res = t.render(user_ns)\n",
" res = cell\n",
" name = None\n",
" if line and line.strip():\n",
" name = line.strip().split(\" \")[0]\n",
" # If there are several statements only last will be returned\n",
" # also we will remove all ; at the end to avoid issues with empty statements\n",
" res = re.sub(r';+$', '', res)\n",
" for cursor in session.connection.execute_string(res):\n",
" df = session.sql(f\"SELECT * FROM TABLE(RESULT_SCAN('{cursor.sfqid}'))\")\n",
" # to avoid needed to do a count on display\n",
" setattr(df,\"_cached_rowcount\",cursor.rowcount)\n",
" if name:\n",
" user_ns[name] = df\n",
" else:\n",
" user_ns[\"__df\"] = df\n",
" return df\n",
" else:\n",
" return \"No session was found. You can setup one by running: session = Session.builder.getOrCreate()\"\n",
" register_cell_magic(sql)\n",
"except:\n",
" print(\"error with magic\")"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "595b9679-ffc6-414f-a278-f14155b12ab1",
"showTitle": true,
"title": "ExtendDataFrame"
}
},
"outputs": [],
"source": [
"# use this extensions for better integration with notebooks\n",
"from snowflake.snowpark import DataFrame\n",
"import pandas as pd\n",
"if not hasattr(DataFrame,\"___extended\"):\n",
" \n",
" DataFrame.___extended = True\n",
"\n",
" def _repr_html_(self):\n",
" import IPython\n",
" rows_limit = getattr(DataFrame,'__rows_limit',50)\n",
" if 'display' in globals():\n",
" display = globals()['display']\n",
" elif 'display' in IPython.get_ipython().user_ns:\n",
" display = IPython.get_ipython().user_ns['display']\n",
" else:\n",
" from IPython.display import display\n",
" try:\n",
" count = self._cached_rowcount if hasattr(self,\"_cached_rowcount\") else self.count()\n",
" self.count()\n",
" if count == 0:\n",
" return \"No rows to display\"\n",
" elif count == 1:\n",
" df = pd.DataFrame.from_records([x.as_dict() for x in self.collect()])\n",
" elif count > rows_limit:\n",
" print(f\"There are {count} rows. Showing only {rows_limit}. Change DataFrame.__rows_limit value to display more rows\")\n",
" df = self.limit(rows_limit).to_pandas()\n",
" else:\n",
" df = self.to_pandas()\n",
" display(df)\n",
" return \"\"\n",
" except Exception as ex:\n",
" return str(ex)\n",
"\n",
" setattr(DataFrame,'_repr_html_',_repr_html_)\n",
" setattr(DataFrame,'__rows_limit',50)\n",
"\n",
" def _ipython_key_completions_(self):\n",
" \"\"\"Returns the names of columns in this :class:`DataFrame`.\n",
" \"\"\"\n",
" return self.columns\n",
" setattr(DataFrame,'_ipython_key_completions_',_ipython_key_completions_)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "2feac9dd-cb67-4f01-a229-a77952467997",
"showTitle": true,
"title": "ExtendSession"
}
},
"outputs": [],
"source": [
"from snowflake.snowpark import Session\n",
"\n",
"if not hasattr(Session,\"___extended\"):\n",
" setattr(Session,\"___extended\",True)\n",
" def __repr__(self):\n",
" from snowflake.snowpark import Session\n",
" from snowflake.snowpark.version import VERSION\n",
" snowflake_environment = self.sql('select current_version()').collect()\n",
" snowpark_version = VERSION\n",
" # Current Environment Details\n",
" print( \n",
" f'User : {self.get_current_user()}\\n' +\\\n",
" f'Role : {self.get_current_role()}\\n' +\\\n",
" f'Warehouse : {self.get_current_warehouse()}\\n' +\\\n",
" f'Database : {self.get_current_database()}\\n' +\\\n",
" f'Schema : {self.get_current_schema()}\\n' +\\\n",
" f'Snowflake version : {snowflake_environment[0][0]}\\n' +\\\n",
" f'Snowpark for Python version : {snowpark_version[0]}.{snowpark_version[1]}.{snowpark_version[2]}\\n')\n",
" \n",
" # this method is added for IPython compatibility\n",
" setattr(Session,\"__repr__\",__repr__)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "e04de2ab-42a6-41be-b00d-8fafcbf42582",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"# as a recommended practice gather this from secrets\n",
"\n",
"import os\n",
"scope = \"sf_secrets_demo\"\n",
"os.environ[\"SNOWFLAKE_CONNECTIONS\"] = f\"\"\"\n",
"\n",
"default_connection_name = \"default\"\n",
"[connections]\n",
"\n",
"[default]\n",
"account = \"{dbutils.secrets.get(scope, \"sf-account\")}\"\n",
"user = \"{dbutils.secrets.get(scope, \"sf-user\")}\"\n",
"password = \"{dbutils.secrets.get(scope, \"sf-password\")}\"\n",
"role = \"{dbutils.secrets.get(scope, \"sf-role\")}\"\n",
"database = \"{dbutils.secrets.get(scope, \"sf-database\")}\"\n",
"schema = \"{dbutils.secrets.get(scope, \"sf-schema\")}\"\n",
"warehouse = \"{dbutils.secrets.get(scope, \"sf-warehouse\")}\"\n",
"\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "bb0a76fe-ac3a-4f38-9c9f-da064ee86b6f",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"from snowflake.snowpark.session import Session\n",
"import snowflake.snowpark.functions as f\n",
"from snowflake.snowpark.functions import lower, col, current_timestamp, regexp_replace, lit, concat, udf\n",
"\n",
"\n",
"# create snowpark session\n",
"notebook_name = dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()\n",
"session = Session.builder.app_name(notebook_name).getOrCreate()\n",
"session.sql_simplifier_enabled = True\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "458d052c-8479-4c37-a25b-49b2aa509d20",
"showTitle": false,
"title": ""
}
},
"source": [
"![Snowpark Loaded](https://developers.snowflake.com/wp-content/uploads/2022/06/Snowpark@2x.png)"
]
}
],
"metadata": {
"application/vnd.databricks.v1+notebook": {
"dashboards": [],
"environmentMetadata": null,
"language": "python",
"notebookMetadata": {
"mostRecentlyExecutedCommandWithImplicitDF": {
"commandId": 3715992145330473,
"dataframes": [
"_sqldf"
]
},
"pythonIndentUnit": 4
},
"notebookName": "initialize_snowpark_python",
"widgets": {}
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment