Skip to content

Instantly share code, notes, and snippets.

@MrBago
Last active November 7, 2023 23:47
Show Gist options
  • Save MrBago/afb28bb2a05c3f6023ce1b8e15dd5d21 to your computer and use it in GitHub Desktop.
Save MrBago/afb28bb2a05c3f6023ce1b8e15dd5d21 to your computer and use it in GitHub Desktop.
Notebook with timings for alias table db operations
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "fb66ae62-362b-423c-9075-9ce5c0bbf92c",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"import sys\n",
"sys.path = [path for path in sys.path if \"userFiles-\" not in path]"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "541d2e22-dfb0-487f-b7c7-4f28e830b480",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"from rope.base.project import Project\n",
"from rope.contrib.autoimport.defs import SearchResult\n",
"from rope.contrib.autoimport.sqlite import AutoImport\n",
"\n",
"import os; os.makedirs('/tmp/bagoD/rope', exist_ok=True)\n",
"project = Project('/tmp/bagoD/rope')\n",
"autoimport = AutoImport(project, memory=False)\n",
"\n",
"autoimport.generate_cache() # Generates a cache of the local modules, from the project you're working on\n",
"autoimport.generate_modules_cache() # Generates a cache of external modules"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "3a9cf313-a461-4473-b4b3-beca277fdebf",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"import rope.contrib.autoimport.models as m\n",
"\n",
"class Alias(m.Model):\n",
" table_name = \"aliases\"\n",
" schema = {\n",
" \"alias\": \"TEXT\",\n",
" \"module\": \"TEXT\",\n",
" }\n",
" columns = list(schema.keys())\n",
" objects = m.Query(table_name, columns)\n",
"\n",
" @classmethod\n",
" def create_table(cls, connection):\n",
" super().create_table(connection)\n",
" connection.execute(\"CREATE INDEX IF NOT EXISTS alias ON aliases(alias)\")\n",
"\n",
" modules = m.Query(\n",
" \"(SELECT aliases.*, package, source, type FROM aliases INNER JOIN names on aliases.module = names.module)\",\n",
" columns + [\"package\", \"source\", \"type\"],\n",
" )\n",
" search_modules_with_alias = modules.where(\"alias LIKE (?)\")\n",
"\n",
"# autoimport._execute(Alias.objects.drop_table())\n",
"Alias.create_table(autoimport.connection)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "27bf83f7-0bfe-4760-856a-dc5ecf0c96a9",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 33.1 ms, sys: 0 ns, total: 33.1 ms\nWall time: 32.8 ms\n"
]
}
],
"source": [
"%time aa = list(autoimport._execute(m.FinalQuery(\"SELECT * FROM names\"), ()))"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "00838f11-4ed1-4c8b-9262-ba914bb3d33e",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<sqlite3.Cursor at 0x7f01c4c3c340>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"autoimport._executemany(m.Name.objects.insert_into(), aa * 100)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "eefa0aa5-3577-442a-bbf6-5a27ec7477e1",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3.11 s, sys: 316 ms, total: 3.42 s\nWall time: 3.41 s\n"
]
}
],
"source": [
"# Check how long it takes to dump the whole Names table\n",
"%time aa = list(autoimport._execute(m.FinalQuery(\"SELECT * FROM names\"), ()))"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "5dd9a43b-d3f6-4d7c-a516-c423787ad644",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 363 µs, sys: 114 µs, total: 477 µs\nWall time: 305 µs\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# join should be very fast when the Alias table is empty\n",
"%time list(autoimport._execute(Alias.modules.select_star()))"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "758bd913-e115-4439-bab0-8f544aa48de9",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"import random\n",
"\n",
"# Seed the aliases table: one fixed alias plus 100 aliases whose module is taken\n",
"# from column 1 of a randomly chosen row of the previously dumped names table.\n",
"autoimport._execute(Alias.objects.insert_into(), (\"test\", \"numpy\"))\n",
"N = len(aa)\n",
"for i in range(100):\n",
" # randrange(N) yields 0..N-1; randint(0, N) also returns N, which is past the end of aa.\n",
" autoimport._execute(Alias.objects.insert_into(), (f\"test{i}\", aa[random.randrange(N)][1]))"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "3c755f4f-686a-407b-a12d-260f6944cb62",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 824 ms, sys: 59.8 ms, total: 884 ms\nWall time: 883 ms\n"
]
}
],
"source": [
"# This is the time for listing the entire joined Alias/Names table: < 1 s, compared to > 3 s for dumping the full Names table.\n",
"%time jull_join_table = set(autoimport._execute(Alias.modules.select_star()))"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "9a42911a-22f4-4990-a10f-71dbbf004f9d",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 55.3 ms, sys: 3.81 ms, total: 59.1 ms\nWall time: 58.9 ms\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{('test3', 'pyspark.install', 5),\n",
" ('test30', 'tenacity.retry', 4),\n",
" ('test31', 'pathspec.util', 4),\n",
" ('test32', 'seaborn.rcmod', 4),\n",
" ('test33', 'asyncio.coroutines', 3),\n",
" ('test34', 'pyflakes.messages', 4),\n",
" ('test35', 'typing', 3),\n",
" ('test36', 'soupsieve.util', 4),\n",
" ('test37', 'pickletools', 3),\n",
" ('test38', 'email.charset', 3),\n",
" ('test39', 'pygments.unistring', 4)}"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Search for aliases in the joined table that match some pattern\n",
"%time set(autoimport._execute(Alias.modules.where(\"alias LIKE 'test3%'\").select(\"alias\", \"module\", \"source\"), ()))"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "0f1a7b47-63ca-492f-a79a-393dddb5834a",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 185 ms, sys: 31.4 ms, total: 216 ms\nWall time: 215 ms\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{('ABC', 'abc', 'abc', 3, 7),\n",
" ('abstractclassmethod', 'abc', 'abc', 3, 7),\n",
" ('abstractmethod', 'abc', 'abc', 3, 3),\n",
" ('abstractproperty', 'abc', 'abc', 3, 7),\n",
" ('abstractstaticmethod', 'abc', 'abc', 3, 7),\n",
" ('update_abstractmethods', 'abc', 'abc', 3, 3)}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# For comparison, search for rows in Names whose module matches a pattern (done for every autoimport)\n",
"%time set(autoimport._execute(m.Name.objects.where(\"module like 'abc%'\").select_star()))"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "6469054a-aeb6-48a2-8c41-8e47fcf36a87",
"showTitle": false,
"title": ""
}
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 272 µs, sys: 83 µs, total: 355 µs\nWall time: 268 µs\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A search with no matching aliases should be very fast\n",
"%time list(autoimport._execute(Alias.modules.where(\"alias LIKE 'no_matching_alias'\").select_star(), ()))"
]
}
],
"metadata": {
"application/vnd.databricks.v1+notebook": {
"dashboards": [],
"language": "python",
"notebookMetadata": {
"mostRecentlyExecutedCommandWithImplicitDF": {
"commandId": 4058590365490159,
"dataframes": [
"_sqldf"
]
},
"pythonIndentUnit": 2
},
"notebookName": "timeing for aliases table join (1)",
"widgets": {}
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment