Last active
November 7, 2023 23:47
-
-
Save MrBago/afb28bb2a05c3f6023ce1b8e15dd5d21 to your computer and use it in GitHub Desktop.
Notebook with timings for alias table db operations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "fb66ae62-362b-423c-9075-9ce5c0bbf92c", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import sys\n", | |
"sys.path = [path for path in sys.path if \"userFiles-\" not in path]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "541d2e22-dfb0-487f-b7c7-4f28e830b480", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"from rope.base.project import Project\n", | |
"from rope.contrib.autoimport.defs import SearchResult\n", | |
"from rope.contrib.autoimport.sqlite import AutoImport\n", | |
"\n", | |
"import os; os.makedirs('/tmp/bagoD/rope', exist_ok=True)\n", | |
"project = Project('/tmp/bagoD/rope')\n", | |
"autoimport = AutoImport(project, memory=False)\n", | |
"\n", | |
"autoimport.generate_cache() # Generates a cache of the local modules, from the project you're working on\n", | |
"autoimport.generate_modules_cache() # Generates a cache of external modules" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "3a9cf313-a461-4473-b4b3-beca277fdebf", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import rope.contrib.autoimport.models as m\n", | |
"\n", | |
"class Alias(m.Model):\n", | |
" table_name = \"aliases\"\n", | |
" schema = {\n", | |
" \"alias\": \"TEXT\",\n", | |
" \"module\": \"TEXT\",\n", | |
" }\n", | |
" columns = list(schema.keys())\n", | |
" objects = m.Query(table_name, columns)\n", | |
"\n", | |
" @classmethod\n", | |
" def create_table(cls, connection):\n", | |
" super().create_table(connection)\n", | |
" connection.execute(\"CREATE INDEX IF NOT EXISTS alias ON aliases(alias)\")\n", | |
"\n", | |
" modules = m.Query(\n", | |
" \"(SELECT aliases.*, package, source, type FROM aliases INNER JOIN names on aliases.module = names.module)\",\n", | |
" columns + [\"package\", \"source\", \"type\"],\n", | |
" )\n", | |
" search_modules_with_alias = modules.where(\"alias LIKE (?)\")\n", | |
"\n", | |
"# autoimport._execute(Alias.objects.drop_table())\n", | |
"Alias.create_table(autoimport.connection)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "27bf83f7-0bfe-4760-856a-dc5ecf0c96a9", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 33.1 ms, sys: 0 ns, total: 33.1 ms\nWall time: 32.8 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%time aa = list(autoimport._execute(m.FinalQuery(\"SELECT * FROM names\"), ()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "00838f11-4ed1-4c8b-9262-ba914bb3d33e", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"<sqlite3.Cursor at 0x7f01c4c3c340>" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"autoimport._executemany(m.Name.objects.insert_into(), aa * 100)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "eefa0aa5-3577-442a-bbf6-5a27ec7477e1", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 3.11 s, sys: 316 ms, total: 3.42 s\nWall time: 3.41 s\n" | |
] | |
} | |
], | |
"source": [ | |
"# Check how long it takes to dump the whole Names table\n", | |
"%time aa = list(autoimport._execute(m.FinalQuery(\"SELECT * FROM names\"), ()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "5dd9a43b-d3f6-4d7c-a516-c423787ad644", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 363 µs, sys: 114 µs, total: 477 µs\nWall time: 305 µs\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# join should be very fast when the Alias table is empty\n", | |
"%time list(autoimport._execute(Alias.modules.select_star()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "758bd913-e115-4439-bab0-8f544aa48de9", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import random\n", | |
"\n", | |
"# Insert one fixed alias, then 100 aliases whose module is index 1 of a randomly chosen row of `aa`.\n", | |
"autoimport._execute(Alias.objects.insert_into(), (\"test\", \"numpy\"))\n", | |
"N = len(aa)\n", | |
"for i in range(100):\n", | |
" # randrange(N) picks 0..N-1; randint(0, N) is inclusive of N and could raise IndexError on aa[N].\n", | |
" autoimport._execute(Alias.objects.insert_into(), (f\"test{i}\", aa[random.randrange(N)][1]))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "3c755f4f-686a-407b-a12d-260f6944cb62", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 824 ms, sys: 59.8 ms, total: 884 ms\nWall time: 883 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"# This is the time for listing the entire Alias/Names join (Alias.modules): < 1 s, compared to > 3 s for dumping the full Names table.\n", | |
"%time jull_join_table = set(autoimport._execute(Alias.modules.select_star()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "9a42911a-22f4-4990-a10f-71dbbf004f9d", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 55.3 ms, sys: 3.81 ms, total: 59.1 ms\nWall time: 58.9 ms\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{('test3', 'pyspark.install', 5),\n", | |
" ('test30', 'tenacity.retry', 4),\n", | |
" ('test31', 'pathspec.util', 4),\n", | |
" ('test32', 'seaborn.rcmod', 4),\n", | |
" ('test33', 'asyncio.coroutines', 3),\n", | |
" ('test34', 'pyflakes.messages', 4),\n", | |
" ('test35', 'typing', 3),\n", | |
" ('test36', 'soupsieve.util', 4),\n", | |
" ('test37', 'pickletools', 3),\n", | |
" ('test38', 'email.charset', 3),\n", | |
" ('test39', 'pygments.unistring', 4)}" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Search for aliases in the joined table that match some pattern\n", | |
"%time set(autoimport._execute(Alias.modules.where(\"alias LIKE 'test3%'\").select(\"alias\", \"module\", \"source\"), ()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "0f1a7b47-63ca-492f-a79a-393dddb5834a", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 185 ms, sys: 31.4 ms, total: 216 ms\nWall time: 215 ms\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"{('ABC', 'abc', 'abc', 3, 7),\n", | |
" ('abstractclassmethod', 'abc', 'abc', 3, 7),\n", | |
" ('abstractmethod', 'abc', 'abc', 3, 3),\n", | |
" ('abstractproperty', 'abc', 'abc', 3, 7),\n", | |
" ('abstractstaticmethod', 'abc', 'abc', 3, 7),\n", | |
" ('update_abstractmethods', 'abc', 'abc', 3, 3)}" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# For comparison, search for modules in Names that match a pattern (done for every autoimport)\n", | |
"%time set(autoimport._execute(m.Name.objects.where(\"module like 'abc%'\").select_star()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 0, | |
"metadata": { | |
"application/vnd.databricks.v1+cell": { | |
"cellMetadata": { | |
"byteLimit": 2048000, | |
"rowLimit": 10000 | |
}, | |
"inputWidgets": {}, | |
"nuid": "6469054a-aeb6-48a2-8c41-8e47fcf36a87", | |
"showTitle": false, | |
"title": "" | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 272 µs, sys: 83 µs, total: 355 µs\nWall time: 268 µs\n" | |
] | |
}, | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"[]" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# No alias matches should be very fast\n", | |
"%time list(autoimport._execute(Alias.modules.where(\"alias LIKE 'no_matching_alias'\").select_star(), ()))" | |
] | |
} | |
], | |
"metadata": { | |
"application/vnd.databricks.v1+notebook": { | |
"dashboards": [], | |
"language": "python", | |
"notebookMetadata": { | |
"mostRecentlyExecutedCommandWithImplicitDF": { | |
"commandId": 4058590365490159, | |
"dataframes": [ | |
"_sqldf" | |
] | |
}, | |
"pythonIndentUnit": 2 | |
}, | |
"notebookName": "timeing for aliases table join (1)", | |
"widgets": {} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment