Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save healiseu/c1c51fc823f4a2cd6cb765b8a1157def to your computer and use it in GitHub Desktop.
Save healiseu/c1c51fc823f4a2cd6cb765b8a1157def to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Demo of Infinidat ORM Python Client for Clickhouse\n",
"### Original source at https://github.com/Infinidat/infi.clickhouse_orm\n",
"#### Modified and extended by Athanassios I. Hatzis \n",
"#### (c) 28th of September 2018"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import psutil, time, datetime\n",
"\n",
"from infi.clickhouse_orm.database import Database\n",
"from infi.clickhouse_orm.models import Model\n",
"from infi.clickhouse_orm.fields import *\n",
"from infi.clickhouse_orm.engines import Memory"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Data Structures"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Create Model\n",
"class CPUStats(Model):\n",
"\n",
" timestamp = DateTimeField()\n",
" cpu_id = UInt16Field()\n",
" cpu_percent = Float32Field()\n",
"\n",
" engine = Memory()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"OrderedDict([('timestamp',\n",
" <infi.clickhouse_orm.fields.DateTimeField at 0x7f1cede05b38>),\n",
" ('cpu_id',\n",
" <infi.clickhouse_orm.fields.UInt16Field at 0x7f1cede05be0>),\n",
" ('cpu_percent',\n",
" <infi.clickhouse_orm.fields.Float32Field at 0x7f1cede05c18>)])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"CPUStats.fields()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('cpustats', <infi.clickhouse_orm.engines.Memory at 0x7f1cede05c50>)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(CPUStats.table_name(), CPUStats.engine)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Create Database or open an existing one\n",
"db = Database('DemoDB')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Create table in database\n",
"db.create_table(CPUStats)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Single Measurement Test"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get the first measurement\n",
"psutil.cpu_percent(percpu=True) # first sample should be discarded"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# CPU Utilization for each CPU core\n",
"ts = datetime.datetime.now()\n",
"cpu_sample = psutil.cpu_percent(percpu=True)\n",
"\n",
"# There are 4 cpu core measurements in each cpu_sample taken from psutil.cpu_percent\n",
"# Each core measurement has three values timestamp, core id and cpu utilization\n",
"measurements = [(ts, coreid, util) for coreid, util in enumerate(cpu_sample)]\n",
"\n",
"# For each core in CPU cores\n",
"for core in measurements:\n",
" # Print timestamp, core id and cpu utilization\n",
" (ts, coreid, util) = (core[0], core[1], core[2])\n",
" print(f\"Time:{ts.strftime('%Y-%m-%d %H:%M:%S')}, Core:{coreid}, Utilization: {util}%\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read/Write Measurements"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Take 10 cpu measurements\n",
"samples = 10\n",
"\n",
"# Take a cpu sample from psutil.cpu_percent every 500msec\n",
"for n in range(samples): \n",
" ts = datetime.datetime.now()\n",
" # read the CPU sample\n",
" cpu_sample = psutil.cpu_percent(percpu=True) \n",
" # read the measurements for each core\n",
" measurements = [CPUStats(timestamp=ts, cpu_id=coreid, cpu_percent=util) for coreid, util in enumerate(cpu_sample)]\n",
" # print measurements for each core\n",
" [print(core.timestamp.strftime('%Y-%m-%d %H:%M:%S'), core.cpu_id, core.cpu_percent) for core in measurements]\n",
" # write measurements to ClickHouse Database\n",
" db.insert(measurements) \n",
" # Pause for 500msec and repeat\n",
" time.sleep(0.500)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Queries"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"40"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# We wrote 10 samples in the database and each sample has 4 rows of data, one for each CPU core\n",
"# There are 4x10 = 40 rows in our dataset, each row is an instance of CPUStats\n",
"CPUStats.objects_in(db).count()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Each CPU core has 10 measurements, i.e. CPUStats objects\n",
"# Count instances of the 1st core (cpu_id=1) \n",
"core1_total_samples = CPUStats.objects_in(db).filter(cpu_id=1).count()\n",
"core1_total_samples"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Filtering Query in ORM form\n",
"# Sort all the measurements of CPU Core 1 in ascending order\n",
"core1samples = CPUStats.objects_in(db).filter(cpu_id=1).order_by('cpu_percent')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SELECT `timestamp`, `cpu_id`, `cpu_percent`\n",
"FROM `cpustats`\n",
"WHERE cpu_id = 1\n",
"ORDER BY cpu_percent\n"
]
}
],
"source": [
"# Same query in SQL form\n",
"print(core1samples.as_sql())"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU Core No1 Utilization: 3.8%\n",
"CPU Core No1 Utilization: 3.9%\n",
"CPU Core No1 Utilization: 3.9%\n",
"CPU Core No1 Utilization: 4.0%\n",
"CPU Core No1 Utilization: 4.0%\n",
"CPU Core No1 Utilization: 4.9%\n",
"CPU Core No1 Utilization: 6.0%\n",
"CPU Core No1 Utilization: 7.8%\n",
"CPU Core No1 Utilization: 25.5%\n",
"CPU Core No1 Utilization: 31.4%\n"
]
}
],
"source": [
"# Print CPU Core 1 Utilization\n",
"for val in core1samples: print(f'CPU Core No1 Utilization: {val.cpu_percent}%')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Filtering Query with range in ORM form\n",
"# Get all the measurements where CPU Core 1 was over 5% busy in descending order\n",
"core1over20 = CPUStats.objects_in(db).filter(cpu_id=1, cpu_percent__gt=5).order_by('cpu_percent DESC')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SELECT `timestamp`, `cpu_id`, `cpu_percent`\n",
"FROM `cpustats`\n",
"WHERE cpu_id = 1 AND cpu_percent > 5.0\n",
"ORDER BY cpu_percent DESC\n"
]
}
],
"source": [
"# Same query in SQL form\n",
"print(core1over20.as_sql())"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU Core No1 Utilization: 31.4%\n",
"CPU Core No1 Utilization: 25.5%\n",
"CPU Core No1 Utilization: 7.8%\n",
"CPU Core No1 Utilization: 6.0%\n"
]
}
],
"source": [
"# Print core1over20 result set\n",
"for val in core1over20: print(f'CPU Core No1 Utilization: {val.cpu_percent}%')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU Core 1 was busy 40.0% of the time\n"
]
}
],
"source": [
"# Find what percentage of time CPU Core 1 was over 5% busy\n",
"result = core1over20.count() * 100.0 / core1_total_samples\n",
"print(f'CPU Core 1 was busy {result}% of the time')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Aggregation Query in ORM form \n",
"# CPU Core 1 Average Utilization\n",
"core1avg = CPUStats.objects_in(db).filter(cpu_id=1).aggregate('cpu_id', average='avg(cpu_percent)')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SELECT cpu_id, avg(cpu_percent) AS average\n",
"FROM `cpustats`\n",
"WHERE cpu_id = 1\n",
"GROUP BY `cpu_id`\n"
]
}
],
"source": [
"# Same query in SQL form\n",
"print(core1avg.as_sql())"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU Core No1 Average Utilization: 9.52%\n"
]
}
],
"source": [
"# Print Average\n",
"for val in core1avg: print(f'CPU Core No1 Average Utilization: {round(val.average,2)}%')"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"# Aggregation Query in ORM form\n",
"# Average CPU utilization per core in descending order\n",
"avgpercore = CPUStats.objects_in(db).aggregate('cpu_id', average='avg(cpu_percent)').order_by('average DESC')"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SELECT cpu_id, avg(cpu_percent) AS average\n",
"FROM `cpustats`\n",
"WHERE 1\n",
"GROUP BY `cpu_id`\n",
"ORDER BY average DESC\n"
]
}
],
"source": [
"# Same query in SQL form \n",
"# 'where 1' indicates the absence of any filtering conditions)\n",
"print(avgpercore.as_sql())"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU Core No0: 10.88%\n",
"CPU Core No2: 10.48%\n",
"CPU Core No1: 9.52%\n",
"CPU Core No3: 7.61%\n"
]
}
],
"source": [
"# Print Average for each CPU core\n",
"for avg in avgpercore:\n",
" print(f'CPU Core No{avg.cpu_id}: {avg.average:.2f}%')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment