Skip to content

Instantly share code, notes, and snippets.

@huseinzol05
Created May 6, 2024 08:55
Show Gist options
  • Save huseinzol05/ff59996034604d17c1e53074e9adc03f to your computer and use it in GitHub Desktop.
Save huseinzol05/ff59996034604d17c1e53074e9adc03f to your computer and use it in GitHub Desktop.
Simple matmul comparison
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b8432016",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'2.3.0+cu121'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"\n",
"torch.__version__"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "acbdd506",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/husein/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import time\n",
"from hqq.core.quantize import HQQLinear\n",
"\n",
"dtype = torch.float16\n",
"device = 'cuda'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "45874ba9",
"metadata": {},
"outputs": [],
"source": [
"x = torch.randn((1, 32, 768), dtype = dtype, device = device)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ab03d293",
"metadata": {},
"outputs": [],
"source": [
"linear = torch.nn.Linear(768, 32000, dtype = dtype, device = device)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "047fc44a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 5.09 ms, sys: 0 ns, total: 5.09 ms\n",
"Wall time: 3.85 ms\n"
]
},
{
"data": {
"text/plain": [
"0.0038194656372070312"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"\n",
"before = time.time()\n",
"for _ in range(10):\n",
" linear(x)\n",
"time.time() - before"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "b19d626c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"8 0.0006330013275146484\n",
"4 0.0009520053863525391\n",
"2 0.0011856555938720703\n",
"1 0.001649618148803711\n"
]
}
],
"source": [
"from hqq.core.quantize import *\n",
"from hqq.core.quantize import HQQLinear\n",
"\n",
"ints = [8, 4, 2, 1]\n",
"\n",
"for i in ints:\n",
" quant = BaseQuantizeConfig(nbits=i, \n",
" group_size=64,\n",
" quant_zero=False,\n",
" quant_scale=False,\n",
" axis=0,\n",
" offload_meta=False)\n",
" out_module = HQQLinear(\n",
" linear,\n",
" quant,\n",
" compute_dtype=dtype,\n",
" device=device,\n",
" )\n",
" out_module(x)\n",
" before = time.time()\n",
" for _ in range(10):\n",
" out_module(x)\n",
" print(i, time.time() - before)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bd451b00",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "python3.10",
"language": "python",
"name": "python3.10"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment