Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save hvaara/9a7c361478ad8aa429d8172b3119a657 to your computer and use it in GitHub Desktop.
Save hvaara/9a7c361478ad8aa429d8172b3119a657 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import torch\n",
"\n",
"from PIL import Image\n",
"from threading import Thread\n",
"from tqdm import tqdm\n",
"from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration, TextIteratorStreamer"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "284c183f08844ad89049c6d4365787c4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/15 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Checkpoint to load. The 34B model is the default; uncomment the 7B Mistral\n",
"# variant below to try a smaller checkpoint instead.\n",
"model_id = \"llava-hf/llava-v1.6-34b-hf\"\n",
"# model_id = \"llava-hf/llava-v1.6-mistral-7b-hf\"\n",
"device = \"mps\"\n",
"\n",
"# This notebook is written against the PyTorch MPS backend (a later cell reports\n",
"# torch.mps memory counters), so stop here with a clear message when MPS is\n",
"# missing rather than failing somewhere inside the checkpoint load.\n",
"if device == \"mps\" and not torch.backends.mps.is_available():\n",
"    raise RuntimeError(\n",
"        \"device is set to 'mps' but torch.backends.mps.is_available() is False; \"\n",
"        \"run this notebook on a machine with a PyTorch build that supports MPS.\"\n",
"    )\n",
"\n",
"processor = LlavaNextProcessor.from_pretrained(model_id)\n",
"model = LlavaNextForConditionalGeneration.from_pretrained(\n",
"    model_id,\n",
"    torch_dtype=torch.float16,  # load the weights in half precision\n",
"    low_cpu_mem_usage=True,\n",
"    device_map=device,  # place the whole model on the selected device\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Test image, referenced by commit hash so the input stays the same between runs.\n",
"url = \"https://github.com/haotian-liu/LLaVA/blob/1a91fc274d7c35a9b50b3cb29c4247ae5837ce39/images/llava_v1_5_radar.jpg?raw=true\"\n",
"\n",
"# Bound the wait with a timeout (seconds) so a stalled connection cannot hang the\n",
"# kernel, and raise on an HTTP error status here instead of passing an error\n",
"# page to PIL, which would fail with a misleading \"cannot identify image\" error.\n",
"response = requests.get(url, stream=True, timeout=30)\n",
"response.raise_for_status()\n",
"image = Image.open(response.raw)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/102 [00:00<?, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"<|im_start|> system\n",
"Answer the questions.<|im_end|><|im_start|> user\n",
"<image> \n",
"What is shown in this image?<|im_end|><|im_start|> assistant\n",
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 68.85853576660156\n",
"torch.mps.current_allocated_memory()/1024**3 = 64.85431742668152\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 2%|▏ | 2/102 [00:25<21:30, 12.90s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74134826660156\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59125232696533\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 3%|▎ | 3/102 [00:49<28:48, 17.46s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"The \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74134826660156\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59180760383606\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 4%|▍ | 4/102 [01:12<32:01, 19.61s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"image \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74134826660156\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59236216545105\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 5%|▍ | 5/102 [01:36<33:42, 20.85s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"displays \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74330139160156\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.5929172039032\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 6%|▌ | 6/102 [01:59<34:36, 21.63s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"a \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74330139160156\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59347248077393\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 7%|▋ | 7/102 [02:21<34:40, 21.90s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"radar \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74330139160156\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59402799606323\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 8%|▊ | 8/102 [02:44<34:37, 22.10s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74525451660156\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59458351135254\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 9%|▉ | 9/102 [03:07<35:02, 22.61s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"chart, \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74525451660156\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59515404701233\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 10%|▉ | 10/102 [03:30<34:50, 22.72s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"also \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74525451660156\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59571552276611\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 11%|█ | 11/102 [03:54<34:50, 22.97s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"known \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74525451660156\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59625792503357\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 12%|█▏ | 12/102 [04:17<34:41, 23.12s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"as \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74734497070312\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59680914878845\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 13%|█▎ | 13/102 [04:41<34:35, 23.32s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"a \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74748229980469\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59736609458923\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 14%|█▎ | 14/102 [05:04<34:01, 23.20s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"spider \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74772644042969\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59792304039001\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 15%|█▍ | 15/102 [05:27<33:26, 23.06s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74981689453125\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59848022460938\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 16%|█▌ | 16/102 [05:50<33:04, 23.07s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"chart, \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.74993896484375\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59903740882874\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 17%|█▋ | 17/102 [06:13<32:36, 23.02s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"which \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75006103515625\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.59959506988525\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 18%|█▊ | 18/102 [06:36<32:09, 22.97s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"is \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75019836425781\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60015273094177\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 19%|█▊ | 19/102 [07:00<32:06, 23.21s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"a \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75228881835938\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60071063041687\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 20%|█▉ | 20/102 [07:22<31:34, 23.11s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"graphical \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75242614746094\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60126876831055\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 21%|██ | 21/102 [07:45<31:04, 23.02s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"method \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.7525634765625\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.6018271446228\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 22%|██▏ | 22/102 [08:08<30:34, 22.93s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"of \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75465393066406\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60238575935364\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 23%|██▎ | 23/102 [08:31<30:07, 22.89s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"displaying \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75479125976562\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60294461250305\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 24%|██▎ | 24/102 [08:54<29:54, 23.00s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"multivariate \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75491333007812\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60350346565247\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 25%|██▍ | 25/102 [09:17<29:42, 23.14s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"data \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75505065917969\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60406279563904\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 25%|██▌ | 26/102 [09:40<29:14, 23.08s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"in \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75714111328125\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60462212562561\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 26%|██▋ | 27/102 [10:03<28:43, 22.98s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"the \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75727844238281\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60518169403076\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 27%|██▋ | 28/102 [10:26<28:20, 22.97s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"form \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75740051269531\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60574150085449\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 28%|██▊ | 29/102 [10:50<28:14, 23.22s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"of \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75949096679688\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.6063015460968\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 29%|██▉ | 30/102 [11:13<27:40, 23.07s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"a \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.75975036621094\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60686182975769\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 30%|███ | 31/102 [11:35<27:09, 22.95s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.7598876953125\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60742235183716\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 31%|███▏ | 32/102 [11:58<26:37, 22.82s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.76002502441406\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60798287391663\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 32%|███▏ | 33/102 [12:21<26:28, 23.02s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"two-dimensional \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.76405334472656\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60854387283325\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 33%|███▎ | 34/102 [12:45<26:25, 23.32s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"chart \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.76419067382812\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60910487174988\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 34%|███▍ | 35/102 [13:09<26:17, 23.54s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"of \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.76431274414062\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.60966610908508\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 35%|███▌ | 36/102 [13:33<26:00, 23.64s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"three \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.76640319824219\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61022758483887\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 36%|███▋ | 37/102 [13:57<25:47, 23.80s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"or \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.76654052734375\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61078929901123\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 37%|███▋ | 38/102 [14:21<25:16, 23.70s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"more \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.76666259765625\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61135125160217\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 38%|███▊ | 39/102 [14:44<24:45, 23.58s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"quantitative \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.76678466796875\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.6119134426117\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 39%|███▉ | 40/102 [15:07<24:14, 23.45s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"variables \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.76887512207031\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61247539520264\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 40%|████ | 41/102 [15:31<23:55, 23.53s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"represented \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.76901245117188\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61305236816406\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 41%|████ | 42/102 [15:54<23:30, 23.51s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"on \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.76914978027344\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61362028121948\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 42%|████▏ | 43/102 [16:18<23:07, 23.52s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"axes \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.771240234375\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61416983604431\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 43%|████▎ | 44/102 [16:42<22:54, 23.70s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"starting \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.77137756347656\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61472821235657\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 44%|████▍ | 45/102 [17:06<22:34, 23.76s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"from \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.77149963378906\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61529159545898\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 45%|████▌ | 46/102 [17:30<22:05, 23.67s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"the \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.77163696289062\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61585521697998\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 46%|████▌ | 47/102 [17:53<21:39, 23.63s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"same \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.77384948730469\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61641907691956\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 47%|████▋ | 48/102 [18:17<21:16, 23.64s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.77398681640625\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61698293685913\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 48%|████▊ | 49/102 [18:41<20:59, 23.76s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"point.\n",
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.77412414550781\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61754727363586\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 49%|████▉ | 50/102 [19:04<20:32, 23.71s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.77621459960938\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.6181116104126\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 50%|█████ | 51/102 [19:28<20:05, 23.63s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.77633666992188\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61867618560791\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 51%|█████ | 52/102 [19:51<19:36, 23.54s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"In \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.77645874023438\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.6192409992218\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 52%|█████▏ | 53/102 [20:14<19:09, 23.46s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"this \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.77659606933594\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.61980605125427\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 53%|█████▎ | 54/102 [20:38<18:44, 23.43s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"particular \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.7786865234375\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62037134170532\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 54%|█████▍ | 55/102 [21:01<18:20, 23.42s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.77880859375\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62093687057495\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 55%|█████▍ | 56/102 [21:24<17:54, 23.36s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"chart, \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.77894592285156\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62150168418884\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 56%|█████▌ | 57/102 [21:48<17:31, 23.36s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"there \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78103637695312\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62206768989563\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 57%|█████▋ | 58/102 [22:11<17:07, 23.35s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"are \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78117370605469\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62263441085815\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 58%|█████▊ | 59/102 [22:34<16:40, 23.26s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"several \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78131103515625\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62320065498352\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 59%|█████▉ | 60/102 [22:58<16:21, 23.36s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"datasets \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78144836425781\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62376713752747\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 60%|█████▉ | 61/102 [23:21<16:01, 23.45s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"represented \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78353881835938\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62433385848999\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 61%|██████ | 62/102 [23:45<15:37, 23.43s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"by \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78367614746094\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.6249008178711\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 62%|██████▏ | 63/102 [24:08<15:14, 23.45s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"different \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78392028808594\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62546801567078\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 63%|██████▎ | 64/102 [24:32<14:52, 23.48s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"colors \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.7860107421875\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62603521347046\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 64%|██████▎ | 65/102 [24:57<14:43, 23.87s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"and \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78614807128906\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.6266028881073\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 65%|██████▍ | 66/102 [25:21<14:28, 24.12s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"labeled \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78628540039062\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62717056274414\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 66%|██████▌ | 67/102 [25:45<14:02, 24.07s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"with \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78640747070312\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62773847579956\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 67%|██████▋ | 68/102 [26:09<13:31, 23.86s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"various \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78849792480469\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62830662727356\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 68%|██████▊ | 69/102 [26:32<13:04, 23.76s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78863525390625\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.62887501716614\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 69%|██████▊ | 70/102 [26:56<12:37, 23.67s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.78877258300781\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.6294436454773\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 70%|██████▉ | 71/102 [27:19<12:09, 23.53s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"acronyms \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.79086303710938\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63001251220703\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 71%|███████ | 72/102 [27:42<11:42, 23.42s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"such \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.79100036621094\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63058137893677\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 72%|███████▏ | 73/102 [28:06<11:28, 23.76s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"as \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.79112243652344\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63116431236267\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 73%|███████▎ | 74/102 [28:31<11:10, 23.96s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.793212890625\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.6317389011383\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 74%|███████▎ | 75/102 [28:54<10:42, 23.81s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.79335021972656\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63229537010193\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 75%|███████▍ | 76/102 [29:18<10:18, 23.79s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.79348754882812\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63286066055298\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 75%|███████▌ | 77/102 [29:42<09:54, 23.77s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.79362487792969\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63343071937561\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 76%|███████▋ | 78/102 [30:06<09:29, 23.75s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"MM-Vet, \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.79571533203125\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63400101661682\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 77%|███████▋ | 79/102 [30:29<09:06, 23.76s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.79585266113281\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63457155227661\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 78%|███████▊ | 80/102 [30:53<08:41, 23.69s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.79609680175781\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.6351420879364\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 79%|███████▉ | 81/102 [31:17<08:19, 23.78s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.79818725585938\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63571310043335\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 80%|████████ | 82/102 [31:40<07:52, 23.60s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.79830932617188\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.6362841129303\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 81%|████████▏ | 83/102 [32:04<07:31, 23.76s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.79844665527344\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63685536384583\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 82%|████████▏ | 84/102 [32:28<07:10, 23.92s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.798583984375\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63742685317993\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 83%|████████▎ | 85/102 [32:53<06:49, 24.07s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"LLaVA-Bench, \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.80067443847656\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63799858093262\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 84%|████████▍ | 86/102 [33:16<06:22, 23.92s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.80081176757812\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63857054710388\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 85%|████████▌ | 87/102 [33:40<05:58, 23.88s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.80094909667969\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63914275169373\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 86%|████████▋ | 88/102 [34:04<05:34, 23.88s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.80303955078125\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.63971495628357\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 87%|████████▋ | 89/102 [34:28<05:10, 23.89s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.80317687988281\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64028763771057\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 88%|████████▊ | 90/102 [34:52<04:47, 23.99s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.80331420898438\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.640860080719\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 89%|████████▉ | 91/102 [35:17<04:26, 24.21s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"SQA-IMG, \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.80540466308594\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64143323898315\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 90%|█████████ | 92/102 [35:41<04:00, 24.04s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"and \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.80552673339844\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64200639724731\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 91%|█████████ | 93/102 [36:04<03:34, 23.84s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.80564880371094\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64257979393005\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 92%|█████████▏| 94/102 [36:28<03:10, 23.79s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"others. \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.8057861328125\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64315342903137\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 93%|█████████▎| 95/102 [36:51<02:46, 23.80s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Each \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.807861328125\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64372730255127\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 94%|█████████▍| 96/102 [37:15<02:22, 23.67s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"dataset \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.80812072753906\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64430117607117\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 95%|█████████▌| 97/102 [37:40<01:59, 23.97s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"is \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.80825805664062\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64487552642822\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 96%|█████████▌| 98/102 [38:04<01:36, 24.16s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"plotted \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.81033325195312\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64544987678528\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 97%|█████████▋| 99/102 [38:29<01:13, 24.45s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"along \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.81045532226562\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64602446556091\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 98%|█████████▊| 100/102 [38:55<00:49, 24.82s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"multiple \n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.81059265136719\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64659929275513\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 102/102 [39:20<00:00, 23.14s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.81072998046875\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64717435836792\n",
"axes,\n",
"torch.mps.driver_allocated_memory()/1024**3 = 69.81072998046875\n",
"torch.mps.current_allocated_memory()/1024**3 = 65.64717316627502\n",
"102\n",
"['<|im_start|> system\\nAnswer the questions.<|im_end|><|im_start|> user\\n<image> \\nWhat is shown in this image?<|im_end|><|im_start|> assistant\\n', '', 'The ', 'image ', 'displays ', 'a ', 'radar ', '', 'chart, ', 'also ', 'known ', 'as ', 'a ', 'spider ', '', 'chart, ', 'which ', 'is ', 'a ', 'graphical ', 'method ', 'of ', 'displaying ', 'multivariate ', 'data ', 'in ', 'the ', 'form ', 'of ', 'a ', '', '', 'two-dimensional ', 'chart ', 'of ', 'three ', 'or ', 'more ', 'quantitative ', 'variables ', 'represented ', 'on ', 'axes ', 'starting ', 'from ', 'the ', 'same ', '', 'point.\\n', '\\n', '', 'In ', 'this ', 'particular ', '', 'chart, ', 'there ', 'are ', 'several ', 'datasets ', 'represented ', 'by ', 'different ', 'colors ', 'and ', 'labeled ', 'with ', 'various ', '', '', 'acronyms ', 'such ', 'as ', '', '', '', '', 'MM-Vet, ', '', '', '', '', '', '', 'LLaVA-Bench, ', '', '', '', '', '', 'SQA-IMG, ', 'and ', '', 'others. ', 'Each ', 'dataset ', 'is ', 'plotted ', 'along ', 'multiple ', '', 'axes,']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"prompt = \"<|im_start|>system\\nAnswer the questions.<|im_end|><|im_start|>user\\n<image>\\nWhat is shown in this image?<|im_end|><|im_start|>assistant\\n\"\n",
"\n",
"inputs = processor(prompt, image, return_tensors=\"pt\").to(device)\n",
"\n",
"max_new_tokens = 100\n",
"streamer = TextIteratorStreamer(processor)\n",
"generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)\n",
"\n",
"thread = Thread(target=model.generate, kwargs=generation_kwargs)\n",
"thread.start()\n",
"\n",
"text = []\n",
"for chunk in tqdm(streamer, total=max_new_tokens+2):\n",
" text.append(chunk)\n",
" print(chunk)\n",
" torch.mps.empty_cache()\n",
" print(f\"{torch.mps.driver_allocated_memory()/1024**3 = }\")\n",
" print(f\"{torch.mps.current_allocated_memory()/1024**3 = }\")\n",
"print(f\"{len(text)}\")\n",
"print(text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "hfdev",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment