Skip to content

Instantly share code, notes, and snippets.

View iohub's full-sized avatar
💭
I may be slow to respond.

iohub

💭
I may be slow to respond.
View GitHub Profile
@iohub
iohub / nvfan-control.sh
Last active May 11, 2024 13:24
remote gpu fan control
#!/bin/bash
# Set a fixed fan speed (percent) on a headless NVIDIA GPU via nvidia-settings.
# Based on: http://coldmooon.github.io/2015/08/10/fans_control/
# Usage: ./nvfan-control.sh <speed 0-100>

speed="$1"

# Refuse to run without a numeric speed argument; the original script would
# pass an empty attribute value to nvidia-settings when $1 was missing.
if ! [[ "$speed" =~ ^[0-9]+$ ]]; then
    echo "usage: $0 <fan-speed 0-100>" >&2
    exit 1
fi

echo "$speed"

# Target the local X display so this also works from an SSH session.
export DISPLAY=:0.0

# Enable manual fan control on the GPU, then apply the target speed to both
# fans. Quoting "$speed" avoids word-splitting/globbing of the argument.
nvidia-settings -a "[gpu:0]/GPUFanControlState=1" -a "[fan:0]/GPUTargetFanSpeed=$speed"
nvidia-settings -a "[gpu:0]/GPUFanControlState=1" -a "[fan:1]/GPUTargetFanSpeed=$speed"
@iohub
iohub / tllm-openapi.py
Created April 27, 2024 02:36
tensorrt-llm infer api
from typing import Union
from fastapi import FastAPI
from pydantic import BaseModel
import numpy as np
import torch
from utils import (DEFAULT_HF_MODEL_DIRS, DEFAULT_PROMPT_TEMPLATES,
load_tokenizer, read_model_name, throttle_generator)
@iohub
iohub / process-guwen.py
Created April 11, 2024 13:41
process-guwen.py
import sys
import json
from typing import Union
from pathlib import Path
def _resolve_path(path: Union[str, Path]) -> Path:
return Path(path).expanduser().resolve()
@iohub
iohub / chatchat.py
Created April 9, 2024 13:19
langchain-chatchat-cpp
set OPENAI_API_KEY=xxx
python startup.py -a -i
def get_ChatOpenAI(
model_name: str,
temperature: float,
max_tokens: int = None,
streaming: bool = True,
callbacks: List[Callable] = [],
@iohub
iohub / melo-tts-install.log
Created April 2, 2024 12:26
melo-tts-install
Download UniDic /home/do/ssd/env/tts/lib/python3.10/site-packages/unidic/dicdir
@iohub
iohub / infer-with-lora-model.py
Last active March 31, 2024 13:57
infer-with-lora-model
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import transformers
# Path to the LoRA adapter checkpoint produced by a fine-tuning run.
adapters_name = "/home/do/ssd/proj/finetune-hub/ck/checkpoint-1000"
# Local path to the base model the adapter weights are applied on top of.
model_name = "/home/do/ssd/modelhub/Starling-LM-7B-beta"
device = "cuda" # the device to load the model onto
@iohub
iohub / finetune-openchat.py
Last active April 10, 2024 14:53
finetune openchat
from transformers import TrainingArguments
from trl import SFTTrainer
from datasets import Dataset
from unsloth import FastMistralModel, FastLanguageModel, FastLlamaModel
import torch
import pandas as pd
'''
{
"instruction": "编辑一句话,改变句子结构,让它更加有趣。",
@iohub
iohub / finetune.py
Last active March 30, 2024 10:56
finetune openchat by unsloth
from unsloth import FastMistralModel, FastLanguageModel, FastLlamaModel
import torch
# Maximum sequence length for fine-tuning inputs.
max_seq_length = 4096 # Can change to whatever number <= 4096
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
# SECURITY NOTE(review): a Hugging Face access token is committed here in
# plain text — it is leaked and should be revoked immediately, then loaded
# from the environment (e.g. os.environ["HF_TOKEN"]) instead of hard-coded.
hf_token = "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"
model, tokenizer = FastMistralModel.from_pretrained(
# Pull the Triton server image that bundles the TensorRT-LLM Python toolchain.
docker pull nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3
# Start a persistent, GPU-enabled container named "tensorrt-llm" with host
# networking, 4 GB shared memory, and /home bind-mounted for the build tree.
docker run -dt --name tensorrt-llm --restart=always --gpus all --network=host --shm-size=4g -m 64G -v /home/:/home -w /home/do nvcr.io/nvidia/tritonserver:23.10-trtllm-python-py3 /bin/bash
# Fix: the container was created as "tensorrt-llm" above, but the original
# exec targeted "trtllm", which fails with "No such container".
docker exec -it tensorrt-llm /bin/bash
# Inside the container: fetch the sources and build the wheel against the
# TensorRT installation shipped in the image.
git clone https://github.com/NVIDIA/TensorRT-LLM.git
cd TensorRT-LLM
git submodule update --init --recursive
python3 ./scripts/build_wheel.py --trt_root /usr/local/tensorrt
@iohub
iohub / fastload.py
Created July 6, 2022 04:17
python generator
import time
import os
# delete it
class FakeWB:
def __init__(self, name, age):
self.name = name
self.age = age