Skip to content

Instantly share code, notes, and snippets.

Created November 22, 2023 04:22
Show Gist options
  • Save MarshalW/6856f8ccc56218a053b24527a8acf972 to your computer and use it in GitHub Desktop.
Save MarshalW/6856f8ccc56218a053b24527a8acf972 to your computer and use it in GitHub Desktop.
langchain-chatchat 运行 qwen 量化模型的设置

langchain-chatchat 运行 qwen 量化模型的设置


# Base image: NVIDIA CUDA 12.2.0 "devel" image on Ubuntu 22.04.
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04

# Install python/pip/git.
# NOTE(review): the sed expression below looks truncated (there is no
# pattern/replacement pair); it presumably rewrote the apt mirror in
# /etc/apt/sources.list. Restore the full expression before building.
RUN sed -i s@/ /etc/apt/sources.list \
    && apt-get update \
    && apt-get install \
    python3 \
    python3-pip \
    git \
    -y -qq

# Install PyTorch (torch, torchvision, torchaudio).
# NOTE(review): `pip config ... set global.index-url` has no value here;
# presumably a package index mirror URL was dropped. Supply it or remove
# the command before building.
RUN pip config --user set global.index-url \
    && pip3 install torch torchvision torchaudio

# Application root inside the image; subsequent relative paths resolve here.
WORKDIR /Langchain-Chatchat
COPY ./Langchain-Chatchat /Langchain-Chatchat

# Install the project's Python dependencies. --no-cache-dir keeps pip's
# download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt

# Initialise parameters.
# NOTE(review): both commands below appear to have lost their arguments
# (`python3` names no script, `chmod +x ./` names no file). Restore the
# intended script paths before building.
RUN python3

RUN chmod +x ./

# System shared libraries (libGL, GLib); presumably required by imaging
# dependencies such as OpenCV. Confirm against requirements.txt.
# The package index is refreshed in the same layer as the install, the
# install runs non-interactively (-y), and the apt lists are removed
# afterwards so they do not stay in the layer.
RUN apt-get update \
    && apt-get install -y -qq \
    libgl1-mesa-glx \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# zhipuai Python package; presumably backs a Zhipu API model entry.
RUN pip install --no-cache-dir zhipuai

# Support for Tongyi Qianwen (Qwen) models.
RUN pip install \
    transformers==4.32.0 \
    accelerate tiktoken einops scipy \
    transformers_stream_generator==0.0.4 \
    peft deepspeed
# Install flash-attention from a source checkout.
# NOTE(review): `git clone` has no repository URL here; it appears to have
# been dropped. Supply it before building.
RUN git clone \
    && cd flash-attention && pip install .
# optimum and auto-gptq; presumably needed to load the GPTQ-quantized model.
RUN pip install optimum auto-gptq

# Default container command (exec form).
# NOTE(review): the path ends at the application directory and names no
# executable; the script name appears to be missing. Confirm and restore it.
CMD ["/Langchain-Chatchat/"]

# LLM entries configured for this setup, one name per line.
LLM_MODELS = [
    "Qwen-14B-Chat-Int4",
    "zhipu-api",
    "openai-api",
]

Qwen-14B-Chat-Int4 模型的 ./config.json:

  "architectures": [
  "auto_map": {
    "AutoConfig": "configuration_qwen.QWenConfig",
    "AutoModelForCausalLM": "modeling_qwen.QWenLMHeadModel"
  "attn_dropout_prob": 0.0,
  "bf16": false,
  "emb_dropout_prob": 0.0,
  "fp16": true,
  "fp32": false,
  "hidden_size": 5120,
  "intermediate_size": 27392,
  "initializer_range": 0.02,
  "kv_channels": 128,
  "layer_norm_epsilon": 1e-06,
  "max_position_embeddings": 8192,
  "model_type": "qwen",
  "no_bias": true,
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
  "onnx_safe": null,
  "quantization_config": {
    "bits": 4,
    "group_size": 128,
    "damp_percent": 0.01,
    "desc_act": false,
    "static_groups": false,
    "sym": true,
    "true_sequential": true,
    "model_name_or_path": null,
    "model_file_base_name": "model",
    "quant_method": "gptq",
    "disable_exllama": true // 加这个
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment