2023-09-12 22:33:49 INFO [jondurbin_spicyboros-70b-2.2.GGUF] Making unquantised GGUF at /workspace/process/jondurbin_spicyboros-70b-2.2/gguf/spicyboros-70b-2.2.fp16.gguf
Loading model file /workspace/process/jondurbin_spicyboros-70b-2.2/source/pytorch_model-00001-of-00015.bin
Loading model file /workspace/process/jondurbin_spicyboros-70b-2.2/source/pytorch_model-00001-of-00015.bin
Loading model file /workspace/process/jondurbin_spicyboros-70b-2.2/source/pytorch_model-00002-of-00015.bin
Loading model file /workspace/process/jondurbin_spicyboros-70b-2.2/source/pytorch_model-00003-of-00015.bin
Loading model file /workspace/process/jondurbin_spicyboros-70b-2.2/source/pytorch_model-00004-of-00015.bin
Loading model file /workspace/process/jondurbin_spicyboros-70b-2.2/source/pytorch_model-00005-of-00015.bin
Loading model file /workspace/process/jondurbin_spicyboros-70b-2.2/source/pytorch_model-00006-of-00015.bin
Loading model file /workspace/process/jondurbin_spicyboros-70b-2.2/source/pytorch_model-00007-of-00015.bin
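
A step like the one logged above can be reproduced with llama.cpp's convert script. A minimal sketch, assuming the convert.py entry point and its --outtype/--outfile flags as they existed in llama.cpp checkouts of that era; the paths are taken from the log:

#!/usr/bin/env python3
# Sketch: produce the fp16 GGUF from the source shards, mirroring the log above.
import subprocess

source_dir = "/workspace/process/jondurbin_spicyboros-70b-2.2/source"
outfile = "/workspace/process/jondurbin_spicyboros-70b-2.2/gguf/spicyboros-70b-2.2.fp16.gguf"

# convert.py and its flags are an assumption about the llama.cpp checkout in use.
subprocess.run(
    ["python3", "convert.py", source_dir, "--outtype", "f16", "--outfile", outfile],
    check=True,
)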
(pytorch2) ubuntu@a10:/workspace/git/gguf-llama (master ✔) ᐅ ./main -m /workspace/spicyboros-70B-2.2.Q4_0.gguf -c 4096 -p "A chat.\nUSER: Write a story about llamas\nASSISTANT:" -n 128
Log start
main: build = 1215 (89e8959)
main: seed = 1694547445
ggml_init_cublas: found 1 CUDA devices:
Device 0: NVIDIA A10, compute capability 8.6
llama_model_loader: loaded meta data with 20 key-value pairs and 723 tensors from /workspace/spicyboros-70B-2.2.Q4_0.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor 0: token_embd.weight q4_0 [ 8192, 32000, 1, 1 ]
llama_model_loader: - tensor 1: blk.0.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 2: blk.0.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
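
The Q4_0 file loaded above is produced from the fp16 GGUF with llama.cpp's quantize tool. A minimal sketch, assuming the classic ./quantize <input> <output> <type> invocation; the binary path and filenames echo the logs:

import subprocess

fp16_gguf = "/workspace/process/jondurbin_spicyboros-70b-2.2/gguf/spicyboros-70b-2.2.fp16.gguf"
q4_gguf = "/workspace/spicyboros-70B-2.2.Q4_0.gguf"

# ./quantize <input> <output> <type> is the standard llama.cpp invocation.
subprocess.run(["./quantize", fp16_gguf, q4_gguf, "q4_0"], check=True)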
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import argparse


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_model_name_or_path", type=str)
    parser.add_argument("--output_dir", type=str)
    parser.add_argument("--dtype", type=str, default="float16")
    parser.add_argument("--max_shard_size", type=str, default="9GiB")
    return parser.parse_args()
import logging
import time
import argparse
import os
import sys
from multiprocessing import Process, Queue
import threading
logger = logging.getLogger(__name__)
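
These imports point at a multiprocess job runner. A minimal sketch of the pattern the imports imply, reusing the Process, Queue, and logger names from the block above; the worker function and job payload are hypothetical:

def worker(queue):
    # Drain jobs until a None sentinel arrives.
    while True:
        job = queue.get()
        if job is None:
            break
        logger.info("processing %s", job)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    jobs = Queue()
    proc = Process(target=worker, args=(jobs,))
    proc.start()
    jobs.put("example-job")
    jobs.put(None)  # sentinel: tell the worker to exit
    proc.join()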
[
  {
    "id": "0",
    "conversations": [
      {
        "from": "human",
        "value": "Hey Samantha, I've run into a bit of a tricky situation at work, and I'm not sure how to handle it. Do you have any advice?"
      },
      {
        "from": "gpt",
#!/usr/bin/env python3
import argparse
import os
import subprocess


def main(model, outbase, outdir):
    llamabase = "/workspace/venv/git/llama.cpp"
    ggml_version = "v3"
    if not os.path.isdir(model):
        raise SystemExit(f"Could not find model dir at {model}")
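
The def main(model, outbase, outdir) signature implies a CLI wrapper. A minimal sketch of the missing argument wiring, which is hypothetical and not from the original script:

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="path to the source model directory")
    parser.add_argument("outbase", help="base name for the output files")
    parser.add_argument("outdir", help="directory to write converted files to")
    args = parser.parse_args()
    main(args.model, args.outbase, args.outdir)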
import time
import os
import logging
import random
from datasets import load_dataset


class QuantAutoGPTQ:
    def __init__(self, model_name_or_path, output_dir, dataset,
                 num_samples=128, trust_remote_code=False, cache_examples=True,
                 use_fast=True, use_triton=False, bits=[4], group_size=[128], damp=[0.01],
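
The constructor signature is truncated above, but the parameters (bits, group_size, damp) mirror the AutoGPTQ library's documented API. A minimal sketch of the underlying flow such a class wraps, assuming auto_gptq's AutoGPTQForCausalLM/BaseQuantizeConfig interface; the model name, output directory, and calibration text are hypothetical examples:

from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from transformers import AutoTokenizer

model_name_or_path = "facebook/opt-125m"  # hypothetical example model
output_dir = "opt-125m-gptq"              # hypothetical output directory

quantize_config = BaseQuantizeConfig(bits=4, group_size=128, damp_percent=0.01)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
model = AutoGPTQForCausalLM.from_pretrained(model_name_or_path, quantize_config)

# Calibration examples: tokenized text samples used to compute quantization statistics.
examples = [tokenizer("auto-gptq quantizes a model from a handful of calibration samples.")]
model.quantize(examples)
model.save_quantized(output_dir, use_safetensors=True)
tokenizer.save_pretrained(output_dir)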
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
import os
import argparse


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_model_name_or_path", type=str)
cmake_minimum_required(VERSION 3.16)

if(APPLE)
  # Build a Universal binary on macOS
  # This requires that the found Qt library is compiled as Universal binaries.
  set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
endif()

project(gpt4all VERSION 0.1.0 LANGUAGES CXX)
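
A typical out-of-source configure-and-build for a CMakeLists like this one, as a usage note; the build directory name is arbitrary:

cmake -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build --parallel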