Skip to content

Instantly share code, notes, and snippets.

@Narsil
Narsil / create_dummy_models.py
Last active July 27, 2021 17:11
Creating all dummy models with weights
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import copy
import re
import importlib
import os
import tempfile
from collections import OrderedDict
from datasets import load_dataset, Dataset as hf_Dataset
from transformers import pipeline
from transformers.pipelines.zero_shot_classification import Scoring
from tqdm import tqdm
from random import shuffle
from typing import List
from pprint import pprint
from collections import defaultdict
import json
from transformers import pipeline
from torch.utils.data import Dataset
import tqdm
pipe = pipeline("text-classification", device=0)
class MyDataset(Dataset):
def __len__(self):
from transformers import pipeline
import random
from torch.utils.data import Dataset
import tqdm
pipe = pipeline("text-classification", device=0)
class MyDataset(Dataset):
from transformers import pipeline
from torch.utils.data import Dataset
import tqdm
pipe = pipeline("text-classification", device=0)
class MyDataset(Dataset):
def __len__(self):
import json
import time
from collections import defaultdict
from typing import List
with open("vocab.json", "r") as f:
vocab = json.load(f)
def normal(vocab: dict) -> List[str]:
@Narsil
Narsil / test.py
Created November 2, 2022 21:09
Dummy script to try out safetensors vs. PyTorch loading.
import datetime
from transformers import pipeline
import torch
print("============== DUMMY ====================")
start = datetime.datetime.now()
device = "cpu"
generator = pipeline("text-generation", model="gpt2", max_new_tokens=20, device=device, do_sample=False)
@Narsil
Narsil / load.py
Created November 10, 2022 09:44
Compare PyTorch speed vs. Safetensors
import datetime
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
import torch
device = "cpu"
sf_filename = hf_hub_download("gpt2", filename="model.safetensors")
pt_filename = hf_hub_download("gpt2", filename="pytorch_model.bin")
@Narsil
Narsil / pure_torch.py
Created November 10, 2022 15:06
Loading a safetensors file with pure torch only
import mmap
import torch
import json
import os
from huggingface_hub import hf_hub_download
def load_file(filename, device):
with open(filename, mode="r", encoding="utf8") as file_obj:
with mmap.mmap(file_obj.fileno(), length=0, access=mmap.ACCESS_READ) as m:
@Narsil
Narsil / datasets_asr_pipeline.py
Created November 23, 2022 15:41
A few methods for using datasets + pipelines.
from transformers import pipeline
from datasets import load_dataset
import datetime
import torch
pipe = pipeline("automatic-speech-recognition", model="hf-internal-testing/tiny-random-wav2vec2", device=0)
dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:10]")
filenames = [item["audio"]["path"] for item in dataset]