This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" | |
import copy | |
import re | |
import importlib | |
import os | |
import tempfile | |
from collections import OrderedDict |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datasets import load_dataset, Dataset as hf_Dataset | |
from transformers import pipeline | |
from transformers.pipelines.zero_shot_classification import Scoring | |
from tqdm import tqdm | |
from random import shuffle | |
from typing import List | |
from pprint import pprint | |
from collections import defaultdict | |
import json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import pipeline | |
from torch.utils.data import Dataset | |
import tqdm | |
pipe = pipeline("text-classification", device=0) | |
class MyDataset(Dataset): | |
def __len__(self): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import pipeline | |
import random | |
from torch.utils.data import Dataset | |
import tqdm | |
pipe = pipeline("text-classification", device=0) | |
class MyDataset(Dataset): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import pipeline | |
from torch.utils.data import Dataset | |
import tqdm | |
pipe = pipeline("text-classification", device=0) | |
class MyDataset(Dataset): | |
def __len__(self): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import time | |
from collections import defaultdict | |
from typing import List | |
with open("vocab.json", "r") as f: | |
vocab = json.load(f) | |
def normal(vocab: dict) -> List[str]: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
from transformers import pipeline | |
import torch | |
print("============== DUMMY ====================") | |
start = datetime.datetime.now() | |
device = "cpu" | |
generator = pipeline("text-generation", model="gpt2", max_new_tokens=20, device=device, do_sample=False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
from huggingface_hub import hf_hub_download | |
from safetensors.torch import load_file | |
import torch | |
device = "cpu" | |
sf_filename = hf_hub_download("gpt2", filename="model.safetensors") | |
pt_filename = hf_hub_download("gpt2", filename="pytorch_model.bin") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import mmap | |
import torch | |
import json | |
import os | |
from huggingface_hub import hf_hub_download | |
def load_file(filename, device): | |
with open(filename, mode="r", encoding="utf8") as file_obj: | |
with mmap.mmap(file_obj.fileno(), length=0, access=mmap.ACCESS_READ) as m: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import pipeline | |
from datasets import load_dataset | |
import datetime | |
import torch | |
pipe = pipeline("automatic-speech-recognition", model="hf-internal-testing/tiny-random-wav2vec2", device=0) | |
dataset = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation[:10]") | |
filenames = [item["audio"]["path"] for item in dataset] |