- Microcode update: keeping your CPU microcode up to date can improve both performance and security. You can install the AMD microcode package by running:
sudo apt install amd64-microcode
# pip install sentence-transformers | |
from sentence_transformers import SentenceTransformer, util, models | |
model_name = "nomic-ai/nomic-embed-text-v1" | |
pooling_mode = "mean" | |
word_embedding_model = models.Transformer( | |
model_name, | |
max_seq_length=8192, | |
model_args={"trust_remote_code": True, "rotary_scaling_factor": 2}, | |
tokenizer_args={"trust_remote_code": True}, |
# pip install sentence-transformers -q | |
# source: https://www.sbert.net/docs/usage/semantic_textual_similarity.html | |
from sentence_transformers import SentenceTransformer, util | |
# Load the pre-trained MiniLM sentence-embedding model used below.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")
# Two lists of sentences | |
sentences1 = [ | |
"The cat sits outside", |
from transformers import AutoTokenizer | |
def load_and_ensure_tokens(model_name): | |
# Load the tokenizer | |
tokenizer = AutoTokenizer.from_pretrained(model_name) | |
# Essential special tokens with their default values | |
essential_tokens = { | |
"pad_token": "<pad>", |
import argparse | |
import logging | |
import time | |
from datetime import datetime | |
from pathlib import Path | |
from typing import Optional | |
from huggingface_hub import upload_folder | |
from watchdog.events import PatternMatchingEventHandler | |
from watchdog.observers import Observer |
import logging | |
import random | |
import time | |
from pathlib import Path | |
import fire | |
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
logging.basicConfig(format="%(levelname)s - %(message)s", level=logging.INFO) |
""" | |
The script is designed to monitor a specified directory for any file system changes (like additions, deletions, or modifications of files and subdirectories) and automatically upload the changes to a specified repository on the Hugging Face Hub. | |
pip install huggingface-hub watchdog | |
""" | |
import argparse | |
import logging | |
import time | |
from pathlib import Path |
import os | |
import random | |
from datasets import load_dataset | |
def format_dataset(example): | |
"""Formats the dataset example into a single 'text' field.""" | |
# Add input only if it is longer than 2 characters |
import logging | |
import subprocess | |
import torch | |
def check_ampere_gpu(): | |
"""Check if the GPU supports NVIDIA Ampere or later and enable FP32 in PyTorch if it does.""" | |
# Check if CUDA is available |
import os | |
import time | |
import requests | |
class Timer: | |
"""Basic timer utility.""" | |
def __enter__(self): |