This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| from concurrent.futures import ProcessPoolExecutor | |
| from functools import partial | |
| import numpy as np | |
| import pandas as pd | |
| import torch | |
| import torch.multiprocessing as mp | |
| import torch.nn.functional as F | |
| from datasets import load_dataset |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from contextlib import contextmanager | |
| from functools import partial | |
| from typing import Any | |
| import torch.nn.functional as F | |
| from torch import Tensor, nn | |
| from transformers import PreTrainedModel | |
| from sparsify import SparseCoder |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from functools import partial | |
| import random | |
| from argparse import ArgumentParser | |
| from typing import Literal | |
| import os | |
| from glob import glob | |
| import gzip | |
| from datasets import ( | |
| load_dataset, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from datasets import load_dataset, concatenate_datasets | |
| from functools import partial | |
def process_map_fn(example, key):
    """Project one field of ``example`` into a single-key ``"text"`` record.

    Args:
        example: A subscriptable record (e.g. a dict) that contains ``key``.
        key: The field of ``example`` whose value becomes the text.

    Returns:
        A new dict ``{"text": example[key]}``; ``example`` itself is not
        modified.

    Raises:
        KeyError: If ``example`` is a plain dict and ``key`` is absent
            (other record types raise whatever their lookup raises).
    """
    # NOTE(review): presumably bound with functools.partial(key=...) and
    # passed to a dataset ``map`` call -- confirm at the call site.
    return {"text": example[key]}
| def flatten_and_format_conversations(example): | |
| """Flattens the conversations and formats them into a single text field.""" | |
| user_message = example["conversations"][0]["value"] |