This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import numpy as np | |
from hqq.core.quantize import HQQLinear, BaseQuantizeConfig, Quantizer, HQQBackend | |
from hqq.backends.torchao import HQQLinearTorchWeightOnlynt4, patch_hqq_to_aoint4 | |
# from unpack_int4.ops import unpack_int4_packed | |
import torchao | |
import bitblas | |
# unpack_cuda_compiled = torch.compile(torchao.ops.unpack_int4_to_int, mode="default", fullgraph=True) | |
from bitblas.cache import global_operator_cache, get_database_path |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
gpt_eval_template_coherence = """ | |
You will be given title: [TITLE] and description: [DESC] written from a set of information of a real estate listing in Turkish. | |
Your task is to rate the title and description on one metric. | |
Please make sure you read and understand these instructions carefully. Please keep this | |
document open while reviewing, and refer to it as needed. | |
Evaluation Criteria: | |
Coherence (1-5) - the collective quality of all sentences. We align this dimension with |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Testing flash attn with multipacking which essentially packs sequences using https://github.com/imoneoi/multipack_sampler, | |
and passes a single sequence of `1 x (bs x seqlen)` to the model to avoid padding. | |
An alternative is to use block diagonal attention as attention bias, but the following uses flash attention 2 which | |
is much faster. | |
Multipacking can be used to speed up both pretraining and finetuning. | |
""" |
We can make this file beautiful and searchable if this error is corrected: No tabs found in this TSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
TsvHttpData-1.0 | |
https://files.pushshift.io/reddit/comments/RC_2005-12.zst |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fastai.vision.all import * | |
__all__ = ["EMA", "SWA"] | |
class EMA(Callback): | |
"https://fastai.github.io/timmdocs/training_modelEMA" | |
order,run_valid=5,False | |
def __init__(self, decay=0.9999): | |
super().__init__() | |
self.decay = decay |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fastai.vision.all import * | |
from torch.cuda.amp import autocast, GradScaler | |
from torch.cuda.amp.grad_scaler import _refresh_per_optimizer_state | |
from sam import SAM | |
class FastaiSched: | |
def __init__(self, optimizer, max_lr): | |
self.optimizer = optimizer | |
self.lr_sched = combine_scheds([0.1,0.9], [SchedLin(1e-8,max_lr), SchedCos(max_lr,1e-8)]) | |
self.update(0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import wandb | |
from fastai.callback.wandb import WandbCallback | |
from fastai.distributed import * | |
torch.backends.cudnn.benchmark = True | |
from zero_optimizer import ZeroRedundancyOptimizer | |
@patch | |
def after_batch(self: WandbCallback): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@call_parse | |
def main( | |
size: Param("Image resolution", int)=224, | |
bs: Param("Batch Size", int)=128, | |
epochs: Param("Number of epochs for training", int)=1, | |
lr: Param("Learning rate for training", float)=5e-5): | |
WANDB = True | |
# start wandb |
NewerOlder