This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List, Any | |
import abc | |
from transformers import PreTrainedTokenizerFast | |
class BaseTokenizer(abc.ABC): | |
def __init__(self) -> None: | |
super().__init__() | |
self.bos_id = 1 | |
self.eos_id = 2 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import openai | |
text_to_embeddings_cache = {} | |
def get_embeddings_for_instructions(instructions: list[str]):
    """Request embeddings for a batch of instruction strings in one API call.

    Args:
        instructions: The texts to embed.

    Returns:
        The ``data`` field of the ``openai.Embedding.create`` response
        (presumably one entry per input text -- confirm against the API
        reference), or an empty list when ``instructions`` is empty.
    """
    # Nothing to embed: skip the request instead of sending an empty batch.
    if not instructions:
        return []

    response = openai.Embedding.create(
        input=instructions,
        model="text-embedding-ada-002",
    )
    return response["data"]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from datasets import Dataset | |
from transformers import ( | |
AutoModelForMaskedLM, | |
AutoTokenizer, | |
DataCollatorForLanguageModeling, | |
Trainer, | |
TrainingArguments, | |
) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
from torch.utils.data import DataLoader | |
from transformers import AutoModel, AutoTokenizer | |
from transformers.optimization import get_cosine_schedule_with_warmup | |
import pytorch_lightning as pl | |
# from deepspeed.ops.adam import FusedAdam - get different error (expected tensor on cuda but got cpu) with FusedAdam | |
class BoringDataset(torch.utils.data.Dataset): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch.optim import AdamW | |
from transformers import AutoModel | |
def get_bert_layerwise_lr_groups(bert_model, learning_rate=1e-5, layer_decay=0.9): | |
""" | |
Gets parameter groups with decayed learning rate based on depth in network | |
Layers closer to output will have higher learning rate | |
Args: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
for name in $(aws dynamodb list-tables | jq .TableNames) | |
do | |
l=${#name} | |
if [ ${#name} -le 3 ]; then echo "Skipping name!" | |
else | |
short=${name:1:l-2} # clean json string representation of dataset | |
if [ "${short: -1}" == '"' ] | |
then short=${name:1:l-3} | |
fi | |
echo "Updating Table: ${short}" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import pytorch_lightning as pl | |
class BoringModel(pl.LightningModule): | |
def __init__( | |
self | |
): | |
super(BoringModel, self).__init__() | |
self.automatic_optimization = False |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import pytorch_lightning as pl | |
class NCE(pl.LightningModule): | |
""" | |
This implementation is taken from https://github.com/Spijkervet/SimCLR/blob/master/simclr/modules/nt_xent.py | |
The mask_correlated_samples function has been modified to be much faster to compute | |
and can therefore be called at train time without a predefined batch size. | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import pytorch_lightning as pl | |
class MyTransformer(pl.LightningModule): | |
def __init__( | |
self, | |
learning_rate=0.001, | |
warmup=4000, | |
): | |
self.learning_rate = learning_rate |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://apple.stackexchange.com/questions/175215/how-do-i-assign-a-keyboard-shortcut-to-an-applescript-i-wrote | |
on run {input, parameters} | |
tell application "System Events" | |
key code 123 using command down | |
keystroke "print(" | |
key code 124 using command down | |
keystroke ")" | |
end tell |
NewerOlder