Gautier Dagan (gautierdag)
@gautierdag
gautierdag / cache.py
Created April 27, 2023 10:57
Simple caching of openai Embeddings
import openai

# in-memory cache: instruction text -> embedding vector
text_to_embeddings_cache = {}

def get_embeddings_for_instructions(instructions: list[str]):
    # assumed caching behaviour: consult the cache first, only embed what is missing
    missing = [text for text in instructions if text not in text_to_embeddings_cache]
    if missing:
        response = openai.Embedding.create(
            input=missing,
            model="text-embedding-ada-002",
        )
        for text, item in zip(missing, response["data"]):
            text_to_embeddings_cache[text] = item["embedding"]
    return [text_to_embeddings_cache[text] for text in instructions]
@gautierdag
gautierdag / pretrain.py
Created May 23, 2022 20:42
Pretrain transformer using MLM objective from Pandas dataframe
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoModelForMaskedLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
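The preview stops at the imports. Continuing from them, and assuming the DataFrame has a single text column (the column name, checkpoint, and hyperparameters below are illustrative, not the gist's actual values), the rest of the pipeline plausibly looks like:

df = pd.DataFrame({"text": ["some example sentence", "another sentence"]})
dataset = Dataset.from_pandas(df)

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")

def tokenize(batch):
    return tokenizer(batch["text"], truncation=True, max_length=128)

tokenized = dataset.map(tokenize, batched=True, remove_columns=dataset.column_names)

# the collator takes care of randomly masking tokens for the MLM objective
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="mlm-pretrain", num_train_epochs=1),
    train_dataset=tokenized,
    data_collator=collator,
)
trainer.train()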
@gautierdag
gautierdag / deepspeedbug.py
Created July 21, 2021 14:57
RuntimeError: output with shape [1] doesn't match the broadcast shape [1024, 1024]
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from transformers import AutoModel, AutoTokenizer
from transformers.optimization import get_cosine_schedule_with_warmup
import pytorch_lightning as pl
# from deepspeed.ops.adam import FusedAdam  # FusedAdam instead raises a different error: "expected tensor on cuda but got cpu"
class BoringDataset(torch.utils.data.Dataset):
@gautierdag
gautierdag / layerwise_lr.py
Created July 5, 2021 10:24
Pytorch Bert Layer-wise Learning Rate Decay
import torch
from torch.optim import AdamW
from transformers import AutoModel
def get_bert_layerwise_lr_groups(bert_model, learning_rate=1e-5, layer_decay=0.9):
    """
    Gets parameter groups with a learning rate that decays with depth in the
    network, so layers closer to the output get a higher learning rate.
    Args:
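The preview ends inside the docstring. A minimal sketch of how a function with this signature can be implemented for a Hugging Face BERT model (the traversal and group format below are assumptions, not the gist's actual body):

def get_bert_layerwise_lr_groups(bert_model, learning_rate=1e-5, layer_decay=0.9):
    # assumed layout: embeddings first, then the stacked encoder layers
    layers = [bert_model.embeddings] + list(bert_model.encoder.layer)
    n_layers = len(layers)
    groups = []
    for depth, layer in enumerate(layers):
        # deeper layers (closer to the output) decay less
        lr = learning_rate * (layer_decay ** (n_layers - 1 - depth))
        groups.append({"params": list(layer.parameters()), "lr": lr})
    return groups

model = AutoModel.from_pretrained("bert-base-uncased")
optimizer = AdamW(get_bert_layerwise_lr_groups(model), lr=1e-5)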
@gautierdag
gautierdag / update_tables.sh
Created June 11, 2021 19:54
Make all DynamoDB Tables On-Demand - Use with caution ⚠️
for name in $(aws dynamodb list-tables | jq .TableNames)
do
  l=${#name}
  # skip JSON punctuation tokens such as "[" and "]"
  if [ ${#name} -le 3 ]; then echo "Skipping ${name}!"
  else
    short=${name:1:l-2} # strip the JSON quoting around the table name
    if [ "${short: -1}" == '"' ]
    then short=${name:1:l-3}
    fi
    echo "Updating Table: ${short}"
@gautierdag
gautierdag / multiple_optims_pl.py
Created April 14, 2021 12:04
Example of how to use different optims for different layers or modules using pytorch lightning
import torch
import torch.nn as nn
import pytorch_lightning as pl

class BoringModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        # manual optimization lets us step several optimizers ourselves
        self.automatic_optimization = False
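The preview stops at the constructor. With automatic optimization off, the usual Lightning pattern is to return several optimizers from configure_optimizers and step them manually in training_step; the encoder/head split and compute_loss helper below are illustrative assumptions:

    def configure_optimizers(self):
        # one optimizer per module group (hypothetical encoder/head modules)
        opt_encoder = torch.optim.Adam(self.encoder.parameters(), lr=1e-4)
        opt_head = torch.optim.SGD(self.head.parameters(), lr=1e-2)
        return opt_encoder, opt_head

    def training_step(self, batch, batch_idx):
        opt_encoder, opt_head = self.optimizers()
        loss = self.compute_loss(batch)  # hypothetical loss helper
        opt_encoder.zero_grad()
        opt_head.zero_grad()
        self.manual_backward(loss)
        opt_encoder.step()
        opt_head.step()
        return loss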
@gautierdag
gautierdag / NCE.py
Last active April 12, 2021 21:50
Pytorch NCE Loss
import torch
import torch.nn as nn
import pytorch_lightning as pl
class NCE(pl.LightningModule):
    """
    This implementation is taken from https://github.com/Spijkervet/SimCLR/blob/master/simclr/modules/nt_xent.py
    The mask_correlated_samples function has been modified to be much faster to compute,
    so it can be called at train time without a predefined batch size.
    """
@gautierdag
gautierdag / model_with_noam.py
Last active September 25, 2023 02:27
pytorch-lightning Transformer (noam) lr policy
import torch
import pytorch_lightning as pl

class MyTransformer(pl.LightningModule):
    def __init__(
        self,
        learning_rate=0.001,
        warmup=4000,
    ):
        super().__init__()  # required before assigning attributes on a LightningModule
        self.learning_rate = learning_rate
        self.warmup = warmup
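The preview cuts off in the constructor. The Noam policy from "Attention Is All You Need" scales the learning rate as min(step^-0.5, step * warmup^-1.5); wired into Lightning it plausibly looks like this (a sketch, not the gist's exact body):

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)

        def noam(step):
            step = max(step, 1)  # LambdaLR starts at step 0; avoid division by zero
            return min(step ** -0.5, step * self.warmup ** -1.5)

        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=noam)
        # step the scheduler every batch, not every epoch
        return {
            "optimizer": optimizer,
            "lr_scheduler": {"scheduler": scheduler, "interval": "step"},
        }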
@gautierdag
gautierdag / print_macro.workflow
Last active September 9, 2020 09:22
MacOS macro for wrapping line in text with print("....")
# https://apple.stackexchange.com/questions/175215/how-do-i-assign-a-keyboard-shortcut-to-an-applescript-i-wrote
on run {input, parameters}
	tell application "System Events"
		key code 123 using command down -- Cmd+Left: jump to start of line
		keystroke "print("
		key code 124 using command down -- Cmd+Right: jump to end of line
		keystroke ")"
	end tell
end run
@gautierdag
gautierdag / numpy_compress.py
Last active July 15, 2020 10:47
Numpy Optimized Compress
import numpy as np

# cast as object to be able to set np.nan and handle different types
arr = df[cols].values.astype(object)

# build an array shifted down by one row
shifted = np.roll(arr, 1, axis=0)  # axis=0 assumed: without it numpy rolls the flattened array
shifted[0] = np.nan

# choose indexes based on shift comparison
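The preview stops at the comparison. Presumably the compression finishes by keeping only the rows that differ from their predecessor, along these lines (a sketch; `df` and `cols` are assumed to be defined as above):

# keep a row whenever any of its columns changed vs. the previous row
# (np.nan != np.nan is True, so the first row is always kept)
changed = np.any(arr != shifted, axis=1)
compressed = df[cols][changed]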