import inspect
import logging
import os
from pathlib import Path

import torch
from psutil import cpu_count
from transformers import T5Config, T5ForConditionalGeneration, T5Tokenizer
from transformers.generation_utils import GenerationMixin
from transformers.modeling_outputs import BaseModelOutputWithPast, Seq2SeqLMOutput
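These imports set up an ONNX export path for T5. As a rough, hedged sketch of what such a script does (the checkpoint name, output file, and axis names below are illustrative assumptions, not taken from the original), the encoder stack can be traced with torch.onnx.export:

import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

model = T5ForConditionalGeneration.from_pretrained("t5-small").eval()
model.encoder.config.return_dict = False  # plain tuple outputs trace more cleanly
tokenizer = T5Tokenizer.from_pretrained("t5-small")

inputs = tokenizer("translate English to French: hello", return_tensors="pt")

torch.onnx.export(
    model.encoder,                                    # export only the encoder stack
    (inputs["input_ids"], inputs["attention_mask"]),  # dummy inputs used for tracing
    "t5-small-encoder.onnx",
    input_names=["input_ids", "attention_mask"],
    output_names=["hidden_states"],
    dynamic_axes={
        "input_ids": {0: "batch", 1: "sequence"},
        "attention_mask": {0: "batch", 1: "sequence"},
        "hidden_states": {0: "batch", 1: "sequence"},
    },
    opset_version=12,
)

The decoder (with its cross-attention inputs and past key values) is usually exported as a separate graph; this sketch covers the encoder only.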
import time

import torch
import torch.nn.functional as F
from tqdm import trange
from transformers import AutoTokenizer
from onnxruntime import InferenceSession


class GenerativeT5(torch.nn.Module):
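GenerativeT5 presumably pairs ONNX sessions like these with a tokenizer and a sampling loop; its body is truncated in this preview. A minimal, hedged sketch of driving an exported encoder with onnxruntime (the file name and input names mirror the export sketch above and are assumptions):

from onnxruntime import InferenceSession
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("t5-small")
session = InferenceSession("t5-small-encoder.onnx")

inputs = tokenizer("translate English to French: hello", return_tensors="np")
hidden_states = session.run(
    None,  # None asks for every graph output
    {"input_ids": inputs["input_ids"], "attention_mask": inputs["attention_mask"]},
)[0]
print(hidden_states.shape)  # (batch, sequence, d_model)

Full generation would feed these hidden states, token by token, into a decoder session and sample from the resulting logits, which is where trange and F.softmax come in.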
import argparse

import evaluate
import numpy as np
import pandas as pd
import transformers
from datasets import load_dataset, Dataset, load_metric, concatenate_datasets, DatasetDict
from huggingface_hub import HfFolder
from transformers import AutoModelForCausalLM, AutoTokenizer
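These imports gather the usual fine-tuning utilities (datasets, evaluate, huggingface_hub, a causal LM and tokenizer). A small hedged sketch of the data-preparation step they typically support; the toy in-memory dataset and the gpt2 checkpoint are illustrative assumptions, not taken from the original script:

from datasets import Dataset
from transformers import AutoTokenizer

# Toy corpus; the original script presumably pulls a real one with load_dataset.
raw = Dataset.from_dict({
    "text": [
        "Question: What is ONNX? Answer: An open format for ML models.",
        "Question: What is T5? Answer: A text-to-text transformer.",
    ],
})

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # gpt2 has no pad token by default

def tokenize(batch):
    out = tokenizer(batch["text"], truncation=True, max_length=128)
    out["labels"] = [ids.copy() for ids in out["input_ids"]]  # causal LM: labels are the inputs
    return out

tokenized = raw.map(tokenize, batched=True, remove_columns=raw.column_names)
print(tokenized.column_names)  # ['input_ids', 'attention_mask', 'labels']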
Activation functions
--------------------
Commonly used: ReLU, Sigmoid (outputs in 0 to 1), Tanh (outputs in -1 to 1)
- Sigmoid functions and their combinations generally work better for classifier output layers
- Sigmoid and tanh functions are sometimes avoided because of the vanishing gradient problem
- ReLU is a good general-purpose activation and is the default choice in most cases these days
- If we run into dead neurons in a network, leaky ReLU is the best choice
- Keep in mind that ReLU should only be used in the hidden layers
- As a rule of thumb, start with ReLU and move to other activation functions only if ReLU does not give optimal results (see the sketch after this list)
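A quick way to see these ranges and shapes is to apply each activation to the same values in PyTorch (the numbers are arbitrary):

import torch
import torch.nn.functional as F

x = torch.tensor([-2.0, -0.5, 0.0, 0.5, 2.0])

print(torch.sigmoid(x))       # squashed into (0, 1); common for classifier outputs
print(torch.tanh(x))          # squashed into (-1, 1); zero-centered
print(F.relu(x))              # negatives clipped to 0; default for hidden layers
print(F.leaky_relu(x, 0.01))  # small negative slope keeps "dead" neurons trainable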