Bootstrap knowledge of LLMs as soon as possible, with a focus on GPT.
Avoid being a link dump; provide only valuable, well-tuned information.
Cover neural-network fundamentals before starting with transformers.
{
  "name": "Apple Silicon",
  "load_params": {
    "n_ctx": 2048,
    "n_batch": 512,
    "rope_freq_base": 10000,
    "rope_freq_scale": 1,
    "n_gpu_layers": 1,
    "use_mlock": false,
    "main_gpu": 0,
source ~/miniconda3/bin/activate allen
LANG=en
TASK=qa_en_small
for SPLIT in train valid
do
python -m examples.roberta.multiprocessing_bpe_encoder \
--encoder-json encoder.json \
--vocab-bpe vocab.bpe \
--inputs "$TASK/$SPLIT.$LANG" \
fairseq-train qa_en_small-bin \
--log-interval=10 \
--log-format=json \
--tensorboard-logdir=/users/tom/ed/sp/pretrain/tests/fairseq/bart_en_small/logs \
--seed=1 \
--cpu \
--min-loss-scale=0.0001 \
--model-parallel-size=1 \
--criterion=cross_entropy \
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang, Rutgers University, Email: zhang.hang@rutgers.edu
## Modified by Thomas Wolf, HuggingFace Inc., Email: thomas@huggingface.co
## Copyright (c) 2017-2018
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
"""Encoding Data Parallel""" |
# Standard library
import logging
import multiprocessing

# Third-party: gensim supplies the Wikipedia corpus reader and the
# embedding/weighting models used by this script.
from gensim.corpora.wikicorpus import WikiCorpus
from gensim.models import TfidfModel
from gensim.models.word2vec import Word2Vec

# logging is important to get the state of the functions:
# gensim reports corpus-processing and training progress through the
# logging module, so configure timestamped output and raise the root
# logger to INFO to make that progress visible.
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
logging.root.setLevel(level=logging.INFO)