Bootstrap knowledge of LLMs as soon as possible, with a focus on GPT.
Avoid being a link dump; provide only valuable, well-tuned information.
Cover neural-network fundamentals before starting with transformers.
{
  "name": "Apple Silicon",
  "load_params": {
    "n_ctx": 2048,
    "n_batch": 512,
    "rope_freq_base": 10000,
    "rope_freq_scale": 1,
    "n_gpu_layers": 1,
    "use_mlock": false,
    "main_gpu": 0,
source ~/miniconda3/bin/activate allen
LANG=en
TASK=qa_en_small
for SPLIT in train valid
do
python -m examples.roberta.multiprocessing_bpe_encoder \
--encoder-json encoder.json \
--vocab-bpe vocab.bpe \
--inputs "$TASK/$SPLIT.$LANG" \
fairseq-train qa_en_small-bin \
--log-interval=10 \
--log-format=json \
--tensorboard-logdir=/users/tom/ed/sp/pretrain/tests/fairseq/bart_en_small/logs \
--seed=1 \
--cpu \
--min-loss-scale=0.0001 \
--model-parallel-size=1 \
--criterion=cross_entropy \
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang, Rutgers University, Email: zhang.hang@rutgers.edu
## Modified by Thomas Wolf, HuggingFace Inc., Email: thomas@huggingface.co
## Copyright (c) 2017-2018
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
"""Encoding Data Parallel""" |
# Standard library
import logging
import multiprocessing

# Third-party: gensim supplies the Wikipedia corpus reader and the
# embedding/weighting models used by this script.
from gensim.corpora.wikicorpus import WikiCorpus
from gensim.models import TfidfModel
from gensim.models.word2vec import Word2Vec

# logging is important to get the state of the functions:
# gensim reports corpus-processing and training progress through the
# logging module, so configure timestamped output and raise the root
# logger to INFO to make that progress visible.
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
logging.root.setLevel(level=logging.INFO)