GitHub Gists by Sourab Mangrulkar (pacman100)
import contextlib

import torch
from accelerate import Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel as FSDP
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "meta-llama/Llama-2-70b-chat-hf"  # or "HuggingFaceH4/zephyr-7b-beta"

def main():
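The preview cuts off at main(). Given the Accelerator and FSDP imports, a plausible body is the RAM-efficient loading pattern in which only rank 0 materializes real weights while the other ranks build the model on the meta device; the sketch below is an assumption, not the gist's actual code:

def main():
    # Sketch only: the gist's real body is truncated in the preview above.
    accelerator = Accelerator()
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    if accelerator.is_main_process:
        # Rank 0 loads the full weights into CPU RAM, once per node.
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16)
    else:
        # Other ranks build an empty shell on the meta device (no memory cost).
        # FSDP with sync_module_states=True broadcasts rank 0's weights later.
        from transformers import AutoConfig
        config = AutoConfig.from_pretrained(MODEL_NAME)
        with torch.device("meta"):
            model = AutoModelForCausalLM.from_config(config, torch_dtype=torch.bfloat16)

    # Applies FSDP as configured through the accelerate FSDP plugin.
    model = accelerator.prepare(model)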
/*
* Starter Project for WhatsApp Echo Bot Tutorial
*
* Remix this as the starting point for following the WhatsApp Echo Bot tutorial
*
*/
"use strict";
// Access token for your app
accelerator.process_index=0 GPU Memory before entering the loading : 0
accelerator.process_index=0 GPU Memory consumed at the end of the loading (end-begin): 0
accelerator.process_index=0 GPU Peak Memory consumed during the loading (max-begin): 0
accelerator.process_index=0 GPU Total Peak Memory consumed during the loading (max): 0
accelerator.process_index=0 CPU Memory before entering the loading : 926
accelerator.process_index=0 CPU Memory consumed at the end of the loading (end-begin): 26415
accelerator.process_index=0 CPU Peak Memory consumed during the loading (max-begin): 31818
accelerator.process_index=0 CPU Total Peak Memory consumed during the loading (max): 32744
accelerator.process_index=0 model.lm_head.weight=Parameter containing:
tensor([[-0.0179, 0.0201, -0.0273, ..., -0.0275, -0.0396, -0.0131],
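Logs like the ones above can be produced by snapshotting process RSS and the CUDA allocator stats around the loading step. A minimal, self-contained sketch of that measurement (helper names are illustrative, not the gist's actual code; the CPU peak additionally needs the background-thread monitor sketched further below):

import psutil
import torch
from transformers import AutoModelForCausalLM

def b2mb(n):
    # bytes -> MB, matching the units in the logs above
    return n // 2**20

proc = psutil.Process()
cpu_begin = b2mb(proc.memory_info().rss)
gpu_begin = 0
if torch.cuda.is_available():
    torch.cuda.reset_peak_memory_stats()
    gpu_begin = b2mb(torch.cuda.memory_allocated())

model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta")  # the "loading" step

cpu_end = b2mb(proc.memory_info().rss)
print(f"CPU Memory before entering the loading : {cpu_begin}")
print(f"CPU Memory consumed at the end of the loading (end-begin): {cpu_end - cpu_begin}")
if torch.cuda.is_available():
    gpu_end = b2mb(torch.cuda.memory_allocated())
    gpu_peak = b2mb(torch.cuda.max_memory_allocated())
    print(f"GPU Memory consumed at the end of the loading (end-begin): {gpu_end - gpu_begin}")
    print(f"GPU Peak Memory consumed during the loading (max-begin): {gpu_peak - gpu_begin}")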
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
import os
import sys
from typing import List
import fire
import torch
import transformers
from datasets import load_dataset, DatasetDict
from transformers import Seq2SeqTrainer, TrainerCallback, TrainingArguments, TrainerState, TrainerControl
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
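These imports (TrainerCallback, TrainerState, TrainerControl, PREFIX_CHECKPOINT_DIR) are the usual ingredients of a checkpoint-saving callback. A minimal sketch of that pattern, assuming a PEFT adapter model (the class name and save layout here are illustrative):

class SavePeftModelCallback(TrainerCallback):
    # On each checkpoint save, also write just the small adapter weights
    # instead of relying on the full model state dict.
    def on_save(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
        checkpoint_dir = os.path.join(args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}")
        kwargs["model"].save_pretrained(os.path.join(checkpoint_dir, "adapter_model"))
        return control

The callback is then passed to the trainer via its callbacks argument, so adapter checkpoints land next to the regular checkpoint-<step> folders.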
Traceback (most recent call last):
  /tmp/ipykernel_2299032/1843815119.py:2 in <module>
    [Errno 2] No such file or directory: '/tmp/ipykernel_2299032/1843815119.py'
  /home/sourab/miniconda3/envs/ml/lib/python3.10/site-packages/optimum/onnxruntime/modeling_ort.py:581 in from_pretrained
    578        Returns:
    579            `ORTModel`: The loaded ORTModel model.
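For context, the frame above sits inside ORTModel.from_pretrained from optimum.onnxruntime. A typical call looks like the following (the checkpoint name is illustrative; depending on the optimum version, the conversion flag is export=True or the older from_transformers=True):

from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# export=True converts the PyTorch checkpoint to ONNX during loading
model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True)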
import argparse
import gc
import os
import sys
import threading

import datasets
import numpy as np
import psutil
import torch
import transformers
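These imports (gc, psutil, and threading alongside torch) match the context-manager-based memory tracker used in the PEFT examples. A trimmed sketch of that pattern, with illustrative names:

def b2mb(x):
    return int(x / 2**20)  # bytes -> MB

class TorchTracemalloc:
    """Context manager that records CPU/GPU memory deltas and peaks."""

    def __enter__(self):
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.reset_peak_memory_stats()
            self.begin = torch.cuda.memory_allocated()
        self.process = psutil.Process()
        self.cpu_begin = self.process.memory_info().rss
        self.peak_monitoring = True
        # A background thread samples RSS to approximate the CPU peak,
        # since psutil exposes no built-in high-water mark.
        self.monitor_thread = threading.Thread(target=self._peak_monitor, daemon=True)
        self.monitor_thread.start()
        return self

    def _peak_monitor(self):
        self.cpu_peak = -1
        while self.peak_monitoring:
            self.cpu_peak = max(self.process.memory_info().rss, self.cpu_peak)

    def __exit__(self, *exc):
        self.peak_monitoring = False
        self.monitor_thread.join()
        gc.collect()
        self.cpu_end = self.process.memory_info().rss
        self.cpu_used = b2mb(self.cpu_end - self.cpu_begin)
        self.cpu_peaked = b2mb(self.cpu_peak - self.cpu_begin)
        if torch.cuda.is_available():
            self.end = torch.cuda.memory_allocated()
            self.peak = torch.cuda.max_memory_allocated()
            self.used = b2mb(self.end - self.begin)
            self.peaked = b2mb(self.peak - self.begin)

Wrapping the loading step in "with TorchTracemalloc() as tm:" and printing tm.cpu_used, tm.cpu_peaked, and the GPU counterparts per rank yields numbers like the loading logs shown earlier.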
# Checking whether the checkpoint conversion is correct
Code:
import sys
import torch
from megatron import get_args, get_tokenizer, initialize_megatron, mpu
from megatron.model import GPTModel
from megatron.training import get_model
from megatron.checkpointing import load_checkpoint
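The rest of the check is not shown. One minimal way to verify a conversion is to run the same prompt through both models and compare logits; a sketch assuming the converted Hugging Face checkpoint is on disk (paths and tolerances are illustrative, and the Megatron-side forward pass uses the model loaded via the imports above):

from transformers import AutoModelForCausalLM, AutoTokenizer

hf_model = AutoModelForCausalLM.from_pretrained("path/to/converted-hf-checkpoint")  # hypothetical path
hf_tokenizer = AutoTokenizer.from_pretrained("path/to/converted-hf-checkpoint")

inputs = hf_tokenizer("The quick brown fox", return_tensors="pt")
with torch.no_grad():
    hf_logits = hf_model(**inputs).logits

# megatron_logits would come from the Megatron model restored with
# load_checkpoint(...) above, fed the same token ids.
# torch.testing.assert_close(hf_logits, megatron_logits, rtol=1e-3, atol=1e-3)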