batch_size | seq_len | pad_percentage | HF_time | BT_time | Speedup |
---|---|---|---|---|---|
8 | 64 | 0 | 0.025156218261718752 | 0.013504798583984375 | 1.8627614551432141 |
8 | 64 | 0.1 | 0.024825302734375 | 0.013796290283203125 | 1.7994187005908109 |
8 | 64 | 0.2 | 0.02481883056640625 | 0.013484359130859375 | 1.8405643401774714 |
8 | 64 | 0.5 | 0.02459789306640625 | 0.013327769775390625 | 1.8456120927167883 |
8 | 64 | 0.75 | 0.02464018310546875 | 0.01304416259765625 | 1.8889815977835211 |
8 | 128 | 0 | 0.02547349609375 | 0.0134563427734375 | 1.8930475035188665 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import torch | |
import numpy as np | |
import argparse | |
from transformers import pipeline | |
parser = argparse.ArgumentParser(description='Benchmark pipeline runtime for int8 models') | |
parser.add_argument('--batch_size', default=1, type=int, help='batch_size for experiments') | |
parser.add_argument('--nb_runs', default=10, type=int, help='number of times for repeating experiments') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import torch | |
import numpy as np | |
import argparse | |
from transformers import pipeline | |
parser = argparse.ArgumentParser(description='Benchmark pipeline runtime for int8 models') | |
parser.add_argument('--batch_size', default=1, type=int, help='batch_size for experiments') | |
parser.add_argument('--nb_runs', default=10, type=int, help='number of times for repeating experiments') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import tokenizers | |
import torch | |
import numpy as np | |
import argparse | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
parser = argparse.ArgumentParser(description='Benchmark pipeline runtime for int8 models') | |
parser.add_argument('--batch_size', default=1, type=int, help='batch_size for experiments') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import datetime | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
def get_args(): | |
parser = argparse.ArgumentParser() | |
parser.add_argument("--checkpoint", type=str, help="Checkpoint path", required=True) | |
parser.add_argument("--max-memory-per-gpu", type=str, help="Defines maximum memory allocated to gpu", required=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
MAX_NEW_TOKENS = 128 | |
model_name = 'facebook/opt-66b' | |
text = """ | |
Q: On average Joe throws 25 punches per minute. A fight lasts 5 rounds of 3 minutes. | |
How many punches did he throw?\n | |
A: Let’s think step by step.\n""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import os | |
import json | |
import argparse | |
parser = argparse.ArgumentParser(description='Sharding Hugging Face models') | |
parser.add_argument('--sharding_factor', default=4, type=int, help='Sharding factor - aka how many shards to create') | |
parser.add_argument('--source_model_path', default="t5-v1_1-xl", type=str, help='Relative path to the source model folder') | |
parser.add_argument('--sharded_model_path', default="t5-v1_1-xl-sharded", type=str, help='Relative path to the target sharded model folder') | |
args = parser.parse_args() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
from bitsandbytes.nn import Linear8bitLt | |
# Utility function | |
def get_model_memory_footprint(model): | |
r""" | |
Partially copied and inspired from: https://discuss.pytorch.org/t/gpu-memory-that-model-uses/56822/2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Dict, Union | |
from sqlalchemy import false | |
import torch | |
from transformers.utils.hub import convert_file_size_to_int | |
from transformers.utils import WEIGHTS_NAME, WEIGHTS_INDEX_NAME | |
from transformers.modeling_utils import dtype_byte_size | |
import os | |
from transformers.models.switch_transformers.convert_switch_transformers_original_flax_checkpoint_to_pytorch import rename_keys | |
from flax.traverse_util import flatten_dict, unflatten_dict | |
from tensorflow.io import gfile |
batch_size | seq_len | pad_percentage | HF_time | BT_time | Speedup |
---|---|---|---|---|---|
8 | 64 | 0 | 0.012947250976562501 | 0.007366779174804688 | 1.7575185395598296 |
8 | 64 | 0.1 | 0.012887490234375 | 0.0072092669677734375 | 1.7876283805252486 |
8 | 64 | 0.2 | 0.012887449951171874 | 0.007392645263671876 | 1.7432799074645124 |
8 | 64 | 0.5 | 0.012756295166015626 | 0.007139061889648438 | 1.7868307297506574 |
8 | 64 | 0.75 | 0.0132109521484375 | 0.007268699951171876 | 1.8175123800931694 |
8 | 128 | 0 | 0.012552437744140625 | 0.0075138049316406256 | 1.6705833939449668 |
OlderNewer