Skip to content

Instantly share code, notes, and snippets.

View younesbelkada's full-sized avatar
:octocat:
Working from home

Younes Belkada younesbelkada

:octocat:
Working from home
View GitHub Profile
import time
import torch
import numpy as np
import argparse
from transformers import pipeline
parser = argparse.ArgumentParser(description='Benchmark pipeline runtime for int8 models')
parser.add_argument('--batch_size', default=1, type=int, help='batch_size for experiments')
parser.add_argument('--nb_runs', default=10, type=int, help='number of times for repeating experiments')
@younesbelkada
younesbelkada / benchmark_generate.py
Created August 2, 2022 21:35
Benchmark using `generate`
import time
import tokenizers
import torch
import numpy as np
import argparse
from transformers import AutoModelForCausalLM, AutoTokenizer
parser = argparse.ArgumentParser(description='Benchmark pipeline runtime for int8 models')
parser.add_argument('--batch_size', default=1, type=int, help='batch_size for experiments')
@younesbelkada
younesbelkada / benchmark_generate_jz.py
Last active August 12, 2022 09:03
Benchmarking inference
import argparse
import datetime
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument("--checkpoint", type=str, help="Checkpoint path", required=True)
parser.add_argument("--max-memory-per-gpu", type=str, help="Defines maximum memory allocated to gpu", required=True)
@younesbelkada
younesbelkada / bnb_example.py
Created August 13, 2022 18:00
A minimal script to run `bitsandbytes` int8 inference
import torch
import torch.nn as nn
from bitsandbytes.nn import Linear8bitLt
# Utility function
def get_model_memory_footprint(model):
r"""
Partially copied and inspired from: https://discuss.pytorch.org/t/gpu-memory-that-model-uses/56822/2
@younesbelkada
younesbelkada / save_sequential.py
Created November 8, 2022 15:04
A script to sequentially save any `t5x` checkpoint
from typing import Dict, Union
from sqlalchemy import false
import torch
from transformers.utils.hub import convert_file_size_to_int
from transformers.utils import WEIGHTS_NAME, WEIGHTS_INDEX_NAME
from transformers.modeling_utils import dtype_byte_size
import os
from transformers.models.switch_transformers.convert_switch_transformers_original_flax_checkpoint_to_pytorch import rename_keys
from flax.traverse_util import flatten_dict, unflatten_dict
from tensorflow.io import gfile

Bert large uncased - half - CUDA - T4

batch_size seq_len pad_percentage HF_time BT_time Speedup
8 64 0 0.025156218261718752 0.013504798583984375 1.8627614551432141
8 64 0.1 0.024825302734375 0.013796290283203125 1.7994187005908109
8 64 0.2 0.02481883056640625 0.013484359130859375 1.8405643401774714
8 64 0.5 0.02459789306640625 0.013327769775390625 1.8456120927167883
8 64 0.75 0.02464018310546875 0.01304416259765625 1.8889815977835211
8 128 0 0.02547349609375 0.0134563427734375 1.8930475035188665

bert-base-uncased - A100 - half - cuda

batch_size seq_len pad_percentage HF_time BT_time Speedup
8 64 0 0.012947250976562501 0.007366779174804688 1.7575185395598296
8 64 0.1 0.012887490234375 0.0072092669677734375 1.7876283805252486
8 64 0.2 0.012887449951171874 0.007392645263671876 1.7432799074645124
8 64 0.5 0.012756295166015626 0.007139061889648438 1.7868307297506574
8 64 0.75 0.0132109521484375 0.007268699951171876 1.8175123800931694
8 128 0 0.012552437744140625 0.0075138049316406256 1.6705833939449668
@younesbelkada
younesbelkada / setup-m1.py
Created December 8, 2022 10:55
How to install the `transformers` `dev` environment on a Mac M1
# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@younesbelkada
younesbelkada / example_code_int8.py
Last active December 9, 2022 08:00
An example script to run bnb int8 models using `bitsandbytes` and `transformers`
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
MAX_NEW_TOKENS = 128
model_name = 'facebook/opt-66b'
text = """
Q: On average Joe throws 25 punches per minute. A fight lasts 5 rounds of 3 minutes.
How many punches did he throw?\n
A: Let’s think step by step.\n"""
@younesbelkada
younesbelkada / convert_t5x_to_pt.py
Created December 22, 2022 22:07
Handy script to convert any `t5x` checkpoint to an un-nested dictionary
from t5x import checkpoints
from flax.traverse_util import flatten_dict, unflatten_dict
def rename_keys(key: str) -> str:
    """Return *key* with every occurrence of ``"kernel"`` replaced by ``"weight"``.

    Keys that do not contain ``"kernel"`` are returned unchanged.

    NOTE(review): presumably this maps Flax-style parameter names to
    PyTorch-style ones while converting a checkpoint -- confirm against the
    caller, which is not visible here.
    """
    if "kernel" in key:
        key = key.replace("kernel", "weight")
    # Returned outside the ``if`` so keys without "kernel" pass through as-is
    # instead of producing ``None``.
    return key
flax_checkpoint_path = "/home/younes_huggingface_co/code/pix2struct/pix2struct_base"