Vishal Goklani vgoklani

## modeling_mixtral.py
# coding=utf-8
# Copyright 2023 Mixtral AI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

## torch_ddp_verify.py
def verify_ddp_weights_equal(model: torch.nn.Module, atol: float = 1e-5) -> None:
    """Check that every parameter of ``model`` matches across all ranks.

    Each parameter is gathered with ``gather`` and reshaped to ``world_size``
    rows; every row is then compared element-wise against the first row.

    Args:
        model: The module to check. If it exposes a ``module`` attribute
            (i.e. it wraps another module), the wrapped module is checked.
        atol: Absolute tolerance. A parameter passes when every element-wise
            absolute difference is strictly below this value.

    Raises:
        AssertionError: If any parameter differs from the first gathered row
            by ``atol`` or more. Raised explicitly (instead of via ``assert``)
            so the check is not stripped when Python runs with ``-O``.
    """
    if hasattr(model, "module"):
        model = model.module

    world_size = get_world_size()
    for name, param in model.named_parameters():
        # NOTE(review): assumes gather() stacks the per-rank copies so the
        # result can be viewed as (world_size, numel) - confirm against helper.
        gathered_param = gather(param).reshape((world_size, -1))
        # Keep the first row as a (1, numel) slice so it broadcasts over all rows.
        absolute_diffs = (gathered_param[None, 0, :] - gathered_param).abs()
        if not (absolute_diffs < atol).all():
            raise AssertionError(
                f"❌ param [{name}] not equal - got max_absolute_diff={absolute_diffs.max()}"
            )

## torch_ddp_verify.py
def verify_ddp_weights_equal(model: torch.nn.Module, atol: float = 1e-5) -> None:
    """Check that every parameter of ``model`` matches across all ranks.

    Each parameter is gathered with ``gather`` and reshaped to ``world_size``
    rows; every row is then compared element-wise against the first row.

    Args:
        model: The module to check. If it exposes a ``module`` attribute
            (i.e. it wraps another module), the wrapped module is checked.
        atol: Absolute tolerance. A parameter passes when every element-wise
            absolute difference is strictly below this value.

    Raises:
        AssertionError: If any parameter differs from the first gathered row
            by ``atol`` or more. Raised explicitly (instead of via ``assert``)
            so the check is not stripped when Python runs with ``-O``.
    """
    if hasattr(model, "module"):
        model = model.module

    world_size = get_world_size()
    for name, param in model.named_parameters():
        # NOTE(review): assumes gather() stacks the per-rank copies so the
        # result can be viewed as (world_size, numel) - confirm against helper.
        gathered_param = gather(param).reshape((world_size, -1))
        # Keep the first row as a (1, numel) slice so it broadcasts over all rows.
        absolute_diffs = (gathered_param[None, 0, :] - gathered_param).abs()
        if not (absolute_diffs < atol).all():
            raise AssertionError(
                f"❌ param [{name}] not equal - got max_absolute_diff={absolute_diffs.max()}"
            )

## llama_flash.py
from transformers import (
  AutoConfig,
  AutoTokenizer,
  BitsAndBytesConfig,
  GenerationConfig,
  AutoModelForCausalLM,
  LlamaTokenizerFast,
  PreTrainedModel,
  TextIteratorStreamer,
  StoppingCriteria,

## pypdfjs.py
# SPDX-FileCopyrightText: 2023 mara004
# SPDX-License-Identifier: CC-BY-4.0 OR Apache-2.0

# See also https://github.com/extremeheat/JSPyBridge/blob/master/examples/python/pdfjs.py

# Py-Depends: pillow, javascript >= 1.1.0 (jspybridge)
# Js-Depends: pdfjs-dist, canvas
# Use `python -m pip install` and `python -m javascript --install`

import argparse

## benchmark_self_attention.py
import argparse
import contextlib
import logging
import math
import random
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Callable

## 1-pw_op_fusion.py
import torch
import torch._inductor.config
import time

# Switch off the `triton.cudagraphs` option in the Inductor config.
torch._inductor.config.triton.cudagraphs = False
# Select the 'high' precision setting for float32 matrix multiplications.
torch.set_float32_matmul_precision('high')

def bench(f, name=None, iters=100, warmup=5, display=True, profile=False):
    for _ in range(warmup):
        f()

## aritra_sayak_vit_works.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              3 stars
            
          
                sayakpaul
                / aritra_sayak_vit_works.md
            
            
              Created
              May 8, 2022 07:19
            
          
Work
Details


Augmenting convnets with aggregated attention
Tutorial by Aritra


Train a Vision Transformer on small datasets
Tutorial by Aritra


MobileViT
Tutorial by Sayak


Compact Convolutional Transformers
Tutorial by Sayak


Data efficient image transformers
TF implementation, TF pre-trained models, tutorial by Sayak


Class attention image transformers
TF implementation, TF pre-trained models by Sayak


Masked Autoencoders
TF implementation, tutorial by Aritra and Sayak, 
Contribution to Hugging Face Transformers by Aritra and Sayak


Probing the representation of ViTs


## attention_distance.py
# Copyright 2022 Google LLC.
# SPDX-License-Identifier: Apache-2.0
# Author: Maithra Raghu <maithra@google.com>


def compute_distance_matrix(patch_size, num_patches, length):
  """Helper function to compute distance matrix."""

  distance_matrix = np.zeros((num_patches, num_patches))

## speed2.py
#!/usr/bin/env python

# Any copyright is dedicated to the Public Domain.
# https://creativecommons.org/publicdomain/zero/1.0/

# Written by Francois Fleuret <francois@fleuret.org>
# Modified by François Lagunas <francois.lagunas@m4x.org>

import time, torch
	# coding=utf-8
	# Copyright 2023 Mixtral AI and the HuggingFace Inc. team. All rights reserved.
	#
	# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
	# and OPT implementations in this library. It has been modified from its
	# original forms to accommodate minor architectural differences compared
	# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	def verify_ddp_weights_equal(model: torch.nn.Module, atol: float = 1e-5) -> None:
		"""Check that every parameter of ``model`` matches across all ranks.

		Each parameter is gathered with ``gather`` and reshaped to ``world_size``
		rows; every row is then compared element-wise against the first row.

		Args:
			model: The module to check. If it exposes a ``module`` attribute
				(i.e. it wraps another module), the wrapped module is checked.
			atol: Absolute tolerance. A parameter passes when every element-wise
				absolute difference is strictly below this value.

		Raises:
			AssertionError: If any parameter differs from the first gathered row
				by ``atol`` or more. Raised explicitly (instead of via ``assert``)
				so the check is not stripped when Python runs with ``-O``.
		"""
		if hasattr(model, "module"):
			model = model.module

		world_size = get_world_size()
		for name, param in model.named_parameters():
			# NOTE(review): assumes gather() stacks the per-rank copies so the
			# result can be viewed as (world_size, numel) - confirm against helper.
			gathered_param = gather(param).reshape((world_size, -1))
			# Keep the first row as a (1, numel) slice so it broadcasts over all rows.
			absolute_diffs = (gathered_param[None, 0, :] - gathered_param).abs()
			if not (absolute_diffs < atol).all():
				raise AssertionError(
					f"❌ param [{name}] not equal - got max_absolute_diff={absolute_diffs.max()}"
				)
	from transformers import (
	AutoConfig,
	AutoTokenizer,
	BitsAndBytesConfig,
	GenerationConfig,
	AutoModelForCausalLM,
	LlamaTokenizerFast,
	PreTrainedModel,
	TextIteratorStreamer,
	StoppingCriteria,
	# SPDX-FileCopyrightText: 2023 mara004
	# SPDX-License-Identifier: CC-BY-4.0 OR Apache-2.0

	# See also https://github.com/extremeheat/JSPyBridge/blob/master/examples/python/pdfjs.py

	# Py-Depends: pillow, javascript >= 1.1.0 (jspybridge)
	# Js-Depends: pdfjs-dist, canvas
	# Use `python -m pip install` and `python -m javascript --install`

	import argparse
	import argparse
	import contextlib
	import logging
	import math
	import random
	import time
	from dataclasses import dataclass
	from pathlib import Path
	from typing import Callable
	import torch
	import torch._inductor.config
	import time

	# Switch off the `triton.cudagraphs` option in the Inductor config.
	torch._inductor.config.triton.cudagraphs = False
	# Select the 'high' precision setting for float32 matrix multiplications.
	torch.set_float32_matmul_precision('high')

	def bench(f, name=None, iters=100, warmup=5, display=True, profile=False):
	for _ in range(warmup):
	f()
Work	Details
Augmenting convnets with aggregated attention	Tutorial by Aritra
Train a Vision Transformer on small datasets	Tutorial by Aritra
MobileViT	Tutorial by Sayak
Compact Convolutional Transformers	Tutorial by Sayak
Data efficient image transformers	TF implementation, TF pre-trained models, tutorial by Sayak
Class attention image transformers	TF implementation, TF pre-trained models by Sayak
Masked Autoencoders	TF implementation, tutorial by Aritra and Sayak, Contribution to Hugging Face Transformers by Aritra and Sayak
Probing the representation of ViTs
	# Copyright 2022 Google LLC.
	# SPDX-License-Identifier: Apache-2.0
	# Author: Maithra Raghu <maithra@google.com>


	def compute_distance_matrix(patch_size, num_patches, length):
	"""Helper function to compute distance matrix."""

	distance_matrix = np.zeros((num_patches, num_patches))
	#!/usr/bin/env python

	# Any copyright is dedicated to the Public Domain.
	# https://creativecommons.org/publicdomain/zero/1.0/

	# Written by Francois Fleuret <francois@fleuret.org>
	# Modified by François Lagunas <francois.lagunas@m4x.org>

	import time, torch