# ---- snippet boundary (gist-viewer boilerplate removed) ----
# coding: utf-8 | |
import logging | |
import math | |
from typing import Any, Dict, List, Optional, Tuple | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
from torch.nn.parameter import Parameter, Variable |
# ---- snippet boundary (gist-viewer boilerplate removed) ----
# Copyright (c) 2017-present, Moscow Institute of Physics and Technology. | |
# All rights reserved. | |
# This source code is licensed under the BSD-style license found in the | |
# LICENSE file in the root directory of this source tree. An additional grant | |
# of patent rights can be found in the PATENTS file in the same directory. | |
from parlai.core.params import ParlaiParser | |
from parlai.core.agents import Agent | |
from parlai.core.utils import display_messages | |
from projects.convai2.models.ftlm.wild_eval_world import ConvAIWorld |
# ---- snippet boundary (gist-viewer boilerplate removed) ----
import tensorflow as tf | |
from transformers import BertTokenizer, TFBertForSequenceClassification | |
import numpy as np | |
# seq_length = 128
# nb_examples = 1
# voc_size = 25000
# input_ids = tf.random.uniform((nb_examples, seq_length),
#                               maxval=voc_size,
# ---- snippet boundary (gist-viewer boilerplate removed) ----
import torchvision.models as models | |
resnet18 = models.resnet18() | |
from transformers import BertEmbeddings, BertEncoder | |
class MMBDEmbeddings(nn.Module): | |
def __init__(self, | |
text_mod_embds = BertEmbeddings, # Or your favorite bidirectional transformer | |
vision_mod_embds = resnet18): # Or your favorite vision model |
# ---- snippet boundary (gist-viewer boilerplate removed) ----
# Install NVIDIA CUDA 10.1 toolkit and drivers on Ubuntu 18.04 from NVIDIA's
# apt repository.
# NOTE(review): the download URLs are plain http and `apt-key adv` is
# deprecated on newer Ubuntu releases — confirm against NVIDIA's current
# install docs before reusing on anything other than 18.04.
CUDA_REPO_PKG=cuda-repo-ubuntu1804_10.1.243-1_amd64.deb

# Fetch the repo-registration package, install it, trust NVIDIA's signing key,
# then drop the temporary download.
wget -O /tmp/${CUDA_REPO_PKG} http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/${CUDA_REPO_PKG}
sudo dpkg -i /tmp/${CUDA_REPO_PKG}
sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
rm -f /tmp/${CUDA_REPO_PKG}

# Refresh package lists and install driver + toolkit non-interactively.
sudo apt-get update
sudo apt-get install cuda-drivers -y
sudo apt-get install cuda -y
# ---- snippet boundary (gist-viewer boilerplate removed) ----
#!/bin/bash
# SLURM header for multi-node ZeRO-3 fine-tuning: 3 nodes x 8 GPUs, one
# launcher task per node (the distributed runner spawns per-GPU workers).
#SBATCH --job-name=idefics_zero3_finetuning_multinode # name
#SBATCH --nodes=3                 # nodes
#SBATCH --ntasks-per-node=1       # crucial - only 1 task per dist per node!
#SBATCH --cpus-per-task=96        # number of cores per tasks
#SBATCH --gres=gpu:8              # number of gpus
#SBATCH --output=%x-%j.out        # output file name

export GPUS_PER_NODE=8
# First hostname in the job's node list serves as the rendezvous master.
export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
# ---- snippet boundary (gist-viewer boilerplate removed) ----
import torch | |
import numpy as np | |
import logging | |
# Sentinel tokens used when interleaving image placeholders with text in a
# multimodal prompt.
IMAGE_TOKEN = "<image>"
FAKE_TOKEN_AROUND_IMAGE_V2 = "<fake_token_around_image>"

# Minimum length of a document (measured in number of tokens) for it to be
# packed together with other documents.
_MIN_LENGTH_DOCUMENTS_TO_PACK = 5
# ---- snippet boundary (gist-viewer boilerplate removed) ----
# flake8: noqa
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# ---- snippet boundary (gist-viewer boilerplate removed) ----
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
from torch.optim import Optimizer | |
KD_loss = nn.KLDivLoss(reduction='batchmean') | |
def kd_step(teacher: nn.Module, | |
student: nn.Module, | |
temperature: float, |