Skip to content

Instantly share code, notes, and snippets.

View VictorSanh's full-sized avatar

Victor SANH VictorSanh

View GitHub Profile
@VictorSanh
VictorSanh / kd.py
Last active July 3, 2024 07:12
Knowledge Distillation
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Optimizer
KD_loss = nn.KLDivLoss(reduction='batchmean')
def kd_step(teacher: nn.Module,
student: nn.Module,
temperature: float,
@VictorSanh
VictorSanh / idefics2_trl.py
Created April 14, 2024 01:47
first stab at idefics2 + trl sft (adapted from the llava trl sft training example)
# flake8: noqa
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
@VictorSanh
VictorSanh / packing.py
Created October 19, 2023 02:38
Packing and splitting OBELICS style documents for IDEFICS training
import torch
import numpy as np
import logging
IMAGE_TOKEN = "<image>"
FAKE_TOKEN_AROUND_IMAGE_V2 = "<fake_token_around_image>"
_MIN_LENGTH_DOCUMENTS_TO_PACK = (
5 # Minimum length of documents to pack together (length is measured in number of tokens)
)
@VictorSanh
VictorSanh / finetuning.slurm
Last active October 2, 2023 22:07
IDEFICS fine tuning with zero ds 3
#!/bin/bash
#SBATCH --job-name=idefics_zero3_finetuning_multinode # name
#SBATCH --nodes=3 # nodes
#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node!
#SBATCH --cpus-per-task=96 # number of cores per task
#SBATCH --gres=gpu:8 # number of gpus
#SBATCH --output=%x-%j.out # output file name
export GPUS_PER_NODE=8
export MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
# Cuda for Ubuntu18.04
CUDA_REPO_PKG=cuda-repo-ubuntu1804_10.1.243-1_amd64.deb
wget -O /tmp/${CUDA_REPO_PKG} http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/${CUDA_REPO_PKG}
sudo dpkg -i /tmp/${CUDA_REPO_PKG}
sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
rm -f /tmp/${CUDA_REPO_PKG}
sudo apt-get update
sudo apt-get install cuda-drivers -y
sudo apt-get install cuda -y
import torchvision.models as models
resnet18 = models.resnet18()
from transformers import BertEmbeddings, BertEncoder
class MMBDEmbeddings(nn.Module):
def __init__(self,
text_mod_embds = BertEmbeddings, # Or your favorite bidirectional transformer
vision_mod_embds = resnet18): # Or your favorite vision model
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
import numpy as np
# seq_length = 128
# nb_examples = 1
# voc_size = 25000
# input_ids = tf.random.uniform((nb_examples,seq_length),
# maxval=voc_size,
# Copyright (c) 2017-present, Moscow Institute of Physics and Technology.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
from parlai.core.params import ParlaiParser
from parlai.core.agents import Agent
from parlai.core.utils import display_messages
from projects.convai2.models.ftlm.wild_eval_world import ConvAIWorld
@VictorSanh
VictorSanh / relation_extraction.py
Created August 14, 2018 15:36
RE model - Reimplementation from G. Bekoulis
# coding: utf-8
import logging
import math
from typing import Any, Dict, List, Optional, Tuple
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter, Variable