Skip to content

Instantly share code, notes, and snippets.

View Dref360's full-sized avatar

Frédéric Branchaud-Charron Dref360

View GitHub Profile
@Dref360
Dref360 / baal_s2t.py
Created February 10, 2024 16:53
Example of uncertainty estimation using Baal on Speech Recognition
# Wav2Vec in Baal
from datasets import load_dataset
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, TrainingArguments
from baal.active.heuristics import BALD
from baal.bayesian.dropout import patch_module
from baal.transformers_trainer_wrapper import BaalTransformersTrainer
# load model and tokenizer
@Dref360
Dref360 / baal_ner_hf.py
Last active June 29, 2023 14:11
Example of how to use Baal for NER use cases with Hugging Face.
from datasets import load_dataset
from transformers import pipeline, DataCollatorForTokenClassification
from baal.active.active_loop import ActiveLearningLoop
from baal.active.dataset import ActiveLearningDataset
from baal.active.heuristics import BALD
from baal.bayesian.dropout import patch_module
from baal.transformers_trainer_wrapper import BaalTransformersTrainer
dataset = load_dataset("conll2003")
@Dref360
Dref360 / hf_overwrite_ds.py
Created April 30, 2023 17:23
Overwrite a Hugging Face Dataset
import os.path
import shutil
import tempfile
from datasets import Dataset, load_from_disk
PATH = '/tmp/b.arrow'
def overwrite_dataset(ds: Dataset, path) -> Dataset:
@Dref360
Dref360 / mc_dropout_context.py
Created April 1, 2023 22:17
Quick example showing how to activate and deactivate MC Dropout
import numpy as np
import torch
from torchvision.models import vgg16
from baal.bayesian.dropout import MCDropoutModule
from baal.modelwrapper import ModelWrapper
model = vgg16()
wrapper = ModelWrapper(model, None)
input = torch.randn([2, 3, 64, 64])
@Dref360
Dref360 / README.md
Last active October 30, 2022 18:46
How to run Baal with HuggingFace on Label Studio

Baal with HuggingFace on Label Studio

Instructions to run Label Studio with Bayesian active learning on Text Classification.

Documentation Github

Environment:

  • export LABEL_STUDIO_HOSTNAME=http://localhost:8080
  • export LABEL_STUDIO_ML_BACKEND_V2=True
@Dref360
Dref360 / word_embeddings.py
Created May 26, 2022 14:43
Get most similar sentence by comparing to "important words"
from pprint import pprint
import datasets
import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel
"""
@Dref360
Dref360 / hf_training.py
Created March 30, 2022 15:41
Train a HF Pipeline on a dataset. Taken from their course.
import argparse
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from transformers import AutoTokenizer
from transformers import DataCollatorWithPadding
LABEL_COL = "label"
TEXT_COL = "text"
import gensim
import nltk
from gensim import corpora, models
from nltk.stem import WordNetLemmatizer, SnowballStemmer
import pandas as pd
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('stopwords')
@Dref360
Dref360 / baal_detection.py
Created February 12, 2022 17:31
Detection draft
from typing import List, Optional, Callable
import torch
from torch.optim import Adam
from torchvision.datasets.voc import VOCDetection
from torchvision.models.detection.ssd import ssd300_vgg16
from torchvision.transforms import Compose, Resize, ToTensor
from baal import ModelWrapper
@Dref360
Dref360 / README.md
Created November 21, 2021 17:25
Code for the blog post "Improving trust in text classification using HF and BaaL"

The code should run as is with the following dependencies:

pip install transformers datasets baal matplotlib tqdm