Instructions to run Label Studio with Bayesian active learning on Text Classification.
Environment:
export LABEL_STUDIO_HOSTNAME=http://localhost:8080
export LABEL_STUDIO_ML_BACKEND_V2=True
# Wav2Vec in Baal
from datasets import load_dataset | |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, TrainingArguments | |
from baal.active.heuristics import BALD | |
from baal.bayesian.dropout import patch_module | |
from baal.transformers_trainer_wrapper import BaalTransformersTrainer | |
# Load model and tokenizer
from datasets import load_dataset | |
from transformers import pipeline, DataCollatorForTokenClassification | |
from baal.active.active_loop import ActiveLearningLoop | |
from baal.active.dataset import ActiveLearningDataset | |
from baal.active.heuristics import BALD | |
from baal.bayesian.dropout import patch_module | |
from baal.transformers_trainer_wrapper import BaalTransformersTrainer | |
dataset = load_dataset("conll2003") |
import os.path | |
import shutil | |
import tempfile | |
from datasets import Dataset, load_from_disk | |
PATH = '/tmp/b.arrow' | |
def overwrite_dataset(ds: Dataset, path) -> Dataset: |
import numpy as np | |
import torch | |
from torchvision.models import vgg16 | |
from baal.bayesian.dropout import MCDropoutModule | |
from baal.modelwrapper import ModelWrapper | |
model = vgg16() | |
wrapper = ModelWrapper(model, None) | |
input = torch.randn([2, 3, 64, 64]) |
Instructions to run Label Studio with Bayesian active learning on Text Classification.
Environment:
export LABEL_STUDIO_HOSTNAME=http://localhost:8080
export LABEL_STUDIO_ML_BACKEND_V2=True
from pprint import pprint | |
import datasets | |
import numpy as np | |
import torch | |
from sklearn.metrics.pairwise import cosine_similarity | |
from tqdm import tqdm | |
from transformers import AutoTokenizer, AutoModel | |
""" |
import argparse | |
from datasets import load_dataset | |
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer | |
from transformers import AutoTokenizer | |
from transformers import DataCollatorWithPadding | |
LABEL_COL = "label" | |
TEXT_COL = "text" |
import gensim | |
import nltk | |
from gensim import corpora, models | |
from nltk.stem import WordNetLemmatizer, SnowballStemmer | |
import pandas as pd | |
nltk.download('wordnet') | |
nltk.download('omw-1.4') | |
nltk.download('stopwords') |
from typing import List, Optional, Callable | |
import torch | |
from torch.optim import Adam | |
from torchvision.datasets.voc import VOCDetection | |
from torchvision.models.detection.ssd import ssd300_vgg16 | |
from torchvision.transforms import Compose, Resize, ToTensor | |
from baal import ModelWrapper |
The code should run as-is once the following dependencies are installed:
pip install transformers datasets baal matplotlib tqdm