Skip to content

Instantly share code, notes, and snippets.

@jbrry
jbrry / print_batch_labels.py
Created October 21, 2021 15:23
Debug labels in a batch
# Replaces the function defined at: https://github.com/huggingface/transformers/blob/f9c16b02e3f5d2ee0a1cadb6f50dc9e3281e2536/src/transformers/data/data_collator.py#L78
def torch_default_data_collator(features: List[InputDataClass]) -> Dict[str, Any]:
"""place this function in transformers/data/data_collator.py"""
import torch
if not isinstance(features[0], (dict, BatchEncoding)):
features = [vars(f) for f in features]
first = features[0]
batch = {}
@jbrry
jbrry / concatenate_huggingface_datasets.py
Last active October 1, 2021 12:46
Downloads HuggingFace datasets and concatenates them based on split type.
"""Downloads HuggingFace datasets and concatenates them based on split type."""
import datasets
from datasets import concatenate_datasets, load_dataset
from datasets.dataset_dict import DatasetDict
# `config_name`s for the `universal_dependencies` dataset
TBIDS = [
"af_afribooms",
@jbrry
jbrry / extract_scalars.py
Created November 3, 2020 18:32 — forked from wchargin/extract_scalars.py
Extract scalars to CSV using the TensorBoard event multiplexer API
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import csv
import errno
import os
import re