Skip to content

Instantly share code, notes, and snippets.

@andrewljohnson
Created January 13, 2023 16:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andrewljohnson/1ce64b52c203557a412627d1cad1aa7f to your computer and use it in GitHub Desktop.
Save andrewljohnson/1ce64b52c203557a412627d1cad1aa7f to your computer and use it in GitHub Desktop.
from segments.huggingface import release2dataset
from segments import SegmentsClient
import os

# Segments.ai dataset to export, and the name of the release of it to fetch.
dataset_identifier = "andrewljohnson/magic_cards"
release_name = "v0.1"
# Prefer the SEGMENTS_API_KEY environment variable so a real key never has to
# be written into this file; fall back to the original placeholder when the
# variable is unset.
api_key = os.environ.get("SEGMENTS_API_KEY", "HIDDEN")
# NOTE: building the client this way differs from the blog post this script
# follows -- the blog's version seemed outdated.
client = SegmentsClient(api_key)
release = client.get_release(dataset_identifier, release_name)
# Turn the fetched release into the dataset object used by the steps below.
hf_dataset = release2dataset(release)
from segments.utils import get_semantic_bitmap
def convert_segmentation_bitmap(example):
    """Return the replacement ``label.segmentation_bitmap`` value for one example.

    The example's ``label.segmentation_bitmap`` and ``label.annotations``
    fields are passed to ``get_semantic_bitmap`` with ``id_increment=0``.

    Args:
        example: A mapping with at least the ``label.segmentation_bitmap``
            and ``label.annotations`` keys (one row of the dataset when used
            with ``Dataset.map``).

    Returns:
        A one-entry dict keyed ``label.segmentation_bitmap`` holding the
        result of ``get_semantic_bitmap``, so that ``Dataset.map`` overwrites
        that column.
    """
    semantic_bitmap = get_semantic_bitmap(
        example["label.segmentation_bitmap"],
        example["label.annotations"],
        id_increment=0,
    )
    return {"label.segmentation_bitmap": semantic_bitmap}
# Run the bitmap conversion over every example, then reshape the columns:
# 'image' becomes 'pixel_values', the converted bitmap becomes 'label', and
# the remaining metadata columns are dropped.
semantic_dataset = (
    hf_dataset.map(convert_segmentation_bitmap)
    .rename_column("image", "pixel_values")
    .rename_column("label.segmentation_bitmap", "label")
    .remove_columns(["name", "uuid", "status", "label.annotations"])
)
# Publish the result to the Hub under the same identifier as the source dataset.
semantic_dataset.push_to_hub(dataset_identifier)
from datasets import load_dataset
# Load the dataset back from the Hub under the identifier it was pushed to.
ds = load_dataset(dataset_identifier)
ds = ds.shuffle(seed=1)
# train_test_split shuffles again by default and draws a fresh random seed
# unless one is given, so the seed is passed here as well; otherwise the
# train/test membership would change from run to run.
ds = ds["train"].train_test_split(test_size=0.2, seed=1)
train_ds = ds["train"]
test_ds = ds["test"]
import json
from huggingface_hub import hf_hub_download
# NOTE(review): repo_id is not used below -- hf_hub_download is called with
# the bare dataset identifier plus repo_type="dataset". Kept in case later
# code refers to it; confirm and remove if not.
repo_id = f"datasets/{dataset_identifier}"
filename = "id2label.json"
# Download id2label.json from the dataset repo, then parse it and close the
# file. Previously id2label was left bound to an open, unparsed file handle.
id2label_path = hf_hub_download(
    repo_id=dataset_identifier, filename=filename, repo_type="dataset"
)
with open(id2label_path, "r", encoding="utf-8") as id2label_file:
    # Keys of a parsed JSON object are strings; convert them downstream if
    # integer ids are needed.
    id2label = json.load(id2label_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment