Created
January 13, 2023 16:23
-
-
Save andrewljohnson/1ce64b52c203557a412627d1cad1aa7f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

from segments import SegmentsClient
from segments.huggingface import release2dataset
from segments.utils import get_semantic_bitmap

# Dataset and release to fetch; the same identifier is reused later in the
# script when pushing to and loading from the Hub.
dataset_identifier = "andrewljohnson/magic_cards"
release_name = "v0.1"

# Prefer a key supplied through the environment so the real secret never has
# to be written into the script; the "HIDDEN" placeholder stays as fallback.
api_key = os.environ.get("SEGMENTS_API_KEY", "HIDDEN")

# The client is constructed directly from the API key. This is different from
# the blog, which seemed outdated.
client = SegmentsClient(api_key)
release = client.get_release(dataset_identifier, release_name)

# NOTE(review): presumably yields a Hugging Face `datasets` object (it is
# used with .map / .rename_column / .push_to_hub below) -- confirm.
hf_dataset = release2dataset(release)
def convert_segmentation_bitmap(example):
    """Recompute one example's segmentation bitmap via ``get_semantic_bitmap``.

    Args:
        example: Mapping that provides the ``"label.segmentation_bitmap"``
            and ``"label.annotations"`` entries.

    Returns:
        A single-entry dict keyed ``"label.segmentation_bitmap"`` whose value
        is whatever ``get_semantic_bitmap`` returns for this example.
    """
    # NOTE(review): presumably turns an instance bitmap into a per-category
    # (semantic) bitmap, with id_increment=0 leaving category ids unshifted
    # -- confirm against the segments.utils documentation.
    semantic_bitmap = get_semantic_bitmap(
        example["label.segmentation_bitmap"],
        example["label.annotations"],
        id_increment=0,
    )
    return {"label.segmentation_bitmap": semantic_bitmap}
# Apply the per-example bitmap conversion across the dataset.
semantic_dataset = hf_dataset.map(convert_segmentation_bitmap)

# Rename the image and bitmap columns to "pixel_values" and "label", then
# drop the remaining metadata columns before uploading.
semantic_dataset = (
    semantic_dataset
    .rename_column("image", "pixel_values")
    .rename_column("label.segmentation_bitmap", "label")
    .remove_columns(["name", "uuid", "status", "label.annotations"])
)

# Upload the processed dataset under the same identifier used above.
semantic_dataset.push_to_hub(dataset_identifier)
from datasets import load_dataset

# Load the dataset back by its identifier and shuffle with a fixed seed.
ds = load_dataset(dataset_identifier)
ds = ds.shuffle(seed=1)

# Hold out 20% of the "train" split for testing.
# NOTE(review): no seed is passed to train_test_split, so the split itself
# may differ between runs despite the seeded shuffle above -- confirm whether
# a reproducible split is wanted.
ds = ds["train"].train_test_split(test_size=0.2)
train_ds = ds["train"]
test_ds = ds["test"]
import json

from huggingface_hub import hf_hub_download

# NOTE(review): repo_id is not passed to hf_hub_download below (the call uses
# dataset_identifier together with repo_type="dataset"); kept in case other
# code reads it.
repo_id = f"datasets/{dataset_identifier}"
filename = "id2label.json"

# Download the JSON file from the dataset repo, then parse it inside a `with`
# block so the file handle is closed. Previously `id2label` was bound to the
# open file object itself and `json` was never used.
id2label_path = hf_hub_download(
    repo_id=dataset_identifier,
    filename=filename,
    repo_type="dataset",
)
with open(id2label_path, "r", encoding="utf-8") as id2label_file:
    id2label = json.load(id2label_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment