Skip to content

Instantly share code, notes, and snippets.

@Helw150
Created September 16, 2022 15:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Helw150/7ebba27c8ab1cba79073753b22899804 to your computer and use it in GitHub Desktop.
Save Helw150/7ebba27c8ab1cba79073753b22899804 to your computer and use it in GitHub Desktop.
Lab Meeting Dataset upload Code
# See https://huggingface.co/docs/datasets/upload_dataset for more details
from datasets import load_dataset

# Repository name to create under the SALT-NLP namespace on the Hub.
dataset_name = "PUT_YOUR_NAME_HERE"

# Local CSV files, keyed by the split name each one is loaded as.
data_files = {"train": "train.csv", "dev": "dev.csv", "test": "test.csv"}

# The "csv" builder reads the local files listed in data_files; a Hub
# repository id here would instead try to fetch a dataset that does not
# exist yet.
dataset = load_dataset("csv", data_files=data_files)

# push_to_hub is a method of the loaded dataset object, not of a `datasets`
# module (which this script never imports).
dataset.push_to_hub(f"SALT-NLP/{dataset_name}", private=True)
from datasets import load_dataset

# Repository name to create under the SALT-NLP namespace on the Hub.
dataset_name = "PUT_YOUR_NAME_HERE"


def transform(batch):
    """Preprocess one batch of examples; replace the body with real logic.

    It is passed to ``map`` with ``batched=True`` below, so ``batch`` is a
    dict mapping column names to lists of values, and the function should
    return a dict of the same form.
    """
    # PUT SOME PREPROCESSING FUNCTION HERE
    return batch


# NOTE(review): data_args, cache_name and model_args are not defined in this
# snippet; they must be provided by the surrounding script before this runs.
raw_datasets = load_dataset(
    "glue",
    data_args.task_name,
    cache_dir=cache_name,
    use_auth_token=True if model_args.use_auth_token else None,
)

# Apply the preprocessing to every split, in batches, across 24 worker
# processes.
dialect_datasets = raw_datasets.map(
    transform,
    batched=True,
    num_proc=24,
    desc="Transform Dataset",
)

# Upload the transformed splits. push_to_hub is a method of the dataset
# object, not of a `datasets` module (which this script never imports).
dialect_datasets.push_to_hub(f"SALT-NLP/{dataset_name}", private=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment