acharles7/dataset_info.json

## dataset_info.json
{
  "citation": "@article{raecompressive2019,\nauthor = {Rae, Jack W and Potapenko, Anna and Jayakumar, Siddhant M and\n          Hillier, Chloe and Lillicrap, Timothy P},\ntitle = {Compressive Transformers for Long-Range Sequence Modelling},\njournal = {arXiv preprint},\nurl = {https://arxiv.org/abs/1911.05507},\nyear = {2019},\n}",
  "description": "This dataset contains the PG-19 language modeling benchmark. It includes a set\nof books extracted from the Project Gutenberg books project\n(https://www.gutenberg.org), that were published before 1919. It also contains\nmetadata of book titles and publication dates.\nPG-19 is over double the size of the Billion Word benchmark and contains\ndocuments that are 20X longer, on average, than the WikiText long-range\nlanguage modelling benchmark.\n\nBooks are partitioned into a train, validation, and test set. Books metadata is\nstored in metadata.csv which contains\n(book_id, short_book_title, publication_date, book_link).",
  "location": {
    "urls": [
      "https://github.com/deepmind/pg19"
    ]
  },
  "name": "pg19",
  "splits": [
    {
      "name": "test",
      "numBytes": "41308494",
      "shardLengths": [
        "100"
      ]
    },
    {
      "name": "validation",
      "numBytes": "17742608",
      "shardLengths": [
        "50"
      ]
    }
  ],
  "version": "0.1.0"
}
	{
	"citation": "@article{raecompressive2019,\nauthor = {Rae, Jack W and Potapenko, Anna and Jayakumar, Siddhant M and\n Hillier, Chloe and Lillicrap, Timothy P},\ntitle = {Compressive Transformers for Long-Range Sequence Modelling},\njournal = {arXiv preprint},\nurl = {https://arxiv.org/abs/1911.05507},\nyear = {2019},\n}",
	"description": "This dataset contains the PG-19 language modeling benchmark. It includes a set\nof books extracted from the Project Gutenberg books project\n(https://www.gutenberg.org), that were published before 1919. It also contains\nmetadata of book titles and publication dates.\nPG-19 is over double the size of the Billion Word benchmark and contains\ndocuments that are 20X longer, on average, than the WikiText long-range\nlanguage modelling benchmark.\n\nBooks are partitioned into a train, validation, and test set. Books metadata is\nstored in metadata.csv which contains\n(book_id, short_book_title, publication_date, book_link).",
	"location": {
	"urls": [
	"https://github.com/deepmind/pg19"
	]
	},
	"name": "pg19",
	"splits": [
	{
	"name": "test",
	"numBytes": "41308494",
	"shardLengths": [
	"100"
	]
	},
	{
	"name": "validation",
	"numBytes": "17742608",
	"shardLengths": [
	"50"
	]
	}
	],
	"version": "0.1.0"
	}