tuetschek/dataset_info.json

## dataset_info.json
{
  "citation": "@inproceedings{dusek_neural_2019,\n        author = {Du\u0161ek, Ond\u0159ej and Jur\u010d\u00ed\u010dek, Filip},\n        title = {Neural {Generation} for {Czech}: {Data} and {Baselines}},\n        shorttitle = {Neural {Generation} for {Czech}},\n        url = {https://www.aclweb.org/anthology/W19-8670/},\n        urldate = {2019-10-18},\n        booktitle = {Proceedings of the 12th {International} {Conference} on {Natural} {Language} {Generation} ({INLG} 2019)},\n        month = oct,\n        address = {Tokyo, Japan},\n        year = {2019},\n        pages = {563--574},\n        abstract = {We present the first dataset targeted at end-to-end NLG in Czech in the restaurant domain, along with several strong baseline models using the sequence-to-sequence approach. While non-English NLG is under-explored in general, Czech, as a morphologically rich language, makes the task even harder: Since Czech requires inflecting named entities, delexicalization or copy mechanisms do not work out-of-the-box and lexicalizing the generated outputs is non-trivial. In our experiments, we present two different approaches to this problem: (1) using a neural language model to select the correct inflected form while lexicalizing, (2) a two-step generation setup: our sequence-to-sequence model generates an interleaved sequence of lemmas and morphological tags, which are then inflected by a morphological generator.},\n}",
  "description": "Czech data-to-text dataset in the restaurant domain. The input meaning representations\ncontain a dialogue act type (inform, confirm etc.), slots (food, area, etc.) and their values.\nIt originated as a translation of the English San Francisco Restaurants dataset by Wen et al. (2015).",
  "downloadSize": "1463019",
  "location": {
    "urls": [
      "https://github.com/UFAL-DSG/cs_restaurant_dataset"
   ]
  },
  "moduleName": "cs_restaurants",
  "name": "cs_restaurants",
  "splits": [
    {
      "name": "train",
      "numBytes": "1720061",
      "shardLengths": [
        "3569"
      ]
    },
    {
      "name": "validation",
      "numBytes": "415919",
      "shardLengths": [
        "781"
      ]
    },
    {
      "name": "test",
      "numBytes": "444758",
      "shardLengths": [
        "842"
      ]
    }
  ],
  "supervisedKeys": {
    "input": "input_text",
    "output": "target_text"
  },
  "version": "0.1.0"
}
	{
	"citation": "@inproceedings{dusek_neural_2019,\n author = {Du\u0161ek, Ond\u0159ej and Jur\u010d\u00ed\u010dek, Filip},\n title = {Neural {Generation} for {Czech}: {Data} and {Baselines}},\n shorttitle = {Neural {Generation} for {Czech}},\n url = {https://www.aclweb.org/anthology/W19-8670/},\n urldate = {2019-10-18},\n booktitle = {Proceedings of the 12th {International} {Conference} on {Natural} {Language} {Generation} ({INLG} 2019)},\n month = oct,\n address = {Tokyo, Japan},\n year = {2019},\n pages = {563--574},\n abstract = {We present the first dataset targeted at end-to-end NLG in Czech in the restaurant domain, along with several strong baseline models using the sequence-to-sequence approach. While non-English NLG is under-explored in general, Czech, as a morphologically rich language, makes the task even harder: Since Czech requires inflecting named entities, delexicalization or copy mechanisms do not work out-of-the-box and lexicalizing the generated outputs is non-trivial. In our experiments, we present two different approaches to this problem: (1) using a neural language model to select the correct inflected form while lexicalizing, (2) a two-step generation setup: our sequence-to-sequence model generates an interleaved sequence of lemmas and morphological tags, which are then inflected by a morphological generator.},\n}",
	"description": "Czech data-to-text dataset in the restaurant domain. The input meaning representations\ncontain a dialogue act type (inform, confirm etc.), slots (food, area, etc.) and their values.\nIt originated as a translation of the English San Francisco Restaurants dataset by Wen et al. (2015).",
	"downloadSize": "1463019",
	"location": {
	"urls": [
	"https://github.com/UFAL-DSG/cs_restaurant_dataset"
	]
	},
	"moduleName": "cs_restaurants",
	"name": "cs_restaurants",
	"splits": [
	{
	"name": "train",
	"numBytes": "1720061",
	"shardLengths": [
	"3569"
	]
	},
	{
	"name": "validation",
	"numBytes": "415919",
	"shardLengths": [
	"781"
	]
	},
	{
	"name": "test",
	"numBytes": "444758",
	"shardLengths": [
	"842"
	]
	}
	],
	"supervisedKeys": {
	"input": "input_text",
	"output": "target_text"
	},
	"version": "0.1.0"
	}