# Avsecz/dataloader.yaml

## dataloader.yaml
# Form of the dataloader (`type`) and where it is implemented (`defined_as`)
type: 'Dataset'
defined_as: 'dataloader.py::SeqDataset'

# Arguments of the dataloader. Each argument carries a description (`doc`)
# and an example value (`example`).
args:
    intervals_file:
        # folded scalar: the two lines below are joined by a single space
        doc: >-
            tsv file containing dna interval indices (chr, start, end)
            and (optionally) binary 0/1 labels
        example: example_files/intervals_files.tsv
    fasta_file:
        doc: chr21 fasta file for dna intervals
        example: example_files/chr21.fa

# Author information and a short description (optional section)
info:
    authors:
        - name: 'John Doe'
    doc: 'Dataloader description'

# Packages required by the dataloader, grouped by package manager
dependencies:
    # conda dependencies
    conda:
        - 'bioconda::genomelake'
        - 'bioconda::pybedtools'
        - 'numpy'
        - 'pandas'
    # pip dependencies
    pip:
        - 'concise'

# Arrays returned by the dataloader: a dictionary of the form
# {"inputs": ..., "targets": ..., "metadata": ...}
output_schema:
    inputs:
        shape: '(1000,4)'
        doc: 'One-hot encoded DNA sequence'
    targets:
        shape: '(None,)'
        # this description ends with a newline character
        doc: "Optional. Binary 0/1 class labels\n"
    metadata:
        ranges:
            type: 'GenomicRanges'
            doc: 'ranges for input'
	# Where the dataloader is implemented and in what form
	type: Dataset
	defined_as: dataloader.py::SeqDataset

	# Arguments of the dataloader
	args:
	intervals_file:
	doc: tsv file containing dna interval indices (chr, start, end) and (optonally) binary 0/1 labels
	example: example_files/intervals_files.tsv
	fasta_file:
	doc: chr21 fasta file for dna intervals
	example: example_files/chr21.fa

	# information about the authors (optional)
	info:
	authors:
	- name: John Doe
	doc: "Dataloader description"

	# required packages
	dependencies:
	conda: # conda dependencies
	- bioconda::genomelake
	- bioconda::pybedtools
	- numpy
	- pandas
	pip: # pip dependencies
	- concise

	# what arrays does dataloader return
	output_schema: # dataloaders returns a dictionary of {"inputs": .., "targets": ..., "metadata": ...}
	inputs:
	shape: (1000,4)
	doc: "One-hot encoded DNA sequence"
	targets:
	shape: (None,)
	doc: >
	Optional. Binary 0/1 class labels
	metadata:
	ranges:
	type: GenomicRanges
	doc: ranges for input