Skip to content

Instantly share code, notes, and snippets.

@holtgrewe
Created July 18, 2016 07:46
Show Gist options
  • Save holtgrewe/5517986d90fe551a76b6091340eda79e to your computer and use it in GitHub Desktop.
Save holtgrewe/5517986d90fe551a76b6091340eda79e to your computer and use it in GitHub Desktop.
Jannovar sources INI file for updated
; INI file with default data sources for Jannovar.
;
; This file is packed into the Jannovar JAR file.
;
; Each data source is described in one section. By convention, the section
; and data source name is given as "${organism}/${source}". Jannovar will
; serialize the file to "${name}".replace('/', '_').replace('\\', '_').
;
; Users can provide their own INI files using the --data-source parameter
; to Jannovar, even overriding the defaults from this file.
;
; Each section contains a "type" entry that will make Jannovar use the
; appropriate DataSource and JannovarDataFactory type for downloading and
; parsing.
;
; Name / Contig ID Mapping
; ========================
;
; Independent of the type, the "chromInfo" and "chrToAccessions" entries are
; expected and give URLs to the "chromInfo.txt.gz" at UCSC describing the
; contig/chromosome lengths and "chrToAccessions" at RefSeq with the
; official contig names and RefSeq/GenBank identifiers.
;
; Transcript Files
; ================
;
; Keys for the types:
;
; * ucsc: knownCanonical, knownGene, knownGeneMrna, kgXref, knownToLocusLink
; * ensembl: gtf, cdna, ncrna
; * refseq: gff, rna
;
; Aliasing
; ========
;
; The mitochondrial chromosome has the key "MT" in RefSeq but "M" in UCSC.
; Thus, manual aliases can be defined. They are applied after loading the
; chrToAccessions file from RefSeq and before loading the chromInfo file from
; UCSC. Each line has the "alias" key. The value "MT,M,chrM" means that the
; contig names "M" and "chrM" are aliased to the contig name "MT" that is
; already known from the chrToAccessions file.
;
; Note that all alias lines are processed in linear order. If the name in the
; first entry of the list is not known yet then it is created with the next
; free number. This is useful for adding the missing MT chromosome
; entry for hg18, for example.
; ---------------------------------------------------------------------------
; hg18/GRCh36
; ---------------------------------------------------------------------------
; HG18 from UCSC
;
; Human hg18 transcripts built from the UCSC database dump files listed below.
; "chromInfo" (UCSC) gives the contig lengths, "chrToAccessions" (RefSeq) the
; official contig names and accessions. "alias" maps the contig names "M" and
; "chrM" onto "MT"; if "MT" is not known from the chrToAccessions file it is
; created with the next free number (see "Aliasing" in the file header).
; NOTE(review): "chrToAccessions.format", "chrToAccessions.matchLast" and
; "knownCanonical" are not described in the header of this file -- confirm
; their exact meaning against the Jannovar data source code.
[hg18/ucsc]
type=ucsc
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/Assembled_chromosomes/chr_NC_gi
chrToAccessions.format=chr_NC_gi
chrToAccessions.matchLast=HuRef
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/knownGene.txt.gz
knownGeneMrna=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/knownGeneMrna.txt.gz
kgXref=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/kgXref.txt.gz
knownToLocusLink=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/knownToLocusLink.txt.gz
; HG18 from Ensembl
;
; Human hg18 transcripts from Ensembl; "gtf" and "cdna" both point at
; release-54 (NCBI36) files. Contig lengths come from UCSC ("chromInfo"),
; contig names and accessions from RefSeq ("chrToAccessions").
; "alias" maps "M" and "chrM" onto "MT" (see "Aliasing" in the file header).
; NOTE(review): "chrToAccessions.matchLast" is not described in the header of
; this file -- confirm its meaning against the Jannovar data source code.
[hg18/ensembl]
type=ensembl
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/Assembled_chromosomes/chr_NC_gi
chrToAccessions.format=chr_NC_gi
chrToAccessions.matchLast=HuRef
gtf=ftp://ftp.ensembl.org/pub/release-54/gtf/homo_sapiens/Homo_sapiens.NCBI36.54.gtf.gz
cdna=ftp://ftp.ensembl.org/pub/release-54/fasta/homo_sapiens/cdna/Homo_sapiens.NCBI36.54.cdna.all.fa.gz
; HG18 from RefSeq
;
; Human hg18 transcripts from RefSeq; "gff" and "rna" point at the GFF3 and
; RNA FASTA files in NCBI's archived BUILD.36.3 directory. Contig lengths come
; from UCSC ("chromInfo"), contig names and accessions from RefSeq
; ("chrToAccessions"). "alias" maps "M" and "chrM" onto "MT" (see "Aliasing"
; in the file header).
; NOTE(review): "chrToAccessions.matchLast" is not described in the header of
; this file -- confirm its meaning against the Jannovar data source code.
[hg18/refseq]
type=refseq
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/Assembled_chromosomes/chr_NC_gi
chrToAccessions.format=chr_NC_gi
chrToAccessions.matchLast=HuRef
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/GFF/ref_NCBI36_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/RNA/rna.fa.gz
; HG18 from RefSeq (only curated data sets)
;
; Uses the same download URLs as [hg18/refseq]; the only difference is the
; additional "onlyCurated=true" entry.
; NOTE(review): "onlyCurated" is not described in the header of this file;
; presumably it restricts the result to curated RefSeq transcripts -- confirm.
[hg18/refseq_curated]
type=refseq
alias=MT,M,chrM
onlyCurated=true
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/Assembled_chromosomes/chr_NC_gi
chrToAccessions.format=chr_NC_gi
chrToAccessions.matchLast=HuRef
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/GFF/ref_NCBI36_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/RNA/rna.fa.gz
; ---------------------------------------------------------------------------
; hg19/GRCh37
; ---------------------------------------------------------------------------
; HG19 from UCSC
;
; Human hg19 transcripts built from the UCSC database dump files listed below.
; "chromInfo" (UCSC) gives the contig lengths, "chrToAccessions" (RefSeq,
; GRCh37.p13) the official contig names and accessions. "alias" maps "M" and
; "chrM" onto "MT" (see "Aliasing" in the file header).
[hg19/ucsc]
type=ucsc
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format=chr_accessions
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz
knownGeneMrna=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownGeneMrna.txt.gz
kgXref=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/kgXref.txt.gz
knownToLocusLink=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownToLocusLink.txt.gz
; HG19 from Ensembl
;
; Human hg19 transcripts from Ensembl; "gtf" and "cdna" both point at
; release-74 (GRCh37) files. Contig lengths come from UCSC ("chromInfo"),
; contig names and accessions from RefSeq ("chrToAccessions").
; "alias" maps "M" and "chrM" onto "MT" (see "Aliasing" in the file header).
[hg19/ensembl]
type=ensembl
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format=chr_accessions
gtf=ftp://ftp.ensembl.org/pub/release-74/gtf/homo_sapiens/Homo_sapiens.GRCh37.74.gtf.gz
cdna=ftp://ftp.ensembl.org/pub/release-74/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh37.74.cdna.all.fa.gz
; HG19 from RefSeq
;
; Human hg19 transcripts from RefSeq; "gff" and "rna" point at the GFF3 and
; RNA FASTA files in NCBI's archived ANNOTATION_RELEASE.105 directory
; (GRCh37.p13). Contig lengths come from UCSC ("chromInfo"), contig names and
; accessions from RefSeq ("chrToAccessions"). "alias" maps "M" and "chrM" onto
; "MT" (see "Aliasing" in the file header).
[hg19/refseq]
type=refseq
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format=chr_accessions
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/GFF/ref_GRCh37.p13_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/RNA/rna.fa.gz
; HG19 from RefSeq (only curated data sets)
;
; Uses the same download URLs as [hg19/refseq]; the only difference is the
; additional "onlyCurated=true" entry.
; NOTE(review): "onlyCurated" is not described in the header of this file;
; presumably it restricts the result to curated RefSeq transcripts -- confirm.
[hg19/refseq_curated]
type=refseq
alias=MT,M,chrM
onlyCurated=true
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format=chr_accessions
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/GFF/ref_GRCh37.p13_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/RNA/rna.fa.gz
; ---------------------------------------------------------------------------
; hg38/GRCh38
; ---------------------------------------------------------------------------
; HG38 from UCSC
;
; Human hg38 transcripts built from the UCSC database dump files listed below.
; "chromInfo" (UCSC) gives the contig lengths, "chrToAccessions" (RefSeq,
; GRCh38.p7) the official contig names and accessions. "alias" maps "M" and
; "chrM" onto "MT" (see "Aliasing" in the file header).
; NOTE(review): the chrToAccessions URL is not under an ARCHIVE/ directory,
; unlike the hg18/hg19 sections -- it may move when NCBI publishes a new
; release; confirm it still resolves.
[hg38/ucsc]
type=ucsc
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/Assembled_chromosomes/chr_accessions_GRCh38.p7
chrToAccessions.format=chr_accessions
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/knownGene.txt.gz
knownGeneMrna=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/knownGeneMrna.txt.gz
kgXref=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/kgXref.txt.gz
knownToLocusLink=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/knownToLocusLink.txt.gz
; HG38 from Ensembl
;
; Human hg38 transcripts from Ensembl; "gtf" and "cdna" both point into the
; release-78 (GRCh38) directories. Contig lengths come from UCSC
; ("chromInfo"), contig names and accessions from RefSeq ("chrToAccessions").
; "alias" maps "M" and "chrM" onto "MT" (see "Aliasing" in the file header).
; NOTE(review): unlike the gtf file name, the cdna file name carries no
; release number, which differs from the other Ensembl sections in this file
; -- verify that the URL resolves before changing it.
[hg38/ensembl]
type=ensembl
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/Assembled_chromosomes/chr_accessions_GRCh38.p7
chrToAccessions.format=chr_accessions
gtf=ftp://ftp.ensembl.org/pub/release-78/gtf/homo_sapiens/Homo_sapiens.GRCh38.78.gtf.gz
cdna=ftp://ftp.ensembl.org/pub/release-78/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz
; HG38 from RefSeq
;
; Human hg38 transcripts from RefSeq; "gff" and "rna" point at the GFF3 and
; RNA FASTA files in NCBI's (non-archived) H_sapiens directory. Contig lengths
; come from UCSC ("chromInfo"), contig names and accessions from RefSeq
; ("chrToAccessions"). "alias" maps "M" and "chrM" onto "MT" (see "Aliasing"
; in the file header).
; NOTE(review): "chrToAccessions" names patch level GRCh38.p7 while "gff"
; names GRCh38.p2 -- the two files may not belong to the same release;
; confirm which patch level is intended and that both URLs resolve.
[hg38/refseq]
type=refseq
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/Assembled_chromosomes/chr_accessions_GRCh38.p7
chrToAccessions.format=chr_accessions
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GFF/ref_GRCh38.p2_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/RNA/rna.fa.gz
; HG38 from RefSeq (only curated data sets)
;
; Uses the same download URLs as [hg38/refseq]; the only difference is the
; additional "onlyCurated=true" entry.
; NOTE(review): "onlyCurated" is not described in the header of this file;
; presumably it restricts the result to curated RefSeq transcripts -- confirm.
; NOTE(review): "chrToAccessions" names patch level GRCh38.p7 while "gff"
; names GRCh38.p2 -- confirm which patch level is intended and that both URLs
; resolve.
[hg38/refseq_curated]
type=refseq
alias=MT,M,chrM
onlyCurated=true
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/Assembled_chromosomes/chr_accessions_GRCh38.p7
chrToAccessions.format=chr_accessions
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GFF/ref_GRCh38.p2_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/RNA/rna.fa.gz
; ---------------------------------------------------------------------------
; mm9
; ---------------------------------------------------------------------------
; MM9 from UCSC
;
; Mouse mm9 transcripts built from the UCSC database dump files listed below.
; "chromInfo" (UCSC) gives the contig lengths, "chrToAccessions" (RefSeq,
; MGSCv37) the official contig names and accessions. "alias" maps "M" and
; "chrM" onto "MT" (see "Aliasing" in the file header).
[mm9/ucsc]
type=ucsc
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/Assembled_chromosomes/chr_accessions_MGSCv37
chrToAccessions.format=chr_accessions
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/knownGene.txt.gz
knownGeneMrna=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/knownGeneMrna.txt.gz
kgXref=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/kgXref.txt.gz
knownToLocusLink=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/knownToLocusLink.txt.gz
; MM9 from Ensembl
;
; Mouse mm9 transcripts from Ensembl; "gtf" and "cdna" both point at
; release-67 (NCBIM37) files. Contig lengths come from UCSC ("chromInfo"),
; contig names and accessions from RefSeq ("chrToAccessions").
; "alias" maps "M" and "chrM" onto "MT" (see "Aliasing" in the file header).
[mm9/ensembl]
type=ensembl
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/Assembled_chromosomes/chr_accessions_MGSCv37
chrToAccessions.format=chr_accessions
gtf=ftp://ftp.ensembl.org/pub/release-67/gtf/mus_musculus/Mus_musculus.NCBIM37.67.gtf.gz
cdna=ftp://ftp.ensembl.org/pub/release-67/fasta/mus_musculus/cdna/Mus_musculus.NCBIM37.67.cdna.all.fa.gz
; MM9 from RefSeq
;
; Mouse mm9 transcripts from RefSeq; "gff" and "rna" point at the GFF3 and
; RNA FASTA files in NCBI's archived M_musculus BUILD.37.2 directory. Contig
; lengths come from UCSC ("chromInfo"), contig names and accessions from
; RefSeq ("chrToAccessions"). "alias" maps "M" and "chrM" onto "MT" (see
; "Aliasing" in the file header).
[mm9/refseq]
type=refseq
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/Assembled_chromosomes/chr_accessions_MGSCv37
chrToAccessions.format=chr_accessions
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/GFF/ref_MGSCv37_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/RNA/rna.fa.gz
; MM9 from RefSeq (only curated data sets)
;
; Uses the same download URLs as [mm9/refseq]; the only difference is the
; additional "onlyCurated=true" entry.
; NOTE(review): "onlyCurated" is not described in the header of this file;
; presumably it restricts the result to curated RefSeq transcripts -- confirm.
[mm9/refseq_curated]
type=refseq
alias=MT,M,chrM
onlyCurated=true
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/Assembled_chromosomes/chr_accessions_MGSCv37
chrToAccessions.format=chr_accessions
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/GFF/ref_MGSCv37_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/RNA/rna.fa.gz
; ---------------------------------------------------------------------------
; mm10
; ---------------------------------------------------------------------------
; MM10 from UCSC
;
; Mouse mm10 transcripts built from the UCSC database dump files listed below.
; "chromInfo" (UCSC) gives the contig lengths, "chrToAccessions" (RefSeq,
; GRCm38.p3) the official contig names and accessions. "alias" maps "M" and
; "chrM" onto "MT" (see "Aliasing" in the file header).
; NOTE(review): the chrToAccessions URL is not under an ARCHIVE/ directory,
; unlike the mm9 sections -- it may move when NCBI publishes a new release;
; confirm it still resolves.
[mm10/ucsc]
type=ucsc
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/Assembled_chromosomes/chr_accessions_GRCm38.p3
chrToAccessions.format=chr_accessions
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/knownGene.txt.gz
knownGeneMrna=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/knownGeneMrna.txt.gz
kgXref=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/kgXref.txt.gz
knownToLocusLink=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/knownToLocusLink.txt.gz
; MM10 from Ensembl
;
; Mouse mm10 transcripts from Ensembl; "gtf" and "cdna" both point at
; release-74 (GRCm38) files. Contig lengths come from UCSC ("chromInfo"),
; contig names and accessions from RefSeq ("chrToAccessions").
; "alias" maps "M" and "chrM" onto "MT" (see "Aliasing" in the file header).
[mm10/ensembl]
type=ensembl
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/Assembled_chromosomes/chr_accessions_GRCm38.p3
chrToAccessions.format=chr_accessions
gtf=ftp://ftp.ensembl.org/pub/release-74/gtf/mus_musculus/Mus_musculus.GRCm38.74.gtf.gz
cdna=ftp://ftp.ensembl.org/pub/release-74/fasta/mus_musculus/cdna/Mus_musculus.GRCm38.74.cdna.all.fa.gz
; MM10 from RefSeq
;
; Mouse mm10 transcripts from RefSeq; "gff" and "rna" point at the GFF3 and
; RNA FASTA files in NCBI's (non-archived) M_musculus directory (GRCm38.p3).
; Contig lengths come from UCSC ("chromInfo"), contig names and accessions
; from RefSeq ("chrToAccessions"). "alias" maps "M" and "chrM" onto "MT" (see
; "Aliasing" in the file header).
; NOTE(review): "chrToAccessions" lives under genomes/Mus_musculus/ while
; "gff" and "rna" live under genomes/M_musculus/ -- confirm that both
; directory names resolve on the NCBI FTP server.
[mm10/refseq]
type=refseq
alias=MT,M,chrM
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/Assembled_chromosomes/chr_accessions_GRCm38.p3
chrToAccessions.format=chr_accessions
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/GFF/ref_GRCm38.p3_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/RNA/rna.fa.gz
; MM10 from RefSeq (only curated data sets)
;
; Uses the same download URLs as [mm10/refseq]; the only difference is the
; additional "onlyCurated=true" entry.
; NOTE(review): "onlyCurated" is not described in the header of this file;
; presumably it restricts the result to curated RefSeq transcripts -- confirm.
; NOTE(review): "chrToAccessions" lives under genomes/Mus_musculus/ while
; "gff" and "rna" live under genomes/M_musculus/ -- confirm that both
; directory names resolve on the NCBI FTP server.
[mm10/refseq_curated]
type=refseq
alias=MT,M,chrM
onlyCurated=true
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/Assembled_chromosomes/chr_accessions_GRCm38.p3
chrToAccessions.format=chr_accessions
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/GFF/ref_GRCm38.p3_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/RNA/rna.fa.gz
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment