Skip to content

Instantly share code, notes, and snippets.

@hammer
Last active July 8, 2019 20:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hammer/fb48c68c1a0d15b1f054730a63719c6c to your computer and use it in GitHub Desktop.
Save hammer/fb48c68c1a0d15b1f054730a63719c6c to your computer and use it in GitHub Desktop.
Configuration organized by ES index; note that this will not parse correctly! It's for purely pedagogical purposes.
# [invalid-]evidence-data
# Evidence input files, written under `gs_output_dir`.
# Each `downloads` entry names a source bucket, an output filename template
# (the `{suffix}` placeholder is presumably filled in by the consumer - confirm),
# a `resource` label, and a `subset_key` path with its `subset_prefix`.
# NOTE(review): nesting restored from a listing that had lost its indentation;
# verify against the consumer's schema.
evidences:
  gs_output_dir: evidence-files
  downloads:
    - bucket: otar000-evidence_input/CRISPR/json
      output_filename: crispr-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
    - bucket: otar006-reactome
      output_filename: reactome-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/uniprot/
    - bucket: otar007-cosmic
      output_filename: cosmic-{suffix}.json.gz
      excludes: hallmarks
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
    - bucket: otar008-chembl
      output_filename: chembl-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/uniprot/
    - bucket: otar009-gwas
      output_filename: gwas-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
    - bucket: otar010-atlas
      output_filename: atlas-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
    - bucket: otar011-uniprot
      output_filename: uniprot-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/uniprot/
    - bucket: otar012-eva
      output_filename: eva-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
    - bucket: otar025-epmc
      output_filename: epmc-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/uniprot/
    - bucket: otar000-evidence_input/Gene2Phenotype/json
      output_filename: gene2phenotype-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
    - bucket: otar000-evidence_input/GenomicsEngland/json
      output_filename: genomics_england-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
    - bucket: otar000-evidence_input/IntOgen/json
      output_filename: intogen-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
    - bucket: otar000-evidence_input/PhenoDigm/json
      output_filename: phenodigm-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
    - bucket: otar000-evidence_input/PheWAS/json
      output_filename: phewas_catalog-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
    - bucket: otar000-evidence_input/PROGENy/json
      output_filename: progeny-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
    - bucket: otar000-evidence_input/SLAPEnrich/json
      output_filename: slapenrich-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
    - bucket: otar000-evidence_input/SysBio/json
      output_filename: sysbio-{suffix}.json.gz
      resource: input-file
      subset_key:
        - target
        - id
      subset_prefix: http://identifiers.org/ensembl/
# URI downloads for the ECO scores table and the expression inputs.
# Each entry gives a source `uri`, an output filename template and a
# `resource` label.
# NOTE: the top-level key `annotations` is repeated in later sections of this
# file; duplicate keys are invalid YAML (most loaders keep only the last), so
# merge the sections before loading.
# NOTE(review): nesting restored from a listing that had lost its indentation.
annotations:
  gs_output_dir: annotation-files
  downloads:
    - uri: https://raw.githubusercontent.com/opentargets/data_pipeline/master/mrtarget/resources/eco_scores.tsv
      output_filename: eco_scores-{suffix}.tsv
      resource: eco-scores
    # expression-data
    - uri: https://raw.githubusercontent.com/opentargets/expression_hierarchy/master/process/map_with_efos.json
      output_filename: map_with_efos-{suffix}.json
      resource: tissue-translation-map
    - uri: https://raw.githubusercontent.com/opentargets/expression_hierarchy/master/process/curation.tsv
      output_filename: expression_hierarchy_curation-{suffix}.tsv
      resource: tissue-curation-map
    - uri: https://www.proteinatlas.org/download/normal_tissue.tsv.zip
      output_filename: normal_tissue-{suffix}.tsv.zip
      resource: hpa-normal-tissue
    - uri: https://storage.googleapis.com/atlas_baseline_expression/expatlas.blueprint2.baseline.binned_v2.tsv
      output_filename: expatlas.blueprint2.baseline.binned_v2-{suffix}.tsv
      resource: hpa-rna-level
    - uri: https://storage.googleapis.com/atlas_baseline_expression/exp_summary_NormCounts_genes_all_Blueprint2_v2.txt
      output_filename: exp_summary_NormCounts_genes_all_Blueprint2_v2-{suffix}.txt
      resource: hpa-rna-value
    - uri: https://storage.googleapis.com/atlas_baseline_expression/expatlas.blueprint2.baseline.z-score.binned_v2.tsv
      output_filename: expatlas.blueprint2.baseline.z-score.binned_v2-{suffix}.tsv
      resource: hpa-rna-zscore
# efo-data
# OWL association files fetched by URI; every entry shares the
# `disease-phenotype` resource label.
# NOTE: the top-level key `annotations` is repeated elsewhere in this file;
# duplicate keys are invalid YAML (most loaders keep only the last), so merge
# the sections before loading.
# NOTE(review): nesting restored from a listing that had lost its indentation.
annotations:
  gs_output_dir: annotation-files
  downloads:
    - uri: https://sourceforge.net/p/efo/code/HEAD/tree/trunk/src/efoassociations/ibd_2_pheno_associations.owl?format=raw
      output_filename: ibd_2_pheno_associations-{suffix}.owl
      resource: disease-phenotype
    - uri: https://sourceforge.net/p/efo/code/HEAD/tree/trunk/src/efoassociations/immune_disease_2_pheno.owl?format=raw
      output_filename: immune_disease_2_pheno-{suffix}.owl
      resource: disease-phenotype
    - uri: https://sourceforge.net/p/efo/code/HEAD/tree/trunk/src/efoassociations/rareAlbuminuria_associations_03Jun15.owl?format=raw
      output_filename: rareAlbuminuria_associations_03Jun15-{suffix}.owl
      resource: disease-phenotype
    - uri: https://sourceforge.net/p/efo/code/HEAD/tree/trunk/src/efoassociations/rareIBDpheno.owl?format=raw
      output_filename: rareIBDpheno-{suffix}.owl
      resource: disease-phenotype
    - uri: https://sourceforge.net/p/efo/code/HEAD/tree/trunk/src/efoassociations/ordo_hpo_mappings.owl?format=raw
      output_filename: ordo_hpo_mappings-{suffix}.owl
      resource: disease-phenotype
    - uri: https://sourceforge.net/p/efo/code/HEAD/tree/trunk/src/efoassociations/charite_HP_ORDO_07Oct15.owl?format=raw
      output_filename: charite_HP_ORDO_07Oct15-{suffix}.owl
      resource: disease-phenotype
# gene-data Plugin: Safety
# Known target safety: one combined output file plus the spreadsheet tabs it
# draws on. Each `spreadsheets` entry carries a `gkey`/`gid` pair, a per-tab
# output filename and an `s_type` code.
# NOTE(review): presumably `gkey` is the spreadsheet key and `gid` the tab id -
# confirm against the consumer. Nesting restored from a listing that had lost
# its indentation.
known_target_safety:
  output_filename: known_target_safety-{suffix}.json
  resource: safety
  gs_output_dir: annotation-files
  spreadsheets:
    - gkey: 1EvpcnUkDASUNoBU5PzQPGD5YtZxh7cgotr2MqClJ7t0
      gid: 1760619926
      output_filename: adverse_effects.tsv
      s_type: adr
    - gkey: 1EvpcnUkDASUNoBU5PzQPGD5YtZxh7cgotr2MqClJ7t0
      gid: 650742396
      output_filename: safety_risk_information.tsv
      s_type: sri
    - gkey: 1EvpcnUkDASUNoBU5PzQPGD5YtZxh7cgotr2MqClJ7t0
      gid: 1774810192
      output_filename: UBERON_mapping.tsv
      s_type: ubr
    - gkey: 1EvpcnUkDASUNoBU5PzQPGD5YtZxh7cgotr2MqClJ7t0
      gid: 1717378859
      output_filename: EFO_mapping.tsv
      s_type: efo
    - gkey: 1EvpcnUkDASUNoBU5PzQPGD5YtZxh7cgotr2MqClJ7t0
      gid: 1223815020
      output_filename: references.tsv
      s_type: ref
# gene-data Plugin: Tractability
# Bucket-sourced tractability annotation file.
# NOTE: the top-level key `annotations_from_buckets` is repeated in later
# sections of this file; duplicate keys are invalid YAML (most loaders keep
# only the last), so merge the sections before loading.
# NOTE(review): nesting restored from a listing that had lost its indentation.
annotations_from_buckets:
  gs_output_dir: annotation-files
  downloads:
    - bucket: otar001-core/Tractability
      output_filename: tractability_buckets-{suffix}.tsv
      resource: tractability
# gene-data Plugin: ChemicalProbes
# Chemical probes: one combined output file plus the spreadsheet tabs it
# draws on. Each `spreadsheets` entry carries a `gkey`/`gid` pair, a per-tab
# output filename and a `cp_type` code.
# NOTE(review): presumably `gkey` is the spreadsheet key and `gid` the tab id -
# confirm against the consumer. Nesting restored from a listing that had lost
# its indentation.
chemical_probes:
  output_filename: chemicalprobes_portalprobes-{suffix}.tsv
  resource: chemical-probes-1
  gs_output_dir: annotation-files
  spreadsheets:
    - gkey: 1VL3eHGpvJMaQ7LYp0Kp1plEo-NueP3CT7FUCrW0p2dg
      gid: 1686598114
      output_filename: chemical_probes_SGC.tsv
      cp_type: sgc
    - gkey: 1VL3eHGpvJMaQ7LYp0Kp1plEo-NueP3CT7FUCrW0p2dg
      gid: 1636863333
      output_filename: chemical_probes_portal.tsv
      cp_type: cpp
    - gkey: 1VL3eHGpvJMaQ7LYp0Kp1plEo-NueP3CT7FUCrW0p2dg
      gid: 349856989
      output_filename: open_science_probes.tsv
      cp_type: osp
# Bucket-sourced ProbeMiner annotation file (second chemical-probes resource).
# NOTE: the top-level key `annotations_from_buckets` is repeated elsewhere in
# this file; duplicate keys are invalid YAML (most loaders keep only the
# last), so merge the sections before loading.
# NOTE(review): nesting restored from a listing that had lost its indentation.
annotations_from_buckets:
  gs_output_dir: annotation-files
  downloads:
    - bucket: otar001-core/ProbeMiner/annotation
      output_filename: chemicalprobes_probeminer-{suffix}.tsv
      resource: chemical-probes-2
# gene-data Plugin: CancerBiomarkers
# Bucket-sourced cancer biomarkers annotation file.
# NOTE: the top-level key `annotations_from_buckets` is repeated elsewhere in
# this file; duplicate keys are invalid YAML (most loaders keep only the
# last), so merge the sections before loading.
# NOTE(review): nesting restored from a listing that had lost its indentation.
annotations_from_buckets:
  gs_output_dir: annotation-files
  downloads:
    - bucket: otar001-core/CancerBiomarkers/annotation
      output_filename: cancerbiomarkers-{suffix}.tsv
      resource: biomarker
# gene-data Plugin: Hallmarks
# Bucket-sourced hallmarks annotation file from the otar007-cosmic bucket.
# NOTE: the top-level key `annotations_from_buckets` is repeated elsewhere in
# this file; duplicate keys are invalid YAML (most loaders keep only the
# last), so merge the sections before loading.
# NOTE(review): nesting restored from a listing that had lost its indentation.
# `includes` is assumed to belong to the download entry, mirroring the
# `excludes: hallmarks` key on the otar007-cosmic evidence entry - confirm.
annotations_from_buckets:
  gs_output_dir: annotation-files
  downloads:
    - bucket: otar007-cosmic
      output_filename: cosmic-hallmarks-{suffix}.tsv.gz
      resource: hallmark
      includes: hallmarks
# gene-data Plugin: MousePhenotypes
# Mouse phenotype report files fetched by URI.
# NOTE: the top-level key `annotations` is repeated elsewhere in this file;
# duplicate keys are invalid YAML (most loaders keep only the last), so merge
# the sections before loading.
# NOTE(review): nesting restored from a listing that had lost its indentation.
annotations:
  gs_output_dir: annotation-files
  downloads:
    - uri: http://www.informatics.jax.org/downloads/reports/HMD_HumanPhenotype.rpt
      output_filename: HMD_HumanPhenotype-{suffix}.rpt
      resource: mouse-phenotypes-orthology
    - uri: http://www.informatics.jax.org/downloads/reports/MGI_PhenoGenoMP.rpt
      output_filename: MGI_PhenoGenoMP-{suffix}.rpt
      resource: mouse-phenotypes-report
# drug-data
# gene-data Plugin: ChEMBL
# ChEMBL REST API downloads. Unlike the list-based sections in this file,
# `downloads` here is a mapping keyed by entry name; each entry gives a
# source `uri`, an output filename template and a `resource` label.
# NOTE(review): nesting restored from a listing that had lost its indentation;
# verify against the consumer's schema.
ChEMBL:
  gs_output_dir: annotation-files
  downloads:
    drug:
      uri: https://www.ebi.ac.uk/chembl/api/data/drug_indication.json
      output_filename: chembl_drug_indication_rest_api-{suffix}.json
      resource: chembl-drug-indication
    molecule:
      uri: https://www.ebi.ac.uk/chembl/api/data/molecule.json
      output_filename: chembl_molecule_rest_api-{suffix}.json
      resource: chembl-molecule
    target:
      uri: https://www.ebi.ac.uk/chembl/api/data/target.json
      output_filename: chembl_target_rest_api-{suffix}.json
      resource: chembl-target
    mechanism:
      uri: https://www.ebi.ac.uk/chembl/api/data/mechanism.json
      output_filename: chembl_mechanism_rest_api-{suffix}.json
      resource: chembl-mechanism
    target_component:
      uri: https://www.ebi.ac.uk/chembl/api/data/target_component.json
      output_filename: chembl_target_component_rest_api-{suffix}.json
      resource: chembl-component
    protein_class:
      uri: https://www.ebi.ac.uk/chembl/api/data/protein_class.json
      output_filename: chembl_protein_class_rest_api-{suffix}.json
      resource: chembl-protein
# gene-data Plugin: Orthologs
# HCOP orthologs file fetched by URI.
# NOTE: the top-level key `annotations` is repeated elsewhere in this file;
# duplicate keys are invalid YAML (most loaders keep only the last), so merge
# the sections before loading.
# NOTE(review): nesting restored from a listing that had lost its indentation.
annotations:
  gs_output_dir: annotation-files
  downloads:
    - uri: http://ftp.ebi.ac.uk/pub/databases/genenames/hcop/human_all_hcop_sixteen_column.txt.gz
      output_filename: human_all_hcop_sixteen_column-{suffix}.txt.gz
      resource: hgnc-orthologs
# gene-data Plugin: HGNC
# HGNC complete set JSON fetched by URI.
# NOTE: the top-level key `annotations` is repeated elsewhere in this file;
# duplicate keys are invalid YAML (most loaders keep only the last), so merge
# the sections before loading.
# NOTE(review): nesting restored from a listing that had lost its indentation.
annotations:
  gs_output_dir: annotation-files
  downloads:
    - uri: http://ftp.ebi.ac.uk/pub/databases/genenames/new/json/hgnc_complete_set.json
      output_filename: hgnc_complete_set-{suffix}.json
      resource: hgnc-complete-set
# reactome-data
# Reactome pathway list and pathway-relation files fetched by URI.
# NOTE: the top-level key `annotations` is repeated elsewhere in this file;
# duplicate keys are invalid YAML (most loaders keep only the last), so merge
# the sections before loading.
# NOTE(review): nesting restored from a listing that had lost its indentation.
annotations:
  gs_output_dir: annotation-files
  downloads:
    - uri: https://www.reactome.org/download/current/ReactomePathways.txt
      output_filename: ReactomePathways-{suffix}.txt
      resource: reactome-pathway-data
    - uri: https://www.reactome.org/download/current/ReactomePathwaysRelation.txt
      output_filename: ReactomePathwaysRelation-{suffix}.txt
      resource: reactome-pathway-relation
# uniprot-data
# UniProt XML export fetched by URI; the query string is percent-encoded and
# must be kept verbatim.
# NOTE: the top-level key `annotations` is repeated elsewhere in this file;
# duplicate keys are invalid YAML (most loaders keep only the last), so merge
# the sections before loading.
# NOTE(review): nesting restored from a listing that had lost its indentation.
annotations:
  gs_output_dir: annotation-files
  downloads:
    - uri: https://www.uniprot.org/uniprot/?query=reviewed%3Ayes%2BAND%2Borganism%3A9606&compress=yes&format=xml
      output_filename: uniprot-{suffix}.xml.gz
      resource: uniprot-uri
# ensembl-data
# Ensembl gene dictionary settings: conda environment name, the command that
# creates it, the script command line, the release name and the output file
# extension. `{root_dir}`, `{script_path}` and `{ensembl_output_dir}` are
# placeholders, presumably substituted by the consumer - confirm.
# NOTE(review): nesting restored from a listing that had lost its indentation.
ensembl:
  resource: ensembl-filename
  conda_env: backend-genetics
  conda_create: bash -c "~/anaconda2/bin/conda env create -f {root_dir}/scripts/ensembl/environment.yaml"
  # Multi-line plain scalar: the indented continuation line is folded onto the
  # previous one with a single space, giving one command line.
  python_script: python {script_path}/create_genes_dictionary.py -o {ensembl_output_dir}/.
    -e -z -n
  ensembl_release: homo_sapiens_core_96_38
  extension_file: _genes.json.gz
  gs_output_dir: annotation-files
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment