Skip to content

Instantly share code, notes, and snippets.

@arq5x
Last active November 13, 2019 17:55
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save arq5x/9e1928638397ba45da2e to your computer and use it in GitHub Desktop.
Save arq5x/9e1928638397ba45da2e to your computer and use it in GitHub Desktop.
GEMINI Tutorial Commands
# assumes you have SSH'ed and qlogin'ed
# Work inside thu/mydata. The directories are created first if they are
# missing, and the cd only runs when that succeeded, so a failed step can no
# longer leave you working in the wrong directory.
mkdir -p thu/mydata && cd thu/mydata
# slide 5
# Download the trio VCF and the pedigree file.
# --fail makes curl exit with an error on an HTTP failure instead of saving
# the server's error page under the data file's name; --location follows
# redirects; --output names the local file.
curl --fail --location --output trio.trim.vep.vcf.gz \
  https://s3.amazonaws.com/gemini-tutorials/trio.trim.vep.vcf.gz
curl --fail --location --output dominant.ped \
  https://s3.amazonaws.com/gemini-tutorials/dominant.ped
# Run `gemini load` on the VCF (-v) and pedigree (-p); the last argument is
# the database file name used by every later command in this section.
gemini load --cores 2 \
  -v trio.trim.vep.vcf.gz \
  -t VEP \
  --tempdir . \
  --skip-gene-tables \
  -p dominant.ped \
  trio.trim.vep.dominant.db
# slide 6
# Show the first lines of the autosomal_dominant output as an aligned table.
gemini autosomal_dominant \
  --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
  trio.trim.vep.dominant.db \
  | head \
  | column -t
# slide 7
# Same command, but count the output lines instead of printing them.
gemini autosomal_dominant \
  --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
  trio.trim.vep.dominant.db \
  | wc -l
# slide 8
# Pass a --filter expression on the filter column and count the output lines.
gemini autosomal_dominant \
  --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
  --filter "(filter is NULL or filter=='SBFilter')" \
  trio.trim.vep.dominant.db \
  | wc -l
# slide 9
# Extend the filter expression with impact_severity != 'LOW'.
gemini autosomal_dominant \
  --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
  --filter "(filter is NULL or filter=='SBFilter') and impact_severity != 'LOW'" \
  trio.trim.vep.dominant.db \
  | wc -l
# slide 10
# Extend the filter expression with max_aaf_all < 0.005; count output lines.
gemini autosomal_dominant \
  --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
  --filter "(filter is NULL or filter=='SBFilter') and impact_severity != 'LOW' and max_aaf_all < 0.005" \
  trio.trim.vep.dominant.db \
  | wc -l
# slide 11
# Tighten the severity condition to impact_severity == 'HIGH'.
gemini autosomal_dominant \
  --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
  --filter "(filter is NULL or filter=='SBFilter') and impact_severity == 'HIGH' and max_aaf_all < 0.005" \
  trio.trim.vep.dominant.db \
  | wc -l
# slide 14
# Query the variants table with a --gt-filter that names three gt_types
# columns explicitly (4805, 1805, 1847); show the first lines as a table.
gemini query \
  -q "SELECT chrom, start, end, ref, alt, gene, impact, (gts).(*) FROM variants" \
  --header \
  --gt-filter "gt_types.4805 == HET and gt_types.1805 == HET and gt_types.1847 == HOM_REF" \
  trio.trim.vep.dominant.db \
  | head \
  | column -t
# slide 16
# Same query, with the --gt-filter written in the wildcard form keyed on
# phenotype (==2 must be HET, ==1 must be HOM_REF) instead of per-sample names.
gemini query \
  -q "SELECT chrom, start, end, ref, alt, gene, impact, (gts).(*) FROM variants" \
  --header \
  --gt-filter "(gt_types).(phenotype==2).(==HET).(all) and (gt_types).(phenotype==1).(==HOM_REF).(all)" \
  trio.trim.vep.dominant.db \
  | head \
  | column -t
# slide 17
# Adds (gt_depths).(*) to the selected columns and adds the condition
# (gt_depths).(*).(>=20).(all) to the genotype filter.
# The backslash-newlines inside the double quotes are removed by the shell, so
# each quoted argument reaches gemini as a single line. Do not indent the
# continuation lines inside the quotes: the extra spaces would become part of
# the argument.
# Only the first lines of output are shown (head), aligned with column -t.
gemini query \
-q "SELECT chrom, start, end, ref, alt, gene, impact, \
(gts).(*), (gt_depths).(*) \
FROM variants" \
--header \
--gt-filter "(gt_types).(phenotype==2).(==HET).(all) \
and (gt_types).(phenotype==1).(==HOM_REF).(all) \
and (gt_depths).(*).(>=20).(all)" \
trio.trim.vep.dominant.db \
| head \
| column -t
# slide 18
# Same selected columns and genotype filter as the slide 17 command, plus a
# SQL WHERE clause on filter, impact_severity and max_aaf_all.
# The full output is aligned with column -t (no head here).
# The backslash-newlines inside the double quotes are removed by the shell, so
# the SQL and the genotype filter are each passed to gemini as one line.
gemini query \
-q "SELECT chrom, start, end, ref, alt, gene, impact, \
(gts).(*), (gt_depths).(*) \
FROM variants \
WHERE (filter is NULL or filter=='SBFilter') \
and impact_severity == 'HIGH' \
and max_aaf_all < 0.005" \
--header \
--gt-filter "(gt_types).(phenotype==2).(==HET).(all) \
and (gt_types).(phenotype==1).(==HOM_REF).(all) \
and (gt_depths).(*).(>=20).(all)" \
trio.trim.vep.dominant.db \
| column -t
# assumes you have SSH'ed and qlogin'ed
# slide 12
# Create thu/mydata (and thu itself) if missing, then work inside it.
# `mkdir -p` does not fail when the directory already exists, and the cd only
# runs when the directory is really there.
mkdir -p thu/mydata && cd thu/mydata
# slide 10
# Download the trio VCF and the pedigree file.
# --fail makes curl exit with an error on an HTTP failure instead of saving
# the server's error page under the data file's name; --location follows
# redirects; --output names the local file.
curl --fail --location --output trio.trim.vep.vcf.gz \
  https://s3.amazonaws.com/gemini-tutorials/trio.trim.vep.vcf.gz
curl --fail --location --output recessive.ped \
  https://s3.amazonaws.com/gemini-tutorials/recessive.ped
# Run `gemini load` on the VCF (-v) and pedigree (-p); the last argument is
# the database file name used by every later command in this section.
gemini load --cores 2 \
  -v trio.trim.vep.vcf.gz \
  -t VEP \
  --tempdir . \
  --skip-gene-tables \
  -p recessive.ped \
  trio.trim.vep.recessive.db
# slide 11
# Run comp_hets with its default output.
gemini comp_hets trio.trim.vep.recessive.db
# slide 12
# Restrict the reported columns with --columns.
gemini comp_hets \
  --columns "chrom, start, end, gene, impact, cadd_raw" \
  trio.trim.vep.recessive.db
# slide 13
# Same command: first print only the first lines (head) ...
gemini comp_hets \
  --columns "chrom, start, end, gene, impact, cadd_raw" \
  trio.trim.vep.recessive.db \
  | head
# ... then count the output lines.
gemini comp_hets \
  --columns "chrom, start, end, gene, impact, cadd_raw" \
  trio.trim.vep.recessive.db \
  | wc -l
# slide 14
# Pass a --filter expression on filter and impact_severity; count output lines.
gemini comp_hets \
  --columns "chrom, start, end, gene, impact, cadd_raw" \
  --filter "(filter is NULL or filter=='SBFilter') and impact_severity != 'LOW'" \
  trio.trim.vep.recessive.db \
  | wc -l
# slide 15
# Extend the filter expression with max_aaf_all < 0.005.
gemini comp_hets \
  --columns "chrom, start, end, gene, impact, cadd_raw" \
  --filter "(filter is NULL or filter=='SBFilter') and impact_severity != 'LOW' and max_aaf_all < 0.005" \
  trio.trim.vep.recessive.db \
  | wc -l
# slide 16
# Print the comp_hets output for the filter expression (no `wc -l` here).
# The command ends on the database line: it must NOT carry a trailing
# backslash, otherwise the shell glues the following line onto this command.
gemini comp_hets \
  --columns "chrom, start, end, gene, impact, cadd_raw" \
  --filter "(filter is NULL or filter=='SBFilter') and impact_severity != 'LOW' and max_aaf_all < 0.005" \
  trio.trim.vep.recessive.db
# slide 17
# Same columns and filter expression, with -d 6 and --min-gq 20 added.
gemini comp_hets \
  --columns "chrom, start, end, gene, impact, cadd_raw" \
  --filter "(filter is NULL or filter=='SBFilter') and impact_severity != 'LOW' and max_aaf_all < 0.005" \
  -d 6 \
  --min-gq 20 \
  trio.trim.vep.recessive.db
# assumes you have SSH'ed and qlogin'ed
# Work inside wed/mydata. The directories are created first if they are
# missing, and the cd only runs when that succeeded, so a failed step can no
# longer leave you working in the wrong directory.
mkdir -p wed/mydata && cd wed/mydata
# slide 17
# The commented-out commands below are kept for reference only; they are not
# run. The database file is downloaded directly instead.
# curl https://s3.amazonaws.com/gemini-tutorials/trio.trim.vep.vcf.gz > trio.trim.vep.vcf.gz
# curl https://s3.amazonaws.com/gemini-tutorials/denovo.ped > denovo.ped
# gemini load --cores 2 \
# -v trio.trim.vep.vcf.gz \
# -t VEP \
# --tempdir . \
# --skip-gene-tables --skip-cadd --skip-gerp-bp \
# -p denovo.ped \
# trio.trim.vep.denovo.db
# --fail makes curl exit with an error on an HTTP failure instead of saving
# the server's error page as the database file; --location follows redirects.
curl --fail --location --output trio.trim.vep.denovo.db \
  http://home.chpc.utah.edu/~u1138933/gemini_db/trio.trim.vep.denovo.db
# slide 19
# Run de_novo with its default output.
gemini de_novo trio.trim.vep.denovo.db
# type Ctrl+C to stop output if you'd like (should take 20 seconds to complete)
# slide 21
# Restrict the reported columns with --columns.
gemini de_novo \
  --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
  trio.trim.vep.denovo.db
# slide 23
# Same command, counting the output lines.
gemini de_novo \
  --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
  trio.trim.vep.denovo.db \
  | wc -l
# slide 25
# Add -d 6; count the output lines.
gemini de_novo \
  --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
  -d 6 \
  trio.trim.vep.denovo.db \
  | wc -l
# slide 26
# Add --min-gq 20 on top of -d 6.
gemini de_novo \
  --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
  -d 6 \
  --min-gq 20 \
  trio.trim.vep.denovo.db \
  | wc -l
# slide 28
# Add the filter expression "filter is NULL".
gemini de_novo \
  --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
  -d 6 \
  --min-gq 20 \
  --filter "filter is NULL" \
  trio.trim.vep.denovo.db \
  | wc -l
# slide 30
# Widen the filter expression to also accept filter=='SBFilter'.
gemini de_novo \
  --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
  -d 6 \
  --min-gq 20 \
  --filter "(filter is NULL or filter=='SBFilter')" \
  trio.trim.vep.denovo.db \
  | wc -l
# slide 32
# Extend the filter expression with impact_severity != 'LOW'; count lines.
gemini de_novo \
  --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
  -d 6 \
  --min-gq 20 \
  --filter "(filter is NULL or filter=='SBFilter') and impact_severity != 'LOW'" \
  trio.trim.vep.denovo.db \
  | wc -l
# slide 35
# Extend the filter expression with max_aaf_all <= 0.005; count lines.
gemini de_novo \
  --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
  -d 6 \
  --min-gq 20 \
  --filter "(filter is NULL or filter=='SBFilter') and impact_severity != 'LOW' and max_aaf_all <= 0.005" \
  trio.trim.vep.denovo.db \
  | wc -l
# slide 35
# Same command without `wc -l`: print the rows themselves.
gemini de_novo \
  --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
  -d 6 \
  --min-gq 20 \
  --filter "(filter is NULL or filter=='SBFilter') and impact_severity != 'LOW' and max_aaf_all <= 0.005" \
  trio.trim.vep.denovo.db
# slide 7
# Create wed/mydata and work inside it. `mkdir -p` creates both levels in one
# step and does not fail when they already exist (e.g. when these commands are
# run a second time); the cd only runs when the directory is really there.
mkdir -p wed/mydata && cd wed/mydata
# Download the two practice databases, the VCF and the pedigree file.
# --fail makes curl exit with an error on an HTTP failure instead of saving
# the server's error page under the data file's name; --location follows
# redirects; --output names the local file.
curl --fail --location --output learnSQL.db \
  https://s3.amazonaws.com/gemini-tutorials/learnSQL.db
curl --fail --location --output learnSQL2.db \
  https://s3.amazonaws.com/gemini-tutorials/learnSQL2.db
curl --fail --location --output chr22.VEP.vcf \
  https://s3.amazonaws.com/gemini-tutorials/chr22.VEP.vcf
curl --fail --location --output trio.ped \
  https://s3.amazonaws.com/gemini-tutorials/trio.ped
# slide 9
# Select one column from the samples table.
gemini query \
  -q "SELECT name FROM samples" \
  learnSQL.db
# slide 10
# WHERE with an equality test.
gemini query \
  -q "SELECT name FROM samples WHERE phenotype == 2" \
  learnSQL.db
# slide 11
# WHERE with an inequality test (<>).
gemini query \
  -q "SELECT name FROM samples WHERE phenotype <> 2" \
  learnSQL.db
# slide 12
# WHERE with a less-than comparison.
gemini query \
  -q "SELECT name FROM samples WHERE sample_id < 3" \
  learnSQL.db
# slide 13
# WHERE ... IS NULL (note: this and the following queries use learnSQL2.db).
gemini query \
  -q "SELECT name FROM samples WHERE ethnicity IS NULL" \
  learnSQL2.db
# slide 14
# WHERE ... IS NOT NULL.
gemini query \
  -q "SELECT name FROM samples WHERE ethnicity IS NOT NULL" \
  learnSQL2.db
# slide 15
# Select every column of the fakevariants table.
gemini query \
  -q "SELECT * FROM fakevariants" \
  learnSQL2.db
# slide 17
# The SQL below spans two lines inside the double quotes with no trailing
# backslash, so the newline is passed to gemini as part of the query text.
# First form: compare in_dbsnp against 1 explicitly.
gemini query -q "SELECT chrom,start,end FROM fakevariants
WHERE in_dbsnp == 1" learnSQL2.db
# Second form: use the bare column in_dbsnp as the WHERE condition.
gemini query -q "SELECT chrom,start,end FROM fakevariants
WHERE in_dbsnp" learnSQL2.db
# slide 18
# COUNT(*) of the fakevariants rows where chrom is 'chr1'.
gemini query -q "SELECT COUNT(*) FROM fakevariants
WHERE chrom == 'chr1' " learnSQL2.db
# slide 19
# Same count with the additional condition in_dbsnp == 0.
gemini query -q "SELECT COUNT(*) FROM fakevariants
WHERE chrom == 'chr1'
AND in_dbsnp == 0 " learnSQL2.db
# slide 21
#perl ~/software/variant_effect_predictor/variant_effect_predictor.pl -i chr22.vcf -o chr22.VEP.vcf --vcf \
#--cache --dir ~/software/variant_effect_predictor/references \
#--compress "gunzip -c" \
#--force_overwrite \
#--sift b --polyphen b --symbol --numbers --biotype --total_length \
#--fields Consequence,Codons,Amino_acids,Gene,SYMBOL,Feature,EXON,PolyPhen,SIFT,Protein_position,BIOTYPE
# slide 25
# Run `gemini load` on chr22.VEP.vcf and trio.ped; chr22.db is the database
# file name used by the remaining commands.
gemini load \
  -v chr22.VEP.vcf \
  -p trio.ped \
  -t VEP \
  --cores 2 \
  --tempdir . \
  --skip-gene-tables \
  chr22.db
# slide 26
gemini db_info chr22.db
# slide 27
# Query the samples table: one column, then all columns, with --header.
gemini query \
  -q "SELECT name FROM samples" \
  --header \
  chr22.db
gemini query \
  -q "SELECT * FROM samples" \
  --header \
  chr22.db
# slide 28
# COUNT(*) of variants with in_dbsnp == 0.
gemini query \
  -q "SELECT COUNT(*) FROM variants WHERE in_dbsnp == 0" \
  --header \
  chr22.db
# COUNT(*) of variants whose filter column is NULL.
gemini query \
  -q "SELECT COUNT(*) FROM variants WHERE filter is NULL" \
  --header \
  chr22.db
# slide 29
# The SQL below spans several lines inside the double quotes with no trailing
# backslash, so the newlines are passed to gemini as part of the query text.
# All columns for variants with a NULL filter and gene = 'MLC1'.
gemini query -q "SELECT * FROM variants WHERE
filter is NULL and gene = 'MLC1' " --header chr22.db
# Same WHERE clause, selecting only five named columns.
gemini query -q "SELECT rs_ids, aaf_esp_ea, impact, clinvar_disease_name, clinvar_sig
FROM variants
WHERE filter is NULL and gene = 'MLC1' " --header chr22.db
# slide 30
# COUNT(*) of variants with a non-NULL clinvar_disease_name and
# aaf_esp_ea <= 0.01.
gemini query \
  -q "SELECT COUNT(*) from variants WHERE clinvar_disease_name is not NULL and aaf_esp_ea <= 0.01" \
  chr22.db
# Same WHERE clause, selecting the gene column instead of a count.
gemini query \
  -q "SELECT gene from variants WHERE clinvar_disease_name is not NULL and aaf_esp_ea <= 0.01" \
  chr22.db
# slide 32
# --gt-filter on one gt_types column; count the output lines.
gemini query \
  -q "SELECT * from variants" \
  --gt-filter "gt_types.1805 <> HOM_REF" \
  --header \
  chr22.db \
  | wc -l
# --gt-filter combining two gt_types columns with `and` (no --header here).
gemini query \
  -q "SELECT * from variants" \
  --gt-filter "(gt_types.1805 <> HOM_REF and gt_types.4805 <> HOM_REF)" \
  chr22.db \
  | wc -l
# Same genotype filter, selecting the two gts columns and printing the rows.
gemini query \
  -q "SELECT gts.1805, gts.4805 from variants" \
  --gt-filter "(gt_types.1805 <> HOM_REF and gt_types.4805 <> HOM_REF)" \
  chr22.db
# slide 33 - wildcards
# (*) selector with the (all) rule.
gemini query \
  -q "SELECT chrom, start, end, ref, alt, gene, impact, (gts).(*) FROM variants" \
  --gt-filter "(gt_types).(*).(==HET).(all)" \
  --header \
  chr22.db
# (sex==2) selector with the (all) rule.
gemini query \
  -q "SELECT chrom, start, end, ref, alt, gene, impact, (gts).(*) FROM variants" \
  --gt-filter "(gt_types).(sex==2).(==HOM_REF).(all)" \
  --header \
  chr22.db
# slide 34 - the "any" wildcard
gemini query \
  -q "SELECT chrom, start, end, ref, alt, gene, impact, (gts).(*) FROM variants" \
  --gt-filter "(gt_types).(sex==2).(!=HOM_REF).(any)" \
  --header \
  chr22.db
# slide 35 - the "none" wildcard
gemini query \
  -q "SELECT chrom, start, end, ref, alt, gene, impact, (gts).(*) FROM variants" \
  --gt-filter "(gt_types).(sex==2).(==HOM_REF).(none)" \
  --header \
  chr22.db
# slide 36 - the "count" wildcard
gemini query \
  -q "SELECT chrom, start, end, ref, alt, gene, impact, (gts).(*) FROM variants" \
  --gt-filter "(gt_types).(*).(==UNKNOWN).(count >= 2)" \
  --header \
  chr22.db
# slide 38 - wildcards are general to all genotype columns
# Here the wildcard is applied to gt_depths instead of gt_types.
gemini query \
  -q "SELECT chrom, start, end, ref, alt, gene, impact, (gts).(*), (gt_depths).(*) FROM variants" \
  --gt-filter "(gt_depths).(*).(>=50).(all)" \
  --header \
  chr22.db
# slide 39
# `gemini stats` with two different options, each aligned with column -t.
gemini stats --gts-by-sample chr22.db \
  | column -t
gemini stats --tstv chr22.db \
  | column -t
# slide 40
# `gemini stats --summarize` on a query: first in_dbsnp = 0, then in_dbsnp = 1.
gemini stats --summarize \
  "SELECT * from variants WHERE in_dbsnp = 0" \
  chr22.db \
  | column -t
gemini stats --summarize \
  "SELECT * from variants WHERE in_dbsnp = 1" \
  chr22.db \
  | column -t
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment