Last active
May 17, 2017 16:21
-
-
Save dbolser-ebi/21ca0a6ec094a6429ded698416d61895 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> desc variation_feature; | |
... | |
| consequence_types | set('intergenic_variant','splice_acceptor_variant','splice_donor_variant','stop_lost','coding_sequence_variant','missense_variant','stop_gained','synonymous_variant','frameshift_variant','non_coding_transcript_variant','non_coding_transcript_exon_variant','mature_miRNA_variant','NMD_transcript_variant','5_prime_UTR_variant','3_prime_UTR_variant','incomplete_terminal_codon_variant','intron_variant','splice_region_variant','downstream_gene_variant','upstream_gene_variant','start_lost','stop_retained_variant','inframe_insertion','inframe_deletion','transcript_ablation','transcript_fusion','transcript_amplification','transcript_translocation','TFBS_ablation','TFBS_fusion','TFBS_amplification','TFBS_translocation','regulatory_region_ablation','regulatory_region_fusion','regulatory_region_amplification','regulatory_region_translocation','feature_elongation','feature_truncation','regulatory_region_variant','TF_binding_site_variant','protein_altering_variant') | NO | MUL | intergenic_variant | | | |
... | |
## Parsing | |
DESC variation_feature; | |
## gives: | |
1 'intergenic_variant', | |
2 'splice_acceptor_variant', | |
3 'splice_donor_variant', | |
4 'stop_lost', | |
5 'coding_sequence_variant', | |
6 'missense_variant', | |
7 'stop_gained', | |
8 'synonymous_variant', | |
9 'frameshift_variant', | |
10 'non_coding_transcript_variant', | |
11 'non_coding_transcript_exon_variant', | |
12 'mature_miRNA_variant', | |
13 'NMD_transcript_variant', | |
14 '5_prime_UTR_variant', | |
... | |
My attempted query... | |
SELECT | |
COUNT(*) AS N, | |
COUNT(DISTINCT variation_feature_id) AS U, | |
SUM( IF(consequence_types & 1, 1, 0)) AS intergenic_variant, | |
SUM( IF(consequence_types & 2, 1, 0)) AS splice_acceptor_variant, | |
SUM( IF(consequence_types & 4, 1, 0)) AS splice_donor_variant | |
FROM | |
transcript_variation | |
Which gives: | |
+---------+---------+----------------------+---------------------------+------------------------+ | |
| N | U | intergenic_variant | splice_acceptor_variant | splice_donor_variant | | |
|---------+---------+----------------------+---------------------------+------------------------| | |
| 1721493 | 1524687 | 1262 | 1321 | 879 | | |
+---------+---------+----------------------+---------------------------+------------------------+ | |
1 row in set | |
Time: 6.353s | |
SELECT | |
COUNT(*) AS N, | |
COUNT(DISTINCT variation_feature_id) AS U, | |
COUNT(DISTINCT IF(consequence_types & 1, variation_feature_id, NULL)) AS intergenic_variant, | |
COUNT(DISTINCT IF(consequence_types & 2, variation_feature_id, NULL)) AS splice_acceptor_variant, | |
COUNT(DISTINCT IF(consequence_types & 4, variation_feature_id, NULL)) AS splice_donor_variant | |
FROM | |
transcript_variation; | |
+---------+---------+----------------------+---------------------------+------------------------+ | |
| N | U | intergenic_variant | splice_acceptor_variant | splice_donor_variant | | |
|---------+---------+----------------------+---------------------------+------------------------| | |
| 1721493 | 1524687 | 1245 | 1310 | 867 | | |
+---------+---------+----------------------+---------------------------+------------------------+ | |
1 row in set | |
Time: 4.389s | |
BUT: | |
> SELECT COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U FROM transcript_variation WHERE FIND_IN_SET('intergenic_variant', consequence_types); | |
+-----+-----+ | |
| N | U | | |
|-----+-----| | |
| 0 | 0 | | |
+-----+-----+ | |
1 row in set | |
Time: 2.069s | |
> SELECT COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U FROM transcript_variation WHERE FIND_IN_SET('splice_acceptor_variant', consequence_types); | |
+------+------+ | |
| N | U | | |
|------+------| | |
| 1262 | 1245 | | |
+------+------+ | |
1 row in set | |
Time: 2.227s | |
> SELECT COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U FROM transcript_variation WHERE FIND_IN_SET('splice_donor_variant', consequence_types); | |
+------+------+ | |
| N | U | | |
|------+------| | |
| 1321 | 1310 | | |
+------+------+ | |
1 row in set | |
Time: 2.257s | |
Author
dbolser-ebi
commented
May 17, 2017
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment