Last active
May 17, 2017 16:42
-
-
Save dbolser-ebi/378dff176b7061b9813676ef18c7ea57 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
show create table variation_feature; | |
+-------------------+----------------+ | |
| Table | Create Table | | |
|-------------------+----------------| | |
| variation_feature | CREATE TABLE variation_feature ( | |
variation_feature_id int(10) unsigned NOT NULL AUTO_INCREMENT, | |
seq_region_id int(10) unsigned NOT NULL, | |
seq_region_start int(11) NOT NULL, | |
seq_region_end int(11) NOT NULL, | |
seq_region_strand tinyint(4) NOT NULL, | |
variation_id int(10) unsigned NOT NULL, | |
allele_string varchar(50000) DEFAULT NULL, | |
variation_name varchar(255) DEFAULT NULL, | |
map_weight int(11) NOT NULL, | |
flags set('genotyped') DEFAULT NULL, | |
source_id int(10) unsigned NOT NULL, | |
consequence_types set('intergenic_variant','splice_acceptor_variant','splice_donor_variant','stop_lost','coding_sequence_variant','missense_variant','stop_gained','synonymous_variant','frameshift_variant','non_coding_transcript_variant','non_coding_transcript_exon_variant','mature_miRNA_variant','NMD_transcript_variant','5_prime_UTR_variant','3_prime_UTR_variant','incomplete_terminal_codon_variant','intron_variant','splice_region_variant','downstream_gene_variant','upstream_gene_variant','start_lost','stop_retained_variant','inframe_insertion','inframe_deletion','transcript_ablation','transcript_fusion','transcript_amplification','transcript_translocation','TFBS_ablation','TFBS_fusion','TFBS_amplification','TFBS_translocation','regulatory_region_ablation','regulatory_region_fusion','regulatory_region_amplification','regulatory_region_translocation','feature_elongation','feature_truncation','regulatory_region_variant','TF_binding_site_variant','protein_altering_variant') NOT NULL DEFAULT 'intergenic_variant', | |
variation_set_id set('1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50','51','52','53','54','55','56','57','58','59','60','61','62','63','64') NOT NULL DEFAULT '', | |
class_attrib_id int(10) unsigned DEFAULT '0', | |
somatic tinyint(1) NOT NULL DEFAULT '0', | |
minor_allele varchar(50) DEFAULT NULL, | |
minor_allele_freq float DEFAULT NULL, | |
minor_allele_count int(10) unsigned DEFAULT NULL, | |
alignment_quality double DEFAULT NULL, | |
clinical_significance set('uncertain significance','not provided','benign','likely benign','likely pathogenic','pathogenic','drug response','histocompatibility','other','confers sensitivity','risk factor','association','protective') DEFAULT NULL, | |
evidence_attribs set('367','368','369','370','371','372','418','421') DEFAULT NULL, | |
display int(1) DEFAULT '1', | |
PRIMARY KEY (variation_feature_id), | |
KEY variation_idx (variation_id), | |
KEY pos_idx (seq_region_id,seq_region_start,seq_region_end), | |
KEY variation_set_idx (variation_set_id), | |
KEY consequence_type_idx (consequence_types), | |
KEY source_idx (source_id) | |
) ENGINE=MyISAM AUTO_INCREMENT=5230594 DEFAULT CHARSET=latin1 CHECKSUM=1 | | |
+-------------------+----------------+ | |
1 row in set | |
SELECT | |
COUNT(*) AS N, | |
COUNT( DISTINCT variation_feature_id ) AS U, | |
SUM( IF(consequence_types & 1, 1, 0 ) ) AS intergenic_variant, | |
COUNT( DISTINCT IF(consequence_types & 1, variation_feature_id, NULL) ) AS intergenic_variant, | |
SUM( IF(consequence_types & 2, 1, 0 ) ) AS splice_acceptor_variant, | |
COUNT( DISTINCT IF(consequence_types & 2, variation_feature_id, NULL) ) AS splice_acceptor_variant, | |
SUM( IF(consequence_types & 4, 1, 0 ) ) AS splice_donor_variant, | |
COUNT( DISTINCT IF(consequence_types & 4, variation_feature_id, NULL) ) AS splice_donor_variant, | |
SUM( IF(consequence_types & 8, 1, 0 ) ) AS stop_lost, | |
COUNT( DISTINCT IF(consequence_types & 8, variation_feature_id, NULL) ) AS stop_lost, | |
SUM( IF(consequence_types & 16, 1, 0 ) ) AS coding_sequence_variant, | |
COUNT( DISTINCT IF(consequence_types & 16, variation_feature_id, NULL) ) AS coding_sequence_variant, | |
SUM( IF(consequence_types & 32, 1, 0 ) ) AS missense_variant, | |
COUNT( DISTINCT IF(consequence_types & 32, variation_feature_id, NULL) ) AS missense_variant, | |
SUM( IF(consequence_types & 64, 1, 0 ) ) AS stop_gained, | |
COUNT( DISTINCT IF(consequence_types & 64, variation_feature_id, NULL) ) AS stop_gained, | |
SUM( IF(consequence_types & 128, 1, 0 ) ) AS synonymous_variant, | |
COUNT( DISTINCT IF(consequence_types & 128, variation_feature_id, NULL) ) AS synonymous_variant | |
FROM | |
transcript_variation | |
\G | |
***************************[ 1. row ]*************************** | |
N | 1721493 | |
U | 1524687 | |
intergenic_variant | 1262 | |
intergenic_variant | 1245 | |
splice_acceptor_variant | 1321 | |
splice_acceptor_variant | 1310 | |
splice_donor_variant | 879 | |
splice_donor_variant | 867 | |
stop_lost | 91 | |
stop_lost | 91 | |
coding_sequence_variant | 266061 | |
coding_sequence_variant | 260208 | |
missense_variant | 9004 | |
missense_variant | 8879 | |
stop_gained | 178287 | |
stop_gained | 175978 | |
synonymous_variant | 5101 | |
synonymous_variant | 5098 | |
SELECT 'intergenic_variant', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U | |
FROM transcript_variation WHERE FIND_IN_SET('intergenic_variant', consequence_types) UNION | |
SELECT 'splice_acceptor_variant', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U | |
FROM transcript_variation WHERE FIND_IN_SET('splice_acceptor_variant', consequence_types) UNION | |
SELECT 'splice_donor_variant', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U | |
FROM transcript_variation WHERE FIND_IN_SET('splice_donor_variant', consequence_types) UNION | |
SELECT 'stop_lost', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U | |
FROM transcript_variation WHERE FIND_IN_SET('stop_lost', consequence_types) UNION | |
SELECT 'coding_sequence_variant', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U | |
FROM transcript_variation WHERE FIND_IN_SET('coding_sequence_variant', consequence_types) UNION | |
SELECT 'missense_variant', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U | |
FROM transcript_variation WHERE FIND_IN_SET('missense_variant', consequence_types) UNION | |
SELECT 'stop_gained', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U | |
FROM transcript_variation WHERE FIND_IN_SET('stop_gained', consequence_types) UNION | |
SELECT 'synonymous_variant', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U | |
FROM transcript_variation WHERE FIND_IN_SET('synonymous_variant', consequence_types) | |
; | |
+-------------------------+--------+--------+ | |
| intergenic_variant | N | U | | |
|-------------------------+--------+--------| | |
| intergenic_variant | 0 | 0 | | |
| splice_acceptor_variant | 1262 | 1245 | | |
| splice_donor_variant | 1321 | 1310 | | |
| stop_lost | 879 | 867 | | |
| coding_sequence_variant | 91 | 91 | | |
| missense_variant | 266061 | 260208 | | |
| stop_gained | 9004 | 8879 | | |
| synonymous_variant | 178287 | 175978 | | |
+-------------------------+--------+--------+ | |
8 rows in set | |
Time: 0.009s | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment