Skip to content

Instantly share code, notes, and snippets.

@dbolser-ebi
Last active May 17, 2017 16:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dbolser-ebi/378dff176b7061b9813676ef18c7ea57 to your computer and use it in GitHub Desktop.
Save dbolser-ebi/378dff176b7061b9813676ef18c7ea57 to your computer and use it in GitHub Desktop.
show create table variation_feature;
+-------------------+----------------+
| Table | Create Table |
|-------------------+----------------|
| variation_feature | CREATE TABLE variation_feature (
variation_feature_id int(10) unsigned NOT NULL AUTO_INCREMENT,
seq_region_id int(10) unsigned NOT NULL,
seq_region_start int(11) NOT NULL,
seq_region_end int(11) NOT NULL,
seq_region_strand tinyint(4) NOT NULL,
variation_id int(10) unsigned NOT NULL,
allele_string varchar(50000) DEFAULT NULL,
variation_name varchar(255) DEFAULT NULL,
map_weight int(11) NOT NULL,
flags set('genotyped') DEFAULT NULL,
source_id int(10) unsigned NOT NULL,
consequence_types set('intergenic_variant','splice_acceptor_variant','splice_donor_variant','stop_lost','coding_sequence_variant','missense_variant','stop_gained','synonymous_variant','frameshift_variant','non_coding_transcript_variant','non_coding_transcript_exon_variant','mature_miRNA_variant','NMD_transcript_variant','5_prime_UTR_variant','3_prime_UTR_variant','incomplete_terminal_codon_variant','intron_variant','splice_region_variant','downstream_gene_variant','upstream_gene_variant','start_lost','stop_retained_variant','inframe_insertion','inframe_deletion','transcript_ablation','transcript_fusion','transcript_amplification','transcript_translocation','TFBS_ablation','TFBS_fusion','TFBS_amplification','TFBS_translocation','regulatory_region_ablation','regulatory_region_fusion','regulatory_region_amplification','regulatory_region_translocation','feature_elongation','feature_truncation','regulatory_region_variant','TF_binding_site_variant','protein_altering_variant') NOT NULL DEFAULT 'intergenic_variant',
variation_set_id set('1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49','50','51','52','53','54','55','56','57','58','59','60','61','62','63','64') NOT NULL DEFAULT '',
class_attrib_id int(10) unsigned DEFAULT '0',
somatic tinyint(1) NOT NULL DEFAULT '0',
minor_allele varchar(50) DEFAULT NULL,
minor_allele_freq float DEFAULT NULL,
minor_allele_count int(10) unsigned DEFAULT NULL,
alignment_quality double DEFAULT NULL,
clinical_significance set('uncertain significance','not provided','benign','likely benign','likely pathogenic','pathogenic','drug response','histocompatibility','other','confers sensitivity','risk factor','association','protective') DEFAULT NULL,
evidence_attribs set('367','368','369','370','371','372','418','421') DEFAULT NULL,
display int(1) DEFAULT '1',
PRIMARY KEY (variation_feature_id),
KEY variation_idx (variation_id),
KEY pos_idx (seq_region_id,seq_region_start,seq_region_end),
KEY variation_set_idx (variation_set_id),
KEY consequence_type_idx (consequence_types),
KEY source_idx (source_id)
) ENGINE=MyISAM AUTO_INCREMENT=5230594 DEFAULT CHARSET=latin1 CHECKSUM=1 |
+-------------------+----------------+
1 row in set
SELECT
COUNT(*) AS N,
COUNT( DISTINCT variation_feature_id ) AS U,
SUM( IF(consequence_types & 1, 1, 0 ) ) AS intergenic_variant,
COUNT( DISTINCT IF(consequence_types & 1, variation_feature_id, NULL) ) AS intergenic_variant,
SUM( IF(consequence_types & 2, 1, 0 ) ) AS splice_acceptor_variant,
COUNT( DISTINCT IF(consequence_types & 2, variation_feature_id, NULL) ) AS splice_acceptor_variant,
SUM( IF(consequence_types & 4, 1, 0 ) ) AS splice_donor_variant,
COUNT( DISTINCT IF(consequence_types & 4, variation_feature_id, NULL) ) AS splice_donor_variant,
SUM( IF(consequence_types & 8, 1, 0 ) ) AS stop_lost,
COUNT( DISTINCT IF(consequence_types & 8, variation_feature_id, NULL) ) AS stop_lost,
SUM( IF(consequence_types & 16, 1, 0 ) ) AS coding_sequence_variant,
COUNT( DISTINCT IF(consequence_types & 16, variation_feature_id, NULL) ) AS coding_sequence_variant,
SUM( IF(consequence_types & 32, 1, 0 ) ) AS missense_variant,
COUNT( DISTINCT IF(consequence_types & 32, variation_feature_id, NULL) ) AS missense_variant,
SUM( IF(consequence_types & 64, 1, 0 ) ) AS stop_gained,
COUNT( DISTINCT IF(consequence_types & 64, variation_feature_id, NULL) ) AS stop_gained,
SUM( IF(consequence_types & 128, 1, 0 ) ) AS synonymous_variant,
COUNT( DISTINCT IF(consequence_types & 128, variation_feature_id, NULL) ) AS synonymous_variant
FROM
transcript_variation
\G
***************************[ 1. row ]***************************
N | 1721493
U | 1524687
intergenic_variant | 1262
intergenic_variant | 1245
splice_acceptor_variant | 1321
splice_acceptor_variant | 1310
splice_donor_variant | 879
splice_donor_variant | 867
stop_lost | 91
stop_lost | 91
coding_sequence_variant | 266061
coding_sequence_variant | 260208
missense_variant | 9004
missense_variant | 8879
stop_gained | 178287
stop_gained | 175978
synonymous_variant | 5101
synonymous_variant | 5098
SELECT 'intergenic_variant', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U
FROM transcript_variation WHERE FIND_IN_SET('intergenic_variant', consequence_types) UNION
SELECT 'splice_acceptor_variant', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U
FROM transcript_variation WHERE FIND_IN_SET('splice_acceptor_variant', consequence_types) UNION
SELECT 'splice_donor_variant', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U
FROM transcript_variation WHERE FIND_IN_SET('splice_donor_variant', consequence_types) UNION
SELECT 'stop_lost', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U
FROM transcript_variation WHERE FIND_IN_SET('stop_lost', consequence_types) UNION
SELECT 'coding_sequence_variant', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U
FROM transcript_variation WHERE FIND_IN_SET('coding_sequence_variant', consequence_types) UNION
SELECT 'missense_variant', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U
FROM transcript_variation WHERE FIND_IN_SET('missense_variant', consequence_types) UNION
SELECT 'stop_gained', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U
FROM transcript_variation WHERE FIND_IN_SET('stop_gained', consequence_types) UNION
SELECT 'synonymous_variant', COUNT(*) AS N, COUNT(DISTINCT variation_feature_id) AS U
FROM transcript_variation WHERE FIND_IN_SET('synonymous_variant', consequence_types)
;
+-------------------------+--------+--------+
| intergenic_variant | N | U |
|-------------------------+--------+--------|
| intergenic_variant | 0 | 0 |
| splice_acceptor_variant | 1262 | 1245 |
| splice_donor_variant | 1321 | 1310 |
| stop_lost | 879 | 867 |
| coding_sequence_variant | 91 | 91 |
| missense_variant | 266061 | 260208 |
| stop_gained | 9004 | 8879 |
| synonymous_variant | 178287 | 175978 |
+-------------------------+--------+--------+
8 rows in set
Time: 0.009s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment