Skip to content

Instantly share code, notes, and snippets.

@ewels
Created September 17, 2014 07:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ewels/a027fcb36664ce964a1b to your computer and use it in GitHub Desktop.
Save ewels/a027fcb36664ce964a1b to your computer and use it in GitHub Desktop.
Cytoband Coordinate Converter
#!/usr/bin/perl
use CGI::Carp qw(fatalsToBrowser); # pipe errors to the browser instead of the terminal for debugging
use warnings;
use strict;
use Data::Dumper;
#print Dumper(\%args);
print "Content-type: text/plain\n\n";
#############################################################
# Name: Cytobands - Coordinates: Online Version #
# Author: Phil Ewels #
# Version 1.0 - 27/03/2012 #
# --------------------------------------------------------- #
# Converts a list of cytogenetic bands to genome #
# coordinates (and visa versa?) #
# --------------------------------------------------------- #
# Cytobands - Coordinates is licensed under a Creative #
# Commons Attribution-ShareAlike 3.0 Unported License. #
# Based on a work at www.tallphil.co.uk. #
#############################################################
#loads CGI with standard shortcuts
use CGI qw/:standard/;
# Setup - input from web interface
my $cytobands_input = param('cytobands') || '';
my $genome = param('genome') || '';
if($cytobands_input eq ''){
# no variables - redirect to form
print redirect('index.html');
exit;
}
# clean data to stop any nasties and set a default
if($genome eq '' || ($genome ne 'hg19' && $genome ne 'mm9')){
$genome = 'hg19';
}
my $i;
########
# Parse cytobands
########
my @input_lines = split(/\n/, $cytobands_input);
my @input;
warn "\n";
foreach (@input_lines) {
# look for p or q
my $match;
if($genome eq 'mm9'){
$match = '^\D*(\d+)\s*([abcdefghpa])(([abcdefgh])?\d+(\.\d+)?)\D*$';
} else {
$match = '^\D*(\d+)\s*([pq])(\d+(\.\d+)?)\D*$';
}
#print "input = $_\nmatch = $match\n";
$_ =~ m/$match/i;
my $before = $1; # anything that came before the match
my $pq = $2; # the match
my $after = $3; # anything after the match
#print "before = $before\npq = $pq\nafter = $after\n";
if($pq =~ m/[A-H]/i && $genome eq 'mm9'){
$after = $pq.$after;
$pq = 'q';
}
# pull out relevant chunks
my ($chr) = $before =~ m/(\d+)\s*$/;
my $band;
if($genome eq 'mm9'){
($band) = $after =~ m/\s*([\dA-H]+(\.\d+)?)/i;
} else {
($band) = $after =~ m/\s*(\d+(\.\d+)?)/;
}
#print "chr = $chr\nband=$band\n";
# check and write to array
if($chr ne '' && $pq ne '' && $band ne ''){
push @input, {'chr' => $chr, 'pq' => $pq, 'band' => $band, 'type' => 'cytoband'};
warn "Interpreted $chr$pq$band from: ".$_." \n";
} else {
### not a cytoband, is it a coordinate?
$_ =~ s/(:|-)/ /g; # swap common separaters with whitespace
$_ =~ s/[^\d\sXY]+//g; # remove anything that's not a number or whitespace, or X or Y
my @sections = split(/\s/);
my $numsections = @sections;
if($numsections eq 3) {
$chr = $sections[0];
my $start = $sections[1];
my $end = $sections[2];
push @input, {'chr' => $chr, 'start' => $start, 'end' => $end, 'type' => 'coordinate'};
warn "Interpreted $chr:$start-$end from: ".$_." \n";
} else {
warn "Couldn't understand line: ".$_."\n";
}
}
}
warn "\n";
my $numinput = @input;
if($numinput == 0){
print "Error - No input lines recognised.";
exit;
}
########
# Convert parsed cytobands to co-ordinates
########
my @conversions;
my $referencefile = "cytoBand_$genome.txt";
open (IN,$referencefile) or die "Can't read file $referencefile: $!";
while (<IN>) {
chomp;
my @sections = split(/\t/);
#warn $sections[0]."\t".$sections[3]."\t".$sections[1]."\t".$sections[2]."\n";
for ($i=0; $i<@input; $i++) {
my $type = $input[$i]->{type};
my $chr = $input[$i]->{chr};
if($type eq 'cytoband'){
my $pq = $input[$i]->{pq};
my $band = $input[$i]->{band};
# same chr?
if("chr$chr" eq $sections[0]){
# same arm?
if ($sections[3] =~ /($pq)/){
#start with same band?
my $pqband = $pq.$band;
if($sections[3] =~ /($pqband)/){
my $start = $sections[1];
my $end = $sections[2];
# Do we already have a match for this?
if(exists($conversions[$i])){
#warn "chr$chr$pq$band\t".$sections[0].$sections[3]."\texists\t$chr:$start-$end\n";
if($conversions[$i]{'start'} > $start){
$conversions[$i]{'start'} = $start;
}
if($conversions[$i]{'end'} < $end){
$conversions[$i]{'end'} = $end;
}
} else {
#warn "chr$chr$pq$band\t".$sections[0].$sections[3]."\tnew\t$chr:$start-$end\n";
if($genome eq 'mm9'){
$conversions[$i]{'input'} = "chr$chr$band";
} else {
$conversions[$i]{'input'} = "chr$chr$pq$band";
}
$conversions[$i]{'chr'} = $chr;
$conversions[$i]{'start'} = $start;
$conversions[$i]{'end'} = $end;
$conversions[$i]{'type'} = 'coordinate';
}
}
}
}
} elsif($type eq 'coordinate') {
my $start = $input[$i]->{start};
my $end = $input[$i]->{end};
# same chr?
if("chr$chr" eq $sections[0]){
# in this window?
if($start ge $sections[1] && $end le $sections[2]){
my $cytoband = $sections[3];
$conversions[$i]{'cytoband'} = $chr.$cytoband;
$conversions[$i]{'input'} = "$chr:$start-$end";
$conversions[$i]{'type'} = 'cytoband';
}
}
}
}
}
my $numconversions = @conversions;
if($numconversions == 0){
print "Error - No conversions found. $numinput input lines parsed:\n\n".join("\n", Dumper(@input));
exit;
}
########
# Output results
########
foreach my $conversion ( @conversions) {
if($conversion->{type} eq 'coordinate'){
print $conversion->{chr}, "\t";
print $conversion->{start}, "\t";
print $conversion->{end}, "\t";
print $conversion->{input}, "\n";
} elsif($conversion->{type} eq 'cytoband'){
print $conversion->{cytoband}, "\t";
print $conversion->{input}, "\n";
}
}
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">
<head>
<title>Cytoband Coordinate Converter</title>
<meta name="keywords" content="Phil Ewels, Cytoband Converter, Cytogenetic Bands, Chromosomal Position, Bioinformatics" />
<meta name="copyright" content="copyright 2012 Phil Ewels" />
<link rel="stylesheet" type="text/css" href="styles.css" />
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
</head>
<body>
<h1>Cytogenetic Band : Genome Coordinate Converter</h1>
<p>Enter a list of either cytogenetic bands or genomic co-ordinates, and this script will convert one to the other.</p>
<form method="post" action="cytobands.pl" enctype="multipart/form-data">
<p><label>Species: <select name="genome"><option value="hg19">Human (hg19)</option><option value="mm9">Mouse (mm9)</option></select></label></p>
<p><label>Cytogenetic bands in the format <strong><em>[chr][arm][band]</em></strong> or coordinate in the format <strong><em>[chr]:[start]-[end]</em></strong><br /><br />
<small><em>For example: 10q26.11 or 10:129342-234987</em></small><br /><br />
<textarea name="cytobands"></textarea></label></p>
<p><input type="submit" value="Submit"></p>
</form>
<p>Conversions are made using the UCSC chromosome band map, available through the <a href="http://genome.ucsc.edu/cgi-bin/hgTables" target="_blank">UCSC table browser</a>.</p>
<p>Please note that this map is <strong>not</strong> identical to that used by <em>ensembl</em>.</p>
<p class="licence"><a rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/"><img alt="Creative Commons Licence" style="border-width:0" src="http://i.creativecommons.org/l/by-sa/3.0/88x31.png" /></a><br />
<span xmlns:dct="http://purl.org/dc/terms/" href="http://purl.org/dc/dcmitype/InteractiveResource" property="dct:title" rel="dct:type">Cytoband Converter</span> by <a xmlns:cc="http://creativecommons.org/ns#" href="http://phil.ewels.co.uk" property="cc:attributionName" rel="cc:attributionURL">Phil Ewels</a> is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/">Creative Commons Attribution-ShareAlike 3.0 Unported License</a>.<br />Based on a work at <a xmlns:dct="http://purl.org/dc/terms/" href="http://www.tallphil.co.uk/cytoband/" rel="dct:source">www.tallphil.co.uk</a>.</p>
<!-- Google Analytics Code -->
<script type="text/javascript">
var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
try { var pageTracker = _gat._getTracker("UA-10500167-1"); pageTracker._trackPageview(); } catch(err) {}
</script>
</body>
</html>
/* CSS Styles for cytobands.pl */
body {
margin:0;
padding:20px 40px;
border-top:4px solid #00FBFF;
color: #2d2d2d;
font-family: sans-serif;
font-size:12pt;
}
textarea {
min-width:600px;
width:50%;
height:300px;
}
input {
font-size:x-large;
padding:10px 15px;
}
.monospace {
font-family:monospace;
}
.licence {
color:#999;
font-size:8pt;
margin-top:20px;
padding-top:20px;
border-top:1px solid #ccc;
}
.licence a {
color:#999;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment