Skip to content

Instantly share code, notes, and snippets.

@bioinfornatics
Created February 29, 2012 16:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bioinfornatics/1942288 to your computer and use it in GitHub Desktop.
Save bioinfornatics/1942288 to your computer and use it in GitHub Desktop.
##############################################################################
### file 1 of 2: test_bed.d
##############################################################################
import bed;
import std.stdio;
import std.string;
/// Prints every record of the demo BED file to standard output.
void main(){
    string filePath = "ColorByStrandDemo.txt";
    // bedReader returns the file's metadata and hands the parsed records of
    // each data line to a delegate, so the iteration happens inside the callback
    // rather than over the return value.
    // NOTE(review): Bed6 as declared in bed.d has seven fields (columns 0-6);
    // confirm that this matches the number of columns in the input file.
    bedReader!Bed6( filePath, '\t', (records){
        foreach( record; records )
            writeln( record );
        return 0; // zero: keep reading
    });
}
##############################################################################
### file 2 of 2: bed.d
##############################################################################
/**
* The bed module is a set of functions for parsing delimiter-separated text formats similar to CSV.
* Supported formats:
* - .mat matrix files
* - .bed UCSC files
* To parse a plain .csv file, use std.csv.
*/
module bed;
import std.conv : to;
import std.csv;
import std.file;
import std.array;
import std.algorithm;
import std.range;
import std.string;
import std.exception;
/**
* matrixReader
* Loads a matrix from a delimiter-separated text file.
* Every line of the file becomes one row; every non-empty field of the line
* is converted to T with std.conv.to.
* Params:
* filePath = path to the file that contains the matrix
* separator = delimiter used in the file to separate columns; defaults to tab
* Returns:
* A 2D array holding one row per line of the file
*/
T[][] matrixReader( T )( string filePath, string separator = "\t" ){
    // File is declared in std.stdio, which is not among this module's imports.
    import std.stdio : File;

    T[][] matrix;
    File matrixFile = File( filePath, "r" );
    foreach( line; matrixFile.byLine() ){
        // Empty fields are dropped, so consecutive delimiters act as a single one.
        auto fields = filter!"!a.empty"( line.split( separator ) );
        // NOTE(review): byLine is documented to reuse its buffer; to!T copies for
        // numeric and string element types, but confirm before using T = char[].
        matrix ~= array( map!(to!T)( fields ) );
    }
    return matrix;
}
/**
* Record type for one .bed data row holding columns 0 through 3.
* Fields are declared in column order. The numeric suffix is the highest
* zero-based column index, so this struct has four fields.
*/
struct Bed3{
    string chrom;                 /// column 0
    size_t chromStart, chromEnd;  /// columns 1 and 2
    string name;                  /// column 3
}
/**
* Record type for one .bed data row holding columns 0 through 4.
* Fields are declared in column order. The numeric suffix is the highest
* zero-based column index, so this struct has five fields.
*/
struct Bed4{
    string chrom;                 /// column 0
    size_t chromStart, chromEnd;  /// columns 1 and 2
    string name;                  /// column 3
    size_t score;                 /// column 4
}
/**
* Record type for one .bed data row holding columns 0 through 5.
* Fields are declared in column order. The numeric suffix is the highest
* zero-based column index, so this struct has six fields.
*/
struct Bed5{
    string chrom;                 /// column 0
    size_t chromStart, chromEnd;  /// columns 1 and 2
    string name;                  /// column 3
    size_t score;                 /// column 4
    char   strand;                /// column 5
}
/**
* Record type for one .bed data row holding columns 0 through 6.
* Fields are declared in column order. The numeric suffix is the highest
* zero-based column index, so this struct has seven fields.
*/
struct Bed6{
    string chrom;                 /// column 0
    size_t chromStart, chromEnd;  /// columns 1 and 2
    string name;                  /// column 3
    size_t score;                 /// column 4
    char   strand;                /// column 5
    size_t thickStart;            /// column 6
}
/**
* Record type for one .bed data row holding columns 0 through 7.
* Fields are declared in column order. The numeric suffix is the highest
* zero-based column index, so this struct has eight fields.
*/
struct Bed7{
    string chrom;                 /// column 0
    size_t chromStart, chromEnd;  /// columns 1 and 2
    string name;                  /// column 3
    size_t score;                 /// column 4
    char   strand;                /// column 5
    size_t thickStart, thickEnd;  /// columns 6 and 7
}
/**
* Record type for one .bed data row holding columns 0 through 8.
* Fields are declared in column order. The numeric suffix is the highest
* zero-based column index, so this struct has nine fields.
*/
struct Bed8{
    string    chrom;                 /// column 0
    size_t    chromStart, chromEnd;  /// columns 1 and 2
    string    name;                  /// column 3
    size_t    score;                 /// column 4
    char      strand;                /// column 5
    size_t    thickStart, thickEnd;  /// columns 6 and 7
    // NOTE(review): UCSC BED writes this column as "R,G,B"; confirm that the
    // reader can convert that text into a size_t[3].
    size_t[3] itemRgb;               /// column 8
}
/**
* Record type for one .bed data row holding columns 0 through 9.
* Fields are declared in column order. The numeric suffix is the highest
* zero-based column index, so this struct has ten fields.
*/
struct Bed9{
    string    chrom;                 /// column 0
    size_t    chromStart, chromEnd;  /// columns 1 and 2
    string    name;                  /// column 3
    size_t    score;                 /// column 4
    char      strand;                /// column 5
    size_t    thickStart, thickEnd;  /// columns 6 and 7
    // NOTE(review): UCSC BED writes this column as "R,G,B"; confirm that the
    // reader can convert that text into a size_t[3].
    size_t[3] itemRgb;               /// column 8
    size_t    blockCount;            /// column 9
}
/**
* Record type for one .bed data row holding columns 0 through 10.
* Fields are declared in column order. The numeric suffix is the highest
* zero-based column index, so this struct has eleven fields.
*/
struct Bed10{
    string    chrom;                   /// column 0
    size_t    chromStart, chromEnd;    /// columns 1 and 2
    string    name;                    /// column 3
    size_t    score;                   /// column 4
    char      strand;                  /// column 5
    size_t    thickStart, thickEnd;    /// columns 6 and 7
    // NOTE(review): UCSC BED writes this column as "R,G,B"; confirm that the
    // reader can convert that text into a size_t[3].
    size_t[3] itemRgb;                 /// column 8
    // NOTE(review): UCSC BED defines blockSizes as a comma-separated list;
    // confirm that a single size_t is what callers expect here.
    size_t    blockCount, blockSizes;  /// columns 9 and 10
}
/**
* Record type for one .bed data row holding columns 0 through 11.
* Fields are declared in column order. The numeric suffix is the highest
* zero-based column index, so this struct has twelve fields.
*/
struct Bed11{
    string    chrom;                                /// column 0
    size_t    chromStart, chromEnd;                 /// columns 1 and 2
    string    name;                                 /// column 3
    size_t    score;                                /// column 4
    char      strand;                               /// column 5
    size_t    thickStart, thickEnd;                 /// columns 6 and 7
    // NOTE(review): UCSC BED writes this column as "R,G,B"; confirm that the
    // reader can convert that text into a size_t[3].
    size_t[3] itemRgb;                              /// column 8
    // NOTE(review): UCSC BED defines blockSizes and blockStarts as
    // comma-separated lists; confirm that single size_t values are intended.
    size_t    blockCount, blockSizes, blockStarts;  /// columns 9, 10 and 11
}
/**
* Header information of a .bed file: the values carried by its
* "browser position", "browser hide" and "track" lines.
*/
struct BedMetadata{
    string name;          // track attribute "name"
    string description;   // track attribute "description"
    size_t visibility;    // track attribute "visibility"; 0 is treated as "not set"
    string itemRgb;       // track attribute "itemRgb"
    size_t browserStart;  // start of the "browser position" range
    size_t browserEnd;    // end of the "browser position" range
    string chromosome;    // chromosome of the "browser position" line
    string hide;          // argument of the "browser hide" line

    /**
    * Renders the metadata as .bed header lines.
    * Returns:
    * Zero to three newline-terminated lines, in the order browser position,
    * browser hide, track. A line is left out when none of its values are set.
    */
    string toString(){
        string result = "";
        if( chromosome != "" && browserStart != 0 && browserEnd != 0 )
            result ~= "browser position %s:%d-%d\n".format( chromosome, browserStart, browserEnd );
        if( hide != "" )
            result ~= "browser hide %s\n".format( hide );

        // All track attributes belong on ONE "track" line. Each attribute is
        // written whenever it is set, so a missing description no longer
        // suppresses the name, and itemRgb no longer lands on a line of its own.
        // String values are quoted because they may contain spaces.
        string attributes = "";
        if( name != "" )
            attributes ~= " name=\"%s\"".format( name );
        if( description != "" )
            attributes ~= " description=\"%s\"".format( description );
        if( visibility != 0 )
            attributes ~= " visibility=%d".format( visibility );
        if( itemRgb != "" )
            attributes ~= " itemRgb=\"%s\"".format( itemRgb );
        if( attributes != "" )
            result ~= "track" ~ attributes ~ "\n";
        return result;
    }
}
/**
* Type of the record range that std.csv.csvReader yields for one data line
* when asked for records of type T. It is spelled through typeof because the
* reader struct itself cannot be named directly from outside std.csv.
*/
template BedRecords( T ){
    alias typeof( csvReader!T( (char[]).init, '\t' ) ) BedRecords;
}

/**
* Parses the location part of a "browser position" line, e.g.
* " chr7:127471196-127495720", into chromosome, browserStart and browserEnd.
* Throws:
* Exception when the "chrom:start-end" shape is not found,
* ConvException when start or end is not a number.
*/
private void parseBrowserPosition( const(char)[] location, ref BedMetadata metadata ){
    auto spec  = location.strip();
    auto colon = spec.indexOf( ':' );
    enforce( colon > 0, "Malformed metadata line" );
    auto span = spec[colon + 1 .. $];
    auto dash = span.indexOf( '-' );
    enforce( dash > 0, "Malformed metadata line" );
    // idup: the text is a slice of the line buffer, which byLine reuses.
    metadata.chromosome   = spec[0 .. colon].idup;
    metadata.browserStart = to!size_t( span[0 .. dash].strip() );
    metadata.browserEnd   = to!size_t( span[dash + 1 .. $].strip() );
}

/**
* Converts a track "visibility" value to its number. Accepts the numeric form
* and the display-mode names listed in the UCSC track-line documentation.
*/
private size_t parseVisibility( const(char)[] value ){
    switch( value ){
        case "hide":   return 0;
        case "dense":  return 1;
        case "full":   return 2;
        case "pack":   return 3;
        case "squish": return 4;
        default:       return to!size_t( value );
    }
}

/**
* Parses the key=value attributes that follow the word "track" and stores
* name, description, visibility and itemRgb in metadata. Values may be
* double-quoted. Attributes that BedMetadata has no field for are ignored.
* Throws:
* Exception when an attribute has no '=' or a quote is never closed.
*/
private void parseTrackLine( const(char)[] attributes, ref BedMetadata metadata ){
    auto rest = attributes.strip();
    while( rest.length != 0 ){
        auto equals = rest.indexOf( '=' );
        enforce( equals > 0, "Malformed metadata line" );
        auto key = rest[0 .. equals].strip();
        rest = rest[equals + 1 .. $];

        const(char)[] value;
        if( rest.length != 0 && rest[0] == '"' ){
            // Quoted value: runs up to the next double quote.
            auto closing = rest[1 .. $].indexOf( '"' );
            enforce( closing >= 0, "Malformed metadata line" );
            value = rest[1 .. closing + 1];
            rest  = rest[closing + 2 .. $];
        }
        else{
            // Bare value: runs up to the next blank or tab.
            size_t end = 0;
            while( end < rest.length && rest[end] != ' ' && rest[end] != '\t' )
                end++;
            value = rest[0 .. end];
            rest  = rest[end .. $];
        }
        rest = rest.stripLeft();

        switch( key ){
            case "name":        metadata.name        = value.idup;               break;
            case "description": metadata.description = value.idup;               break;
            case "visibility":  metadata.visibility  = parseVisibility( value ); break;
            case "itemRgb":     metadata.itemRgb     = value.idup;               break;
            default:            break; // no matching field in BedMetadata
        }
    }
}

/**
* bedReader
* Reads a .bed file line by line. Comment lines (starting with '#') and blank
* lines are skipped; "browser position", "browser hide" and "track" lines are
* collected into the returned metadata; every other line is parsed as one
* delimiter-separated record of type T and handed to dg.
* Params:
* filePath = path to the .bed file
* delimiter = column delimiter of the data lines; defaults to tab
* dg = called once per data line with the csvReader range for that line.
*      The range refers to a buffer that is reused for the next line, so it
*      must be consumed inside the delegate. A non-zero return value stops
*      reading. When dg is null, data lines are skipped and only the
*      metadata is collected.
* Returns:
* The metadata found in the part of the file that was read
* Throws:
* FileException when filePath does not exist or is not a file,
* Exception when a metadata line is malformed
*/
BedMetadata bedReader( T = Bed3 )( string filePath, char delimiter = '\t', int delegate( BedRecords!T ) dg = null ){
    // File is declared in std.stdio, which is not among this module's imports.
    import std.stdio : File;

    // FileException takes the file name first and the message second.
    if( !filePath.exists )
        throw new FileException( filePath, "file does not exist" );
    else if( !filePath.isFile )
        throw new FileException( filePath, "not a file" );

    const string positionToken = "browser position";
    const string hideToken     = "browser hide";
    const string trackToken    = "track ";

    File bedFile = File( filePath, "r" );
    BedMetadata metadata;
    foreach( char[] line; bedFile.byLine() ){
        if( line.strip().length == 0 || line.startsWith( '#' ) ) // blank line or comment
            continue;
        else if( line.startsWith( positionToken ) )
            parseBrowserPosition( line[positionToken.length .. $], metadata );
        else if( line.startsWith( hideToken ) ){
            auto hidden = line[hideToken.length .. $].strip();
            enforce( hidden.length != 0, "Malformed metadata line" );
            metadata.hide = hidden.idup; // copy out of the reused line buffer
        }
        else if( line.startsWith( trackToken ) )
            parseTrackLine( line[trackToken.length .. $], metadata );
        else if( dg !is null ){ // data in csv format
            if( dg( csvReader!T( line, delimiter ) ) != 0 )
                break;
        }
    }
    return metadata;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment