Created
February 29, 2012 16:34
-
-
Save bioinfornatics/1942288 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################## | |
### file 1 of 2: test_bed.d | |
############################################################################## | |
import bed; | |
import std.stdio; | |
import std.string; | |
/// Prints every record of the demo BED file to standard output.
void main(){
    string filePath = "ColorByStrandDemo.txt";
    // bedReader returns the file's metadata and hands the records of each
    // data line to the delegate, so the records are printed from inside it.
    // NOTE(review): Bed6 as declared in bed.d has seven fields (columns 0-6);
    // confirm that matches the column count of the input file.
    bedReader!Bed6( filePath, '\t', ( records ){
        foreach( record; records )
            writeln( record );
        return 0;
    } );
}
############################################################################## | |
### file 2 of 2: bed.d | |
############################################################################## | |
/**
* The module bed is a set of functions to parse several delimiter-separated formats, in the same way as a csv file.
* Supported formats:
* - .mat matrix file
* - .bed UCSC file
* To parse a .csv file use std.csv
*/
module bed; | |
import std.conv : to; | |
import std.csv; | |
import std.file; | |
import std.array; | |
import std.algorithm; | |
import std.range; | |
import std.string; | |
import std.exception; | |
/**
* matrixReader
* Loads a matrix from a delimiter-separated text file, one row per line.
* Params:
* T = element type; every non-empty field is converted to it with std.conv.to
* filePath = path to the file that contains the matrix
* separator = delimiter placed between columns in the file; a tab by default
* Returns:
* A 2D array holding one inner array per line of the file. Empty fields are
* dropped, so rows may differ in length.
* Throws:
* ErrnoException if the file cannot be opened, ConvException if a field
* cannot be converted to T.
*/
T[][] matrixReader( T )( string filePath, string separator = "\t" ){
    // File is declared in std.stdio, which this module does not import at file level.
    import std.stdio : File;

    File matrixFile = File( filePath, "r" );
    T[][] matrix;
    foreach( line; matrixFile.byLine() ){
        // split yields an empty field between consecutive delimiters; the filter removes those.
        matrix ~= array( map!(to!T)( filter!"!a.empty"( line.split( separator ) ) ) );
    }
    return matrix;
}
/**
 * BED record holding columns 0 through 3 of a data line.
 * NOTE(review): despite the name, four fields are declared (the highest
 * column index is 3) — confirm the intended numbering convention.
 */
struct Bed3{
    string chrom;      /// column 0
    size_t chromStart; /// column 1
    size_t chromEnd;   /// column 2
    string name;       /// column 3
}
/**
 * BED record holding columns 0 through 4 of a data line.
 * NOTE(review): despite the name, five fields are declared (the highest
 * column index is 4) — confirm the intended numbering convention.
 */
struct Bed4{
    string chrom;      /// column 0
    size_t chromStart; /// column 1
    size_t chromEnd;   /// column 2
    string name;       /// column 3
    size_t score;      /// column 4
}
/**
 * BED record holding columns 0 through 5 of a data line.
 * NOTE(review): despite the name, six fields are declared (the highest
 * column index is 5) — confirm the intended numbering convention.
 */
struct Bed5{
    string chrom;      /// column 0
    size_t chromStart; /// column 1
    size_t chromEnd;   /// column 2
    string name;       /// column 3
    size_t score;      /// column 4
    char   strand;     /// column 5
}
/**
 * BED record holding columns 0 through 6 of a data line.
 * NOTE(review): despite the name, seven fields are declared (the highest
 * column index is 6) — confirm the intended numbering convention.
 */
struct Bed6{
    string chrom;      /// column 0
    size_t chromStart; /// column 1
    size_t chromEnd;   /// column 2
    string name;       /// column 3
    size_t score;      /// column 4
    char   strand;     /// column 5
    size_t thickStart; /// column 6
}
/**
 * BED record holding columns 0 through 7 of a data line.
 * NOTE(review): despite the name, eight fields are declared (the highest
 * column index is 7) — confirm the intended numbering convention.
 */
struct Bed7{
    string chrom;      /// column 0
    size_t chromStart; /// column 1
    size_t chromEnd;   /// column 2
    string name;       /// column 3
    size_t score;      /// column 4
    char   strand;     /// column 5
    size_t thickStart; /// column 6
    size_t thickEnd;   /// column 7
}
/**
 * BED record holding columns 0 through 8 of a data line.
 * NOTE(review): despite the name, nine fields are declared (the highest
 * column index is 8) — confirm the intended numbering convention.
 */
struct Bed8{
    string    chrom;      /// column 0
    size_t    chromStart; /// column 1
    size_t    chromEnd;   /// column 2
    string    name;       /// column 3
    size_t    score;      /// column 4
    char      strand;     /// column 5
    size_t    thickStart; /// column 6
    size_t    thickEnd;   /// column 7
    size_t[3] itemRgb;    /// column 8, three numeric components
                          /// NOTE(review): presumably the "R,G,B" value of the UCSC format — verify how it is parsed
}
/**
 * BED record holding columns 0 through 9 of a data line.
 * NOTE(review): despite the name, ten fields are declared (the highest
 * column index is 9) — confirm the intended numbering convention.
 */
struct Bed9{
    string    chrom;      /// column 0
    size_t    chromStart; /// column 1
    size_t    chromEnd;   /// column 2
    string    name;       /// column 3
    size_t    score;      /// column 4
    char      strand;     /// column 5
    size_t    thickStart; /// column 6
    size_t    thickEnd;   /// column 7
    size_t[3] itemRgb;    /// column 8, three numeric components
                          /// NOTE(review): presumably the "R,G,B" value of the UCSC format — verify how it is parsed
    size_t    blockCount; /// column 9
}
/**
 * BED record holding columns 0 through 10 of a data line.
 * NOTE(review): despite the name, eleven fields are declared (the highest
 * column index is 10) — confirm the intended numbering convention.
 */
struct Bed10{
    string    chrom;      /// column 0
    size_t    chromStart; /// column 1
    size_t    chromEnd;   /// column 2
    string    name;       /// column 3
    size_t    score;      /// column 4
    char      strand;     /// column 5
    size_t    thickStart; /// column 6
    size_t    thickEnd;   /// column 7
    size_t[3] itemRgb;    /// column 8, three numeric components
                          /// NOTE(review): presumably the "R,G,B" value of the UCSC format — verify how it is parsed
    size_t    blockCount; /// column 9
    size_t    blockSizes; /// column 10
                          /// NOTE(review): the UCSC format lists one size per block here; a single size_t holds only one value — confirm
}
/**
 * BED record holding columns 0 through 11 of a data line.
 * NOTE(review): despite the name, twelve fields are declared (the highest
 * column index is 11) — confirm the intended numbering convention.
 */
struct Bed11{
    string    chrom;       /// column 0
    size_t    chromStart;  /// column 1
    size_t    chromEnd;    /// column 2
    string    name;        /// column 3
    size_t    score;       /// column 4
    char      strand;      /// column 5
    size_t    thickStart;  /// column 6
    size_t    thickEnd;    /// column 7
    size_t[3] itemRgb;     /// column 8, three numeric components
                           /// NOTE(review): presumably the "R,G,B" value of the UCSC format — verify how it is parsed
    size_t    blockCount;  /// column 9
    size_t    blockSizes;  /// column 10
                           /// NOTE(review): the UCSC format lists one size per block here; a single size_t holds only one value — confirm
    size_t    blockStarts; /// column 11
                           /// NOTE(review): same concern as blockSizes — the UCSC format lists one start per block
}
/**
 * Header information of a BED file: the "browser" lines and the "track" line.
 * A field left at its default value ("" or 0) is treated as not set.
 */
struct BedMetadata{
    string name;         /// track attribute "name"
    string description;  /// track attribute "description"
    size_t visibility;   /// track attribute "visibility"; 0 is treated as not set
    string itemRgb;      /// track attribute "itemRgb"
    size_t browserStart; /// start of the "browser position" range
    size_t browserEnd;   /// end of the "browser position" range
    string chromosome;   /// chromosome of the "browser position" range
    string hide;         /// argument of the "browser hide" line

    /**
     * Renders the metadata as BED header lines.
     *
     * Emits, in this order and each terminated by a newline:
     * - "browser position chrom:start-end" when chromosome, browserStart and
     *   browserEnd are all set;
     * - "browser hide ..." when hide is set;
     * - one "track ..." line carrying every track attribute that is set.
     *   itemRgb belongs to the track line, so it is appended there rather
     *   than written on a line of its own.
     *
     * NOTE(review): name and description are written unquoted; values that
     * contain spaces would need quoting to stay parseable — confirm with callers.
     *
     * Returns:
     *   The header text, or an empty string when nothing is set.
     */
    string toString() const {
        string result = "";
        if( chromosome != "" && browserStart != 0 && browserEnd != 0 )
            result ~= "browser position %s:%d-%d\n".format( chromosome, browserStart, browserEnd );
        if( hide != "" )
            result ~= "browser hide %s\n".format( hide );

        // Collect the attributes first so that a single track line is produced.
        string[] attributes;
        if( name != "" )
            attributes ~= "name=%s".format( name );
        if( description != "" )
            attributes ~= "description=%s".format( description );
        if( visibility != 0 )
            attributes ~= "visibility=%d".format( visibility );
        if( itemRgb != "" )
            attributes ~= "itemRgb=\"%s\"".format( itemRgb );
        if( attributes.length != 0 )
            result ~= "track " ~ attributes.join( " " ) ~ "\n";
        return result;
    }
}
/// Range of records that std.csv.csvReader produces for one BED data line parsed as T.
/// The reader struct of std.csv cannot be named directly, so its type is taken from a call.
private alias BedRecords( T ) = typeof( csvReader!T( (char[]).init, char.init ) );

/// Fills the browser-position fields of metadata from the text that follows
/// the "browser position" keyword, e.g. " chr7:127471196-127495720".
private void parseBrowserPosition( const(char)[] text, ref BedMetadata metadata ){
    auto position = text.strip();
    // Anything after the first blank is not part of the chrom:start-end range.
    immutable blank = position.indexOfAny( " \t" );
    if( blank != -1 )
        position = position[0 .. blank];

    immutable colon = position.indexOf( ':' );
    ptrdiff_t dash  = -1;
    if( colon != -1 )
        dash = position.indexOf( '-', colon + 1 );
    if( colon <= 0 || dash == -1 )
        throw new Exception( "Malformed metadata line" );

    metadata.chromosome   = position[0 .. colon].idup;
    metadata.browserStart = to!size_t( position[colon + 1 .. dash] );
    metadata.browserEnd   = to!size_t( position[dash + 1 .. $] );
}

/// Converts a track "visibility" value, given as a number or as a display-mode name, to its number.
private size_t parseVisibility( const(char)[] value ){
    switch( value ){
        case "hide":   return 0;
        case "dense":  return 1;
        case "full":   return 2;
        case "pack":   return 3;
        case "squish": return 4;
        default:       return to!size_t( value );
    }
}

/// Fills the track fields of metadata from the key=value attributes that follow
/// the "track" keyword; values may be enclosed in double quotes.
private void parseTrackAttributes( const(char)[] text, ref BedMetadata metadata ){
    auto rest = text.strip();
    while( rest.length != 0 ){
        immutable equal = rest.indexOf( '=' );
        if( equal <= 0 )
            throw new Exception( "Malformed metadata line" );
        auto key = rest[0 .. equal].strip();
        rest = rest[equal + 1 .. $];

        const(char)[] value;
        if( rest.length != 0 && rest[0] == '"' ){
            // Quoted value: runs up to the closing quote and may contain blanks.
            immutable closing = rest[1 .. $].indexOf( '"' );
            if( closing == -1 )
                throw new Exception( "Malformed metadata line" );
            value = rest[1 .. closing + 1];
            rest  = rest[closing + 2 .. $];
        }
        else{
            // Bare value: runs up to the next blank or the end of the line.
            immutable blank = rest.indexOfAny( " \t" );
            if( blank == -1 ){
                value = rest;
                rest  = null;
            }
            else{
                value = rest[0 .. blank];
                rest  = rest[blank .. $];
            }
        }

        switch( key ){
            case "name":        metadata.name        = value.idup;               break;
            case "description": metadata.description = value.idup;               break;
            case "visibility":  metadata.visibility  = parseVisibility( value ); break;
            case "itemRgb":     metadata.itemRgb     = value.idup;               break;
            default:            break; // attributes without a BedMetadata field are ignored
        }
        rest = rest.stripLeft();
    }
}

/**
 * bedReader
 * Reads a BED file line by line: collects the header ("browser" and "track"
 * lines) into a BedMetadata and passes the records of every data line to dg.
 * Params:
 *   T         = struct a data line is converted to by std.csv.csvReader
 *   filePath  = path to the BED file
 *   delimiter = column delimiter of the data lines; a tab by default
 *   dg        = called once per data line with the csvReader range over that
 *               line; its return value is ignored. When null, data lines are
 *               skipped and only the metadata is collected. The range reads
 *               from a buffer that is reused for the next line, so it must be
 *               consumed inside the call.
 * Returns:
 *   The metadata found in the file.
 * Throws:
 *   FileException if filePath does not exist or is not a file;
 *   Exception on a malformed "browser" or "track" line;
 *   ConvException if a number in a metadata line cannot be converted;
 *   whatever csvReader throws for a malformed data line.
 */
BedMetadata bedReader( T = Bed3 )( string filePath, char delimiter = '\t', int delegate( BedRecords!T ) dg = null ){
    // File is declared in std.stdio, which this module does not import at file level.
    import std.stdio : File;

    if( !filePath.exists )
        throw new FileException( filePath, "file does not exist" );
    else if( !filePath.isFile )
        throw new FileException( filePath, "is not a file" );

    File bedFile = File( filePath, "r" );
    BedMetadata metadata;
    enum browserPositionToken = "browser position";
    enum browserHideToken     = "browser hide";
    enum trackToken           = "track";

    foreach( char[] line; bedFile.byLine() ){
        if( line.empty || line.startsWith( '#' ) ) // empty line or comment
            continue;
        else if( line.startsWith( browserPositionToken ) )
            parseBrowserPosition( line[browserPositionToken.length .. $], metadata );
        else if( line.startsWith( browserHideToken ) ){
            auto hidden = line[browserHideToken.length .. $].strip();
            if( hidden.empty )
                throw new Exception( "Malformed metadata line" );
            // byLine reuses its buffer, so the text has to be copied before it is kept.
            metadata.hide = hidden.idup;
        }
        else if( line.startsWith( trackToken )
              && ( line.length == trackToken.length
                || line[trackToken.length] == ' '
                || line[trackToken.length] == '\t' ) )
            parseTrackAttributes( line[trackToken.length .. $], metadata );
        else if( dg !is null ){ // data in csv format
            auto records = csvReader!T( line, delimiter );
            dg( records );
        }
    }
    return metadata;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment