Created
February 29, 2012 14:36
-
-
Save bioinfornatics/1941241 to your computer and use it in GitHub Desktop.
bed reader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * The csv module is a set of functions to parse several formats that, like a csv file, use a delimiter.
 * Supported formats:
 * - .mat matrix file
 * - .bed UCSC file
 * To parse a plain .csv file, use std.csv.
 */
module dscience.parser.csv; | |
import std.conv : to; | |
import std.csv; | |
import std.file; | |
import std.array : array, map, split, empty; | |
import std.algorithm : filter; | |
import std.string; | |
/// BED record made of columns 0 through 3 of a data line.
/// Fields are declared in column order; the trailing number is the column index.
struct Bed3
{
    string chrom;       /// column 0
    size_t chromStart;  /// column 1
    size_t chromEnd;    /// column 2
    string name;        /// column 3
}
/// BED record made of columns 0 through 4 of a data line.
/// Fields are declared in column order; the trailing number is the column index.
struct Bed4
{
    string chrom;       /// column 0
    size_t chromStart;  /// column 1
    size_t chromEnd;    /// column 2
    string name;        /// column 3
    size_t score;       /// column 4
}
/// BED record made of columns 0 through 5 of a data line.
/// Fields are declared in column order; the trailing number is the column index.
struct Bed5
{
    string chrom;       /// column 0
    size_t chromStart;  /// column 1
    size_t chromEnd;    /// column 2
    string name;        /// column 3
    size_t score;       /// column 4
    char   strand;      /// column 5
}
/// BED record made of columns 0 through 6 of a data line.
/// Fields are declared in column order; the trailing number is the column index.
struct Bed6
{
    string chrom;       /// column 0
    size_t chromStart;  /// column 1
    size_t chromEnd;    /// column 2
    string name;        /// column 3
    size_t score;       /// column 4
    char   strand;      /// column 5
    size_t thickStart;  /// column 6
}
/// BED record made of columns 0 through 7 of a data line.
/// Fields are declared in column order; the trailing number is the column index.
struct Bed7
{
    string chrom;       /// column 0
    size_t chromStart;  /// column 1
    size_t chromEnd;    /// column 2
    string name;        /// column 3
    size_t score;       /// column 4
    char   strand;      /// column 5
    size_t thickStart;  /// column 6
    size_t thickEnd;    /// column 7
}
/// BED record made of columns 0 through 8 of a data line.
/// Fields are declared in column order; the trailing number is the column index.
struct Bed8
{
    string    chrom;       /// column 0
    size_t    chromStart;  /// column 1
    size_t    chromEnd;    /// column 2
    string    name;        /// column 3
    size_t    score;       /// column 4
    char      strand;      /// column 5
    size_t    thickStart;  /// column 6
    size_t    thickEnd;    /// column 7
    // NOTE(review): UCSC BED files appear to store this column as "R,G,B" text;
    // confirm the csv conversion can fill a static array from that form.
    size_t[3] itemRgb;     /// column 8
}
/// BED record made of columns 0 through 9 of a data line.
/// Fields are declared in column order; the trailing number is the column index.
struct Bed9
{
    string    chrom;       /// column 0
    size_t    chromStart;  /// column 1
    size_t    chromEnd;    /// column 2
    string    name;        /// column 3
    size_t    score;       /// column 4
    char      strand;      /// column 5
    size_t    thickStart;  /// column 6
    size_t    thickEnd;    /// column 7
    size_t[3] itemRgb;     /// column 8
    size_t    blockCount;  /// column 9
}
/// BED record made of columns 0 through 10 of a data line.
/// Fields are declared in column order; the trailing number is the column index.
struct Bed10
{
    string    chrom;       /// column 0
    size_t    chromStart;  /// column 1
    size_t    chromEnd;    /// column 2
    string    name;        /// column 3
    size_t    score;       /// column 4
    char      strand;      /// column 5
    size_t    thickStart;  /// column 6
    size_t    thickEnd;    /// column 7
    size_t[3] itemRgb;     /// column 8
    size_t    blockCount;  /// column 9
    // NOTE(review): UCSC BED appears to define this column as a comma-separated
    // list with one entry per block; a single size_t may not hold it - confirm.
    size_t    blockSizes;  /// column 10
}
/// BED record made of columns 0 through 11 of a data line.
/// Fields are declared in column order; the trailing number is the column index.
struct Bed11
{
    string    chrom;        /// column 0
    size_t    chromStart;   /// column 1
    size_t    chromEnd;     /// column 2
    string    name;         /// column 3
    size_t    score;        /// column 4
    char      strand;       /// column 5
    size_t    thickStart;   /// column 6
    size_t    thickEnd;     /// column 7
    size_t[3] itemRgb;      /// column 8
    size_t    blockCount;   /// column 9
    // NOTE(review): UCSC BED appears to define the two block columns as
    // comma-separated lists; a single size_t each may not hold them - confirm.
    size_t    blockSizes;   /// column 10
    size_t    blockStarts;  /// column 11
}
/**
 * Header information of a BED file: the "browser" lines and the "track" line.
 *
 * An empty string or a zero value means "not set"; toString leaves out
 * whatever is not set.
 */
struct BedMetadata{
    string name;            /// track attribute "name"
    string description;     /// track attribute "description"
    size_t visibility;      /// track attribute "visibility"; 0 is treated as unset
    string itemRgb;         /// track attribute "itemRgb"
    size_t browserStart;    /// start of the "browser position" range
    size_t browserEnd;      /// end of the "browser position" range
    string chromosome;      /// chromosome of the "browser position" line
    string hide;            /// argument of the "browser hide" line

    /**
     * Renders the header lines, each terminated by a newline.
     *
     * - "browser position" is written only when chromosome, browserStart and
     *   browserEnd are all set.
     * - "browser hide" is written when hide is set.
     * - name/description/visibility are written only when all three are set.
     * - itemRgb is appended to the track line as an attribute, not written on
     *   a line of its own.
     *
     * Returns: the header text, or an empty string when nothing is set
     */
    string toString(){
        string result = "";
        if( chromosome != "" && browserStart != 0 && browserEnd != 0 )
            result ~= "browser position %s:%d-%d\n".format( chromosome, browserStart, browserEnd );
        if( hide != "" )
            result ~= "browser hide %s\n".format( hide );

        // Collect the track attributes first so they end up on one "track" line.
        // NOTE(review): name and description are written unquoted; values that
        // contain spaces probably need quoting - confirm against the format spec.
        string attributes = "";
        if( name != "" && description != "" && visibility != 0 )
            attributes ~= " name=%s description=%s visibility=%d".format( name, description, visibility );
        if( itemRgb != "" )
            attributes ~= " itemRgb=\"%s\"".format( itemRgb );
        if( attributes != "" )
            result ~= "track" ~ attributes ~ "\n";
        return result;
    }
}
/**
 * Reads a BED file line by line and hands every decoded data record to a callback.
 *
 * Empty lines and comment lines (starting with '#') are skipped. "browser position",
 * "browser hide" and "track" lines are recognised but not parsed yet, so the
 * returned metadata is currently always default-initialised.
 *
 * Params:
 *  T         = record struct a data line is decoded into
 *  filePath  = path of the BED file to read
 *  delimiter = column separator of the data lines
 *  dg        = callback invoked once per decoded record; when null, data lines
 *              are skipped without being decoded
 *
 * Returns: the metadata gathered from the header lines
 * Throws: FileException when filePath does not exist or is not a file;
 *         whatever csvReader throws when a data line cannot be decoded into T
 */
BedMetadata bedReader( T=Bed3 )( string filePath, char delimiter='\t', void delegate(T) dg = null ){
    // Function-local imports: the module header does not bring these names in.
    import std.algorithm : startsWith;
    import std.stdio : File;

    // The two-argument FileException form takes (file name, message). The
    // one-argument form treats its argument as the file name and appends the
    // text of the current errno.
    if( !filePath.exists )
        throw new FileException( filePath, "File %s does not exist".format(filePath) );
    else if( !filePath.isFile )
        throw new FileException( filePath, "File %s is not a file".format(filePath) );

    File bedFile = File( filePath, "r" );
    BedMetadata metadata;
    foreach( char[] line; bedFile.byLine() ){
        // Test for emptiness before looking at line[0].
        if( line.length == 0 )                          // empty line
            continue;
        else if( line[0] == '#' )                       // comment
            continue;
        // startsWith is safe on lines shorter than the keyword, unlike a fixed slice.
        else if( line.startsWith("browser position") ){
            // todo
        }
        else if( line.startsWith("browser hide") ){
            // todo
        }
        else if( line.startsWith("track") ){
            // todo: BedMetadata.toString writes such a line; keep it out of the csv parser
        }
        else if( dg !is null ){                         // data in csv format
            foreach( record; csvReader!T( line, delimiter ) )
                dg( record );
        }
    }
    return metadata;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment