Skip to content

Instantly share code, notes, and snippets.

@bioinfornatics
Created February 29, 2012 14:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bioinfornatics/1941241 to your computer and use it in GitHub Desktop.
Save bioinfornatics/1941241 to your computer and use it in GitHub Desktop.
bed reader
/**
* The csv module is a set of functions for parsing formats that, like CSV files, separate their fields with a delimiter.
* Supported formats:
* - .mat matrix file
* - .bed UCSC file
* To parse a .csv file, use std.csv.
*/
module dscience.parser.csv;
import std.conv : to;
import std.csv;
import std.file;
import std.array : array, map, split, empty;
import std.algorithm : filter;
import std.string;
/**
 * Plain record holding BED columns 0 through 3.
 *
 * The numeric suffix of the type name is the index of the last column
 * stored, so this record has four fields. Fields are declared in column
 * order.
 */
struct Bed3
{
    string chrom;                   /// column 0
    size_t chromStart, chromEnd;    /// columns 1 and 2
    string name;                    /// column 3
}
/**
 * Plain record holding BED columns 0 through 4.
 *
 * The numeric suffix of the type name is the index of the last column
 * stored, so this record has five fields. Fields are declared in column
 * order.
 */
struct Bed4
{
    string chrom;                   /// column 0
    size_t chromStart, chromEnd;    /// columns 1 and 2
    string name;                    /// column 3
    size_t score;                   /// column 4
}
/**
 * Plain record holding BED columns 0 through 5.
 *
 * The numeric suffix of the type name is the index of the last column
 * stored, so this record has six fields. Fields are declared in column
 * order.
 */
struct Bed5
{
    string chrom;                   /// column 0
    size_t chromStart, chromEnd;    /// columns 1 and 2
    string name;                    /// column 3
    size_t score;                   /// column 4
    char   strand;                  /// column 5
}
/**
 * Plain record holding BED columns 0 through 6.
 *
 * The numeric suffix of the type name is the index of the last column
 * stored, so this record has seven fields. Fields are declared in column
 * order.
 */
struct Bed6
{
    string chrom;                   /// column 0
    size_t chromStart, chromEnd;    /// columns 1 and 2
    string name;                    /// column 3
    size_t score;                   /// column 4
    char   strand;                  /// column 5
    size_t thickStart;              /// column 6
}
/**
 * Plain record holding BED columns 0 through 7.
 *
 * The numeric suffix of the type name is the index of the last column
 * stored, so this record has eight fields. Fields are declared in column
 * order.
 */
struct Bed7
{
    string chrom;                   /// column 0
    size_t chromStart, chromEnd;    /// columns 1 and 2
    string name;                    /// column 3
    size_t score;                   /// column 4
    char   strand;                  /// column 5
    size_t thickStart, thickEnd;    /// columns 6 and 7
}
/**
 * Plain record holding BED columns 0 through 8.
 *
 * The numeric suffix of the type name is the index of the last column
 * stored, so this record has nine fields. Fields are declared in column
 * order.
 */
struct Bed8
{
    string    chrom;                  /// column 0
    size_t    chromStart, chromEnd;   /// columns 1 and 2
    string    name;                   /// column 3
    size_t    score;                  /// column 4
    char      strand;                 /// column 5
    size_t    thickStart, thickEnd;   /// columns 6 and 7
    // NOTE(review): a BED file stores this column as one text field such as
    // "255,0,0"; confirm that the CSV conversion can fill a size_t[3] from it.
    size_t[3] itemRgb;                /// column 8
}
/**
 * Plain record holding BED columns 0 through 9.
 *
 * The numeric suffix of the type name is the index of the last column
 * stored, so this record has ten fields. Fields are declared in column
 * order.
 */
struct Bed9
{
    string    chrom;                  /// column 0
    size_t    chromStart, chromEnd;   /// columns 1 and 2
    string    name;                   /// column 3
    size_t    score;                  /// column 4
    char      strand;                 /// column 5
    size_t    thickStart, thickEnd;   /// columns 6 and 7
    // NOTE(review): a BED file stores this column as one text field such as
    // "255,0,0"; confirm that the CSV conversion can fill a size_t[3] from it.
    size_t[3] itemRgb;                /// column 8
    size_t    blockCount;             /// column 9
}
/**
 * Plain record holding BED columns 0 through 10.
 *
 * The numeric suffix of the type name is the index of the last column
 * stored, so this record has eleven fields. Fields are declared in column
 * order.
 */
struct Bed10
{
    string    chrom;                  /// column 0
    size_t    chromStart, chromEnd;   /// columns 1 and 2
    string    name;                   /// column 3
    size_t    score;                  /// column 4
    char      strand;                 /// column 5
    size_t    thickStart, thickEnd;   /// columns 6 and 7
    // NOTE(review): a BED file stores this column as one text field such as
    // "255,0,0"; confirm that the CSV conversion can fill a size_t[3] from it.
    size_t[3] itemRgb;                /// column 8
    // NOTE(review): the UCSC BED description gives blockSizes as a
    // comma-separated list with blockCount entries; a single size_t may not
    // be able to hold it. Confirm the intended type.
    size_t    blockCount, blockSizes; /// columns 9 and 10
}
/**
 * Plain record holding BED columns 0 through 11.
 *
 * The numeric suffix of the type name is the index of the last column
 * stored, so this record has twelve fields. Fields are declared in column
 * order.
 */
struct Bed11
{
    string    chrom;                  /// column 0
    size_t    chromStart, chromEnd;   /// columns 1 and 2
    string    name;                   /// column 3
    size_t    score;                  /// column 4
    char      strand;                 /// column 5
    size_t    thickStart, thickEnd;   /// columns 6 and 7
    // NOTE(review): a BED file stores this column as one text field such as
    // "255,0,0"; confirm that the CSV conversion can fill a size_t[3] from it.
    size_t[3] itemRgb;                /// column 8
    // NOTE(review): the UCSC BED description gives blockSizes and blockStarts
    // as comma-separated lists with blockCount entries each; single size_t
    // values may not be able to hold them. Confirm the intended types.
    size_t    blockCount, blockSizes, blockStarts; /// columns 9, 10 and 11
}
/**
 * Header information of a BED file: the "browser" lines and the "track" line.
 *
 * A field left at its default value (empty string or 0) is treated as unset
 * and is not written by toString.
 */
struct BedMetadata{
    string name;            /// value of the track line's name attribute
    string description;     /// value of the track line's description attribute
    size_t visibility;      /// value of the track line's visibility attribute; 0 counts as unset
    string itemRgb;         /// value of the track line's itemRgb attribute
    size_t browserStart;    /// start of the "browser position" range; 0 counts as unset
    size_t browserEnd;      /// end of the "browser position" range; 0 counts as unset
    string chromosome;      /// sequence name of the "browser position" line
    string hide;            /// argument of the "browser hide" line

    /**
     * Renders the metadata as BED header lines.
     *
     * Emits, in this order and each terminated by a newline:
     * - "browser position", only when chromosome, browserStart and
     *   browserEnd are all set;
     * - "browser hide", when hide is set;
     * - a single "track" line. name, description and visibility are written
     *   only when all three are set; itemRgb is written whenever it is set.
     *
     * Returns: the header text, or an empty string when nothing is set.
     */
    string toString(){
        string result = "";
        if( chromosome != "" && browserStart != 0 && browserEnd != 0 )
            result ~= "browser position %s:%d-%d\n".format( chromosome, browserStart, browserEnd );
        if( hide != "" )
            result ~= "browser hide %s\n".format( hide );

        // All track attributes go on ONE line. itemRgb used to be written
        // after the track line's newline, where it is no longer part of the
        // track definition.
        string attributes = "";
        if( name != "" && description != "" && visibility != 0 )
            // String values are quoted so that values containing spaces
            // remain a single attribute.
            attributes ~= " name=\"%s\" description=\"%s\" visibility=%d".format( name, description, visibility );
        if( itemRgb != "" )
            attributes ~= " itemRgb=\"%s\"".format( itemRgb );
        if( attributes != "" )
            result ~= "track" ~ attributes ~ "\n";
        return result;
    }
}
/**
 * Reads a delimited BED file line by line and hands every data line to a
 * callback.
 *
 * Lines are classified as follows:
 * - empty lines and lines starting with '#' are skipped;
 * - lines starting with "browser" or "track" are header lines. They are
 *   recognised but not parsed yet (todo), so the returned metadata is
 *   currently always default-initialised;
 * - every other line is parsed with std.csv.csvReader!T and passed to dg.
 *
 * Params:
 *  T         = record type each data line is converted to
 *  filePath  = path of the file to read
 *  delimiter = field separator used on data lines
 *  dg        = callback receiving the csvReader range built from ONE data
 *              line. The range is lazy and reads from the line buffer that
 *              byLine reuses for the next line, so it must be consumed
 *              inside the callback. When null, data lines are skipped.
 *
 * Returns: the metadata collected from the header lines.
 *
 * Throws: FileException when filePath does not exist or is not a regular
 *         file; anything thrown by csvReader or by dg is propagated.
 */
BedMetadata bedReader( T = Bed3 )( string filePath, char delimiter = '\t',
        void delegate( typeof( csvReader!T( (char[]).init, char.init ) ) ) dg = null ){
    // Local imports: the file's import list provides neither name.
    import std.algorithm : startsWith;
    import std.stdio : File;

    // Pass path and message separately: the single-argument constructor takes
    // its argument as the file name and appends the current errno text.
    if( !filePath.exists )
        throw new FileException( filePath, "file does not exist" );
    else if( !filePath.isFile )
        throw new FileException( filePath, "path is not a regular file" );

    File bedFile = File( filePath, "r" );
    BedMetadata metadata;
    foreach( char[] line; bedFile.byLine() ){
        if( line.length == 0 ) // empty line; must be tested before line[0] is read
            continue;
        else if( line[0] == '#' ) // comment
            continue;
        // startsWith is safe on lines shorter than the prefix, unlike a
        // fixed-length slice such as line[0..16].
        else if( line.startsWith( "browser position" ) ){
            // todo
        }
        else if( line.startsWith( "browser hide" ) ){
            // todo
        }
        else if( line.startsWith( "browser" ) || line.startsWith( "track" ) ){
            // todo: remaining header lines; they are not CSV data and must
            // not reach csvReader
        }
        else if( dg !is null ){ // data in csv format
            auto records = csvReader!T( line, delimiter );
            dg( records );
        }
    }
    return metadata;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment