Skip to content

Instantly share code, notes, and snippets.

@aarani
Created September 6, 2019 19:22
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aarani/27fb48f9ea167e17d89c60e55425a0cb to your computer and use it in GitHub Desktop.
Save aarani/27fb48f9ea167e17d89c60e55425a0cb to your computer and use it in GitHub Desktop.
ABI DNA Chromatogram File Parser for C#
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
namespace ABIParser
{
    /// <summary>
    /// Parses an Applied Biosystems ABIF (.ab1) DNA chromatogram file that has been
    /// loaded fully into memory. The constructor extracts the called sequence
    /// (second PBAS record), the base-call positions (second PLOC record), the
    /// per-base values of the second PCON record, and the four traces stored in the
    /// 9th to 12th DATA records, mapped to A/C/G/T through the FWO_ record.
    /// </summary>
    /// <remarks>
    /// Records are located by counting occurrences of each tag name while walking the
    /// record directory, so "DATA 9" means the ninth DATA entry encountered.
    /// All multi-byte integers in the file are read as big-endian.
    /// </remarks>
    public class ABIParser
    {
        /// <summary>Size of the junk block that may be prepended to the file (see <see cref="macJunk"/>).</summary>
        private const int MacBinaryHeaderSize = 128;

        /// <summary>Offset (from the start of the ABIF data) of the 32-bit pointer to the record directory.</summary>
        private const int AbsIndexBase = 26;

        /// <summary>Size in bytes of one record-directory entry.</summary>
        private const int DirEntrySize = 28;

        /// <summary>Offset of the "data offset" field inside a directory entry.</summary>
        private const int DataFieldOffset = 20;

        /// <summary>Marker for a directory record that was not found.</summary>
        private const int NotFound = -1;

        private readonly byte[] fileData;

        private string Sequence { get; set; }

        // Trace samples per base, each of length TraceLength.
        private int[] A, G, C, T;

        /// <summary>Signed 16-bit values of the second PLOC record, one per called base.</summary>
        public int[] BaseCalls { get; private set; }

        /// <summary>Byte values (0-255) of the second PCON record, one per called base.</summary>
        public int[] QCalls { get; private set; }

        /// <summary>Number of samples in each trace (element count of the 12th DATA record).</summary>
        public int TraceLength { get; private set; }

        /// <summary>Number of called bases (data size of the second PBAS record).</summary>
        public int SeqLength { get; private set; }

        /* Sometimes when Macintosh files are FTPed in binary form, they have
         * 128 bytes of invalid data prepended to them. This field holds the size
         * of that prefix (0 or 128) so that every offset stored in the file can be
         * shifted by it, which keeps the prefix invisible to the user.
         */
        private int macJunk = 0;

        // Absolute file positions of the record payloads (already shifted by macJunk).
        private int PLOC, PCON;
        private int DATA9, DATA10, DATA11, DATA12, PBAS2;

        // Absolute position of the 4 dye-order bytes stored inline in the FWO_ directory entry.
        private int FWO;

        /// <summary>
        /// Loads and parses the chromatogram file at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Path of the .ab1 file to read.</param>
        /// <exception cref="FormatException">
        /// The file does not carry the "ABI" signature (at offset 0 or 128), lacks a
        /// required record, or contains offsets/lengths that point outside the file.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The FWO_ record does not describe a valid A/C/G/T dye order.
        /// </exception>
        public ABIParser(string path)
        {
            fileData = File.ReadAllBytes(path);
            if (!IsABI())
            {
                throw new FormatException("File is not a valid .ab1 file");
            }

            SetIndex();
            SetBasecalls();
            SetQcalls();
            SetSeq();
            SetTraces();
        }

        /// <summary>Returns the called base sequence.</summary>
        public string GetSequence() =>
            Sequence;

        /// <summary>
        /// Returns the trace samples recorded for one nucleotide.
        /// </summary>
        /// <param name="nucleotide">One of A, C, G or T (either case).</param>
        /// <returns>The trace array (length <see cref="TraceLength"/>); the internal array is returned, not a copy.</returns>
        /// <exception cref="ArgumentException"><paramref name="nucleotide"/> is not A, C, G or T.</exception>
        public int[] GetTrace(char nucleotide)
        {
            switch (nucleotide)
            {
                case 'A':
                case 'a':
                    return A;
                case 'C':
                case 'c':
                    return C;
                case 'G':
                case 'g':
                    return G;
                case 'T':
                case 't':
                    return T;
                default:
                    throw new ArgumentException("Nucleotide must be one of A, C, G or T.", nameof(nucleotide));
            }
        }

        /// <summary>
        /// Walks the record directory, remembers where the records of interest live,
        /// then resolves their lengths and payload positions.
        /// </summary>
        private void SetIndex()
        {
            // The header stores the record count at AbsIndexBase - 8 and the
            // directory offset at AbsIndexBase; make sure both are present.
            EnsureRange((long)macJunk + AbsIndexBase - 8, 12, "file header");
            int numRecords = GetIntAt(AbsIndexBase - 8 + macJunk);
            if (numRecords < 0)
            {
                throw new FormatException("File is corrupt: negative record count.");
            }

            // Offsets stored in the file are relative to the start of the ABIF data,
            // so the directory offset must be shifted by macJunk like every other pointer.
            long directoryOffset = (long)GetIntAt(AbsIndexBase + macJunk) + macJunk;
            EnsureRange(directoryOffset, (long)numRecords * DirEntrySize, "record directory");
            int indexBase = (int)directoryOffset;

            int fwoField = NotFound;
            int data9Field = NotFound, data10Field = NotFound, data11Field = NotFound, data12Field = NotFound;
            int pbas2Field = NotFound, ploc2Field = NotFound, pcon2Field = NotFound;
            int dataCounter = 0, pbasCounter = 0, plocCounter = 0, pconCounter = 0;

            for (int record = 0; record < numRecords; record++)
            {
                int entry = indexBase + (record * DirEntrySize);
                int dataField = entry + DataFieldOffset;

                switch (Encoding.ASCII.GetString(fileData, entry, 4))
                {
                    case "FWO_":
                        fwoField = dataField;
                        break;
                    case "DATA":
                        switch (++dataCounter)
                        {
                            case 9:
                                data9Field = dataField;
                                break;
                            case 10:
                                data10Field = dataField;
                                break;
                            case 11:
                                data11Field = dataField;
                                break;
                            case 12:
                                data12Field = dataField;
                                break;
                        }
                        break;
                    case "PBAS":
                        if (++pbasCounter == 2)
                        {
                            pbas2Field = dataField;
                        }
                        break;
                    case "PLOC":
                        if (++plocCounter == 2)
                        {
                            ploc2Field = dataField;
                        }
                        break;
                    case "PCON":
                        if (++pconCounter == 2)
                        {
                            pcon2Field = dataField;
                        }
                        break;
                }
            }

            // The four dye-order bytes live inside the directory entry itself,
            // so FWO is used as a direct position rather than dereferenced.
            FWO = RequireRecord(fwoField, "FWO_");
            RequireRecord(data9Field, "9th DATA");
            RequireRecord(data10Field, "10th DATA");
            RequireRecord(data11Field, "11th DATA");
            RequireRecord(data12Field, "12th DATA");
            RequireRecord(pbas2Field, "2nd PBAS");
            RequireRecord(ploc2Field, "2nd PLOC");
            RequireRecord(pcon2Field, "2nd PCON");

            // Relative to the data-offset field: -8 is the element count, -4 the data size.
            TraceLength = GetIntAt(data12Field - 8);
            SeqLength = GetIntAt(pbas2Field - 4);
            if (TraceLength < 0 || SeqLength < 0)
            {
                throw new FormatException("File is corrupt: negative trace or sequence length.");
            }

            PLOC = ResolvePointer(ploc2Field);
            DATA9 = ResolvePointer(data9Field);
            DATA10 = ResolvePointer(data10Field);
            DATA11 = ResolvePointer(data11Field);
            DATA12 = ResolvePointer(data12Field);
            PBAS2 = ResolvePointer(pbas2Field);
            PCON = ResolvePointer(pcon2Field);
        }

        /// <summary>Reads <see cref="SeqLength"/> big-endian 16-bit values from the PLOC payload.</summary>
        private void SetBasecalls()
        {
            BaseCalls = ReadShorts(PLOC, SeqLength, "PLOC data");
        }

        /// <summary>Reads <see cref="SeqLength"/> single-byte values from the PCON payload.</summary>
        private void SetQcalls()
        {
            EnsureRange(PCON, SeqLength, "PCON data");
            QCalls = new int[SeqLength];
            for (int i = 0; i < SeqLength; ++i)
            {
                QCalls[i] = fileData[PCON + i];
            }
        }

        /// <summary>Builds the sequence string from the PBAS payload, one character per byte.</summary>
        private void SetSeq()
        {
            EnsureRange(PBAS2, SeqLength, "PBAS data");
            char[] bases = new char[SeqLength];
            for (int i = 0; i < SeqLength; ++i)
            {
                bases[i] = (char)fileData[PBAS2 + i];
            }
            Sequence = new string(bases);
        }

        /// <summary>
        /// Maps DATA records 9-12 to nucleotides using the dye order in FWO_ and reads the four traces.
        /// </summary>
        private void SetTraces()
        {
            int[] datas = { DATA9, DATA10, DATA11, DATA12 };
            int[] pointers = { NotFound, NotFound, NotFound, NotFound }; // alphabetical, 0=A, 1=C, 2=G, 3=T

            for (int i = 0; i <= 3; i++)
            {
                switch ((char)fileData[FWO + i])
                {
                    case 'A':
                    case 'a':
                        pointers[0] = datas[i];
                        break;
                    case 'C':
                    case 'c':
                        pointers[1] = datas[i];
                        break;
                    case 'G':
                    case 'g':
                        pointers[2] = datas[i];
                        break;
                    case 'T':
                    case 't':
                        pointers[3] = datas[i];
                        break;
                    default:
                        throw new ArgumentException("Trace contains illegal values.");
                }
            }

            // A repeated letter in FWO_ leaves another base without a trace; reading
            // from a default offset would silently return garbage, so reject it.
            foreach (int pointer in pointers)
            {
                if (pointer == NotFound)
                {
                    throw new ArgumentException("FWO_ record does not list each of A, C, G and T exactly once.");
                }
            }

            A = ReadShorts(pointers[0], TraceLength, "trace data (A)");
            C = ReadShorts(pointers[1], TraceLength, "trace data (C)");
            G = ReadShorts(pointers[2], TraceLength, "trace data (G)");
            T = ReadShorts(pointers[3], TraceLength, "trace data (T)");
        }

        /// <summary>Reads <paramref name="count"/> consecutive big-endian signed 16-bit values.</summary>
        private int[] ReadShorts(int offset, int count, string description)
        {
            EnsureRange(offset, (long)count * 2, description);
            var values = new int[count];
            for (int i = 0; i < count; i++)
            {
                values[i] = GetShortAt(offset + (2 * i));
            }
            return values;
        }

        /// <summary>
        /// Reads the 32-bit offset stored at <paramref name="dataField"/> and converts it
        /// to an absolute position in <see cref="fileData"/> by adding <see cref="macJunk"/>.
        /// </summary>
        private int ResolvePointer(int dataField)
        {
            long target = (long)GetIntAt(dataField) + macJunk;
            EnsureRange(target, 0, "record data pointer");
            return (int)target;
        }

        /// <summary>Reads a big-endian signed 32-bit integer, independent of host byte order.</summary>
        /// <exception cref="FormatException">The four bytes do not lie inside the file.</exception>
        private int GetIntAt(int pointer)
        {
            EnsureRange(pointer, 4, "32-bit value");
            return (fileData[pointer] << 24)
                 | (fileData[pointer + 1] << 16)
                 | (fileData[pointer + 2] << 8)
                 | fileData[pointer + 3];
        }

        /// <summary>Reads a big-endian signed 16-bit integer; the caller has already validated the range.</summary>
        private int GetShortAt(int pointer)
        {
            return unchecked((short)((fileData[pointer] << 8) | fileData[pointer + 1]));
        }

        /// <summary>Throws when a required directory record was not found; otherwise returns its position.</summary>
        private static int RequireRecord(int dataField, string description)
        {
            if (dataField == NotFound)
            {
                throw new FormatException("File does not contain the required " + description + " record.");
            }
            return dataField;
        }

        /// <summary>
        /// Verifies that <paramref name="length"/> bytes starting at <paramref name="offset"/>
        /// lie inside the file, so corrupt files fail with a <see cref="FormatException"/>
        /// instead of an <see cref="IndexOutOfRangeException"/>.
        /// </summary>
        private void EnsureRange(long offset, long length, string description)
        {
            if (offset < 0 || length < 0 || offset + length > fileData.Length)
            {
                throw new FormatException(
                    $"File is truncated or corrupt: {description} at offset {offset} (length {length}) lies outside the file.");
            }
        }

        /// <summary>
        /// Checks for the "ABI" signature at the start of the file or, failing that,
        /// after a 128-byte prefix; in the latter case <see cref="macJunk"/> is set to 128.
        /// </summary>
        private bool IsABI()
        {
            if (HasSignatureAt(0))
            {
                return true;
            }

            if (HasSignatureAt(MacBinaryHeaderSize))
            {
                macJunk = MacBinaryHeaderSize;
                return true;
            }

            return false;
        }

        /// <summary>Returns true when the bytes 'A','B','I' are present at <paramref name="offset"/>.</summary>
        private bool HasSignatureAt(int offset) =>
            fileData.Length >= offset + 3
            && fileData[offset] == (byte)'A'
            && fileData[offset + 1] == (byte)'B'
            && fileData[offset + 2] == (byte)'I';
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment