Skip to content

Instantly share code, notes, and snippets.

@bsipos
Created March 29, 2020 14:06
Show Gist options
  • Save bsipos/58f41adebcb8a1d2c7613db80a7afebd to your computer and use it in GitHub Desktop.
Save bsipos/58f41adebcb8a1d2c7613db80a7afebd to your computer and use it in GitHub Desktop.
// CountFastxRecords counts the records in a FASTQ stream by counting its
// separator lines, i.e. occurrences of the byte sequence "\n+\n".
//
// fh is read from its current offset until EOF in chunks of at most buffSize
// bytes; a non-positive buffSize selects a default chunk size. Matching is
// carried across chunk boundaries, so the result equals the number of
// non-overlapping occurrences of "\n+\n" in the whole stream no matter how
// the reads happen to be split. Read errors other than io.EOF are handed to
// checkError, after which reading stops. The elapsed time in microseconds is
// written to stderr before returning.
//
// NOTE(review): format is accepted but never consulted; only the FASTQ
// separator is counted, so FASTA input is not handled by this function.
func CountFastxRecords(fh *os.File, buffSize int, format string) int {
	// Chunk size used when the caller passes a non-positive buffSize.
	const defaultBuffSize = 64 * 1024

	// Start the timer:
	start := time.Now()
	pattern := []byte{'\n', '+', '\n'} // Pattern to look for.
	// A separator split across reads leaves at most len(pattern)-1 of its
	// bytes at the end of the data seen so far.
	maxCarry := len(pattern) - 1
	if buffSize < 1 {
		buffSize = defaultBuffSize
	}
	// Create new buffered reader:
	reader := bufio.NewReaderSize(fh, buffSize)
	// Search window: the carried-over tail of the stream followed by the
	// freshly read chunk, so boundary-spanning separators are found by the
	// same search as all the others.
	window := make([]byte, maxCarry+buffSize)
	carried := 0
	// Variable for counting records:
	var records int
	for {
		// Read the next chunk right behind the carried-over bytes:
		nrBytes, err := reader.Read(window[carried : carried+buffSize])
		// Anything else went wrong?
		if err != nil && err != io.EOF {
			checkError(err)
		}
		data := window[:carried+nrBytes]
		// Count non-overlapping separators, remembering where the last match
		// ended so that its bytes are never reused for another match.
		consumed := 0
		for {
			idx := bytes.Index(data[consumed:], pattern)
			if idx < 0 {
				break
			}
			records++
			consumed += idx + len(pattern)
		}
		// Carry over the unmatched tail that could still start a separator.
		keepFrom := len(data) - maxCarry
		if keepFrom < consumed {
			keepFrom = consumed
		}
		carried = copy(window, data[keepFrom:])
		// Stop at the end of the file, or after a read error in case
		// checkError returned instead of terminating.
		if err != nil {
			break
		}
	}
	// Print out the time needed for counting:
	fmt.Fprintf(os.Stderr, "%d\n", time.Since(start).Microseconds())
	// Return the number of records:
	return records
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment