Skip to content

Instantly share code, notes, and snippets.

@mzaks
Created May 23, 2023 06:19
Show Gist options
  • Save mzaks/cdae83132f12ab833e177fd26e50941b to your computer and use it in GitHub Desktop.
Save mzaks/cdae83132f12ab833e177fd26e50941b to your computer and use it in GitHub Desktop.
Count nucleotides with Mojo
from DType import DType
from Functional import vectorize
from Pointer import DTypePointer
from String import String, ord
from TargetInfo import dtype_simd_width
# Compile-time SIMD width that TargetInfo reports for DType.ui8 elements.
# count_nucleotides passes it to vectorize as the vector width.
alias simd_width_u8 = dtype_simd_width[DType.ui8]()
# Sample DNA sequence (characters A, C, G, T) that the script counts and prints.
let dna = "CAAGAACCAAGATAACACTCATCGTTTACTTCTTACCCGTGCCAATTCGTATTACAAACGAAACCGTGTGGGCCATGTTCGTTATCCGAGGCCCCTTCAATTACTCGTCACTAGTGACCGTCGCTACTATGCCGTGTCCATGATATTACATCAAGACAATGAGATACGAAACGACAGCTGTTCCTACGCCTCGCGAGGGGTTCTACCCCTGAGCCGTGGGAACAGGCCGTCCGACGATCTTCAAGTGTTAAAGCTAGAAAACTTGATCAGAGAACAGTGACAATCCGGTGCAATTAGGGCGCTTCTAGCAAAGTCTTGACGGTTGACATGCTATTCTACCGGCGCAGGTTGCTTGAATGCGCGGGAGTTTTAAGCTCCTCTGTCACGCCATGCCCCCTGCAGTAGCTCACCAGCAAGAAGTTGGCTTAATATACCTGGTAGGAACGTTTGGTTAAACTTCTTTCCCTCTTCTTATACCGATGACACCTACCAATTACGGTCGGCCCGCCCGTGATCCAAACAGGCCTTAATCTTCCAATAATTCAATATGTGTGTGGCTTACAGGAGTCGAATATTTATAAGTGCATTCCTGCCTTCGCTGTTGCGATTTATAGCATCTTATGGTGGCGCAGGGCAACACTTAAAAGGGAGCCAACATGAGTTTCTAGCGTCAGGCACTGCCCTGAGGTAAAGGAATACCTGTTCGATACTATGAGGCGAGATCGCCCCACCTTAAAACAGAAAGACGGTAACGGTCCCTAGCCATTTCCTTATTGCGTACGAGATTATGGAACGCTTAAAGCCGAGCCTTGAACGGCTCCTATCCTGAGCCGTAGCCCAATGAACGTGTTCCCAAGAACGCGAACTCCGCTGGGATAGAGTTCCCGTCTGGACCGGATGAGGTGAAAGACAGTTCGGCAGTGCGAGCGCATTGTTTT"
fn count_nucleotides(dna: StringLiteral) -> String:
    # Count how many bytes of `dna` equal 'A', 'C', 'G' and 'T'.
    #
    # Args:
    #   dna: the sequence to scan; bytes other than A/C/G/T are ignored.
    # Returns:
    #   The four counts in the order A, C, G, T, separated by single spaces.
    let p = DTypePointer[DType.si8](dna.data())
    let string_byte_length = len(dna)

    # Character codes to compare against; never reassigned, hence `let`.
    let a_char = ord("A")
    let c_char = ord("C")
    let g_char = ord("G")
    let t_char = ord("T")

    # Running totals, mutated by the `count` closure below.
    var a_count = 0
    var c_count = 0
    var g_count = 0
    var t_count = 0

    @parameter
    fn count[simd_width: Int](offset: Int):
        # Load the vector once and reuse it for all four comparisons instead
        # of reading the same memory four times.
        let chunk = p.simd_load[simd_width](offset)
        # Each comparison yields a per-lane mask; casting to ui8 and summing
        # gives the number of matching lanes.
        # NOTE(review): assumes simd_width <= 255 so the ui8 sum cannot wrap.
        a_count += (chunk == a_char).cast[DType.ui8]().reduce_add().to_int()
        c_count += (chunk == c_char).cast[DType.ui8]().reduce_add().to_int()
        g_count += (chunk == g_char).cast[DType.ui8]().reduce_add().to_int()
        t_count += (chunk == t_char).cast[DType.ui8]().reduce_add().to_int()

    # vectorize drives `count` across the bytes of the string; per the Mojo
    # docs it covers a tail that does not fill a whole vector with a narrower
    # width, which is why `count` is parametric on simd_width.
    vectorize[simd_width_u8, count](string_byte_length)
    return String(a_count) + ' ' + String(c_count) + ' ' + String(g_count) + ' ' + String(t_count)
# Count the sample sequence and print the resulting "A C G T" count line.
let nucleotide_summary = count_nucleotides(dna)
print(nucleotide_summary)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment