Skip to content

Instantly share code, notes, and snippets.

@jamessdixon
Created September 23, 2020 12:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamessdixon/897713a8f5439148d14aa327370fe81c to your computer and use it in GitHub Desktop.
Save jamessdixon/897713a8f5439148d14aa327370fe81c to your computer and use it in GitHub Desktop.
BioinformaticsFrequentWords
/// Returns the most frequent substrings of length `k` (k-mers) in `text`.
///
/// text: the sequence to scan.
/// k:    the window (k-mer) length; `Seq.windowed` rejects non-positive values.
///
/// Returns every k-mer whose occurrence count equals the maximum count, in
/// order of first appearance in `text`. When `text` contains no complete
/// window (it is empty or shorter than `k`) the result is an empty sequence.
let frequentWords (text:string) (k:int) =
    // Materialised once so the counting pass is not re-run for every
    // enumeration of the lazy sequence.
    let patternCounts =
        text
        |> Seq.windowed k
        |> Seq.map (fun window -> System.String(window))
        |> Seq.countBy id
        |> Seq.toArray
    if Array.isEmpty patternCounts then
        // No windows at all: there is no maximum to look up.
        Seq.empty
    else
        let maxCount = patternCounts |> Seq.map snd |> Seq.max
        patternCounts
        |> Seq.filter (fun (_, count) -> count = maxCount)
        |> Seq.map fst
/// Single generator shared by every call to `getRandomNuclotide`, so that
/// repeated calls draw from one random stream instead of re-seeding a fresh
/// `Random` each time. `System.Random` instances are not thread-safe.
let private nucleotideRandom = System.Random()

/// Returns one randomly chosen nucleotide letter: "A", "C", "G" or "T".
let getRandomNuclotide () =
    let dictionary = [| "A"; "C"; "G"; "T" |]
    // Upper bound of Next is exclusive, so this indexes 0 .. Length - 1.
    dictionary.[nucleotideRandom.Next(dictionary.Length)]
/// Builds a random nucleotide string containing exactly `length` letters.
///
/// length: number of nucleotides to generate; zero or a negative value
///         produces the empty string.
let getRandomSequence (length:int) =
    // F# ranges are inclusive at both ends, so the range must start at 1
    // (not 0) to produce `length` items rather than `length + 1`.
    let nuclotides = [ for _ in 1 .. length -> getRandomNuclotide() ]
    String.Join("", nuclotides)
// Driver: generate a large random sequence, then look up its most
// frequent 9-mers.
let largerText = 1000000 |> getRandomSequence
let currentFrequentWords = (largerText, 9) ||> frequentWords
// Bare expression so F# Interactive echoes the result.
currentFrequentWords
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment