Skip to content

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
C# / JavaScript - Splitting a string / sequence into parts of equal length ; split after n number of characters ; split sequence into GFF / FASTA like structure
//www.lsauer.com 2012
//FASTA linear sequence, cut into pieces of 50 characters.
//split() with a capturing group keeps every captured 50-character match in the result array;
//the empty strings it leaves between adjacent matches are dropped by the filter.
//The last element is whatever shorter remainder is left over.
"MEIEKSNNGGSNPSAGEEFKDMIKGVTKFLMMVIFLGTIMLWIMMPTLTYRTKWLPHLRIKFGTSTYFGATGTTLFMYMFPMMVVACLGCVYLHFKNRKSPHHIDRETKGGVWSKLRKPMLVKGPLGIVSVTEITFLAMFVALLLWCFITYLRNSFATITPKSAAAHDESLWQAKLESAALRLGLIGNICLAFLFLPVARGSSLLPAMGLTSESSIKYHIWLGHMVMALFTVHGLCYIIYWASMHEISQMIMWDTKGVSNLAGEIALAAGLVMWATTYPKIRRRFFEVFFYTHYLYIVFMLFFVLHVGISFSFIALPGFYIFLVDRFLRFLQSRENVRLLAARILPSDTMELTFSKNSKLVYSPTSIMFVNIPSISKLQWHPFTITSSSKLEPEKLSIVIKKEGKWSTKLHQRLSSSDQIDRLAVSVEGPYGPASADFLRHEALVMVCGGSGITPFISVIRDLIATSQKETCKIPKITLICAFKKSSEISMLDLVLPLSGLETELSSDINIKIEAFITRDNDAGDEAKAGKIKTLWFKPSLSDQSISSILGPNSWLWLGAILASSFLIFMIIIGIITRYYIYPIDHNTNKIYSLTSKTIIYILVISVSIMATCSAAMLWNKKKYGKVESKQVQNVDRPSPTSSPTSSWGYNSLREIESTPQESLVQRTNLHFGERPNLKKLLLDVEGSSVGVLVCGPKKMRQKVAEICSSGLAENLHFESISFSW"
  .split(/(.{50})/gm)
  .filter(function (piece) { return piece !== ''; })
//there is also the neat CSS 'word-break:break-all;' property and 'word-wrap', but neither lets you specify the exact number of characters to break at.
//result - useful for FASTA splitting
["MEIEKSNNGGSNPSAGEEFKDMIKGVTKFLMMVIFLGTIMLWIMMPTLTY", "RTKWLPHLRIKFGTSTYFGATGTTLFMYMFPMMVVACLGCVYLHFKNRKS",
"PHHIDRETKGGVWSKLRKPMLVKGPLGIVSVTEITFLAMFVALLLWCFIT", "YLRNSFATITPKSAAAHDESLWQAKLESAALRLGLIGNICLAFLFLPVAR",
"GSSLLPAMGLTSESSIKYHIWLGHMVMALFTVHGLCYIIYWASMHEISQM", "IMWDTKGVSNLAGEIALAAGLVMWATTYPKIRRRFFEVFFYTHYLYIVFM",
"LFFVLHVGISFSFIALPGFYIFLVDRFLRFLQSRENVRLLAARILPSDTM", "ELTFSKNSKLVYSPTSIMFVNIPSISKLQWHPFTITSSSKLEPEKLSIVI",
"KKEGKWSTKLHQRLSSSDQIDRLAVSVEGPYGPASADFLRHEALVMVCGG", "SGITPFISVIRDLIATSQKETCKIPKITLICAFKKSSEISMLDLVLPLSG",
"LETELSSDINIKIEAFITRDNDAGDEAKAGKIKTLWFKPSLSDQSISSIL", "GPNSWLWLGAILASSFLIFMIIIGIITRYYIYPIDHNTNKIYSLTSKTII",
"YILVISVSIMATCSAAMLWNKKKYGKVESKQVQNVDRPSPTSSPTSSWGY", "NSLREIESTPQESLVQRTNLHFGERPNLKKLLLDVEGSSVGVLVCGPKKM",
"RQKVAEICSSGLAENLHFESISFSW"]
//in GFF / GPFF formatting
//Formats one 10-character block of the listing; used as the map() callback below.
//  block - the 10-character (or shorter, for the last one) piece of the sequence
//  index - zero-based index of that piece; every 6th piece opens a new row
//Returns the text to append for this piece:
//  - a piece that opens a row is prefixed with its 1-based sequence position,
//    right-aligned to 4 columns and followed by one space (rows after the first
//    also start with '\n');
//  - any other piece is prefixed with a single separating space.
//Joining the returned pieces with '' therefore yields rows without a leading
//blank line and without trailing spaces.
function formatGffBlock(block, index) {
  if (index % 6 !== 0) {
    return ' ' + block;
  }
  var pos = String(index * 10 + 1);
  //The pad source must be as wide as the column (4 spaces) for slice() to be able
  //to right-align; Math.max keeps the slice end from going negative for positions
  //with more than 4 digits, which would otherwise re-introduce padding.
  var pad = '    '.slice(0, Math.max(0, 4 - pos.length));
  return (index === 0 ? '' : '\n') + pad + pos + ' ' + block;
}
"MEIEKSNNGGSNPSAGEEFKDMIKGVTKFLMMVIFLGTIMLWIMMPTLTYRTKWLPHLRIKFGTSTYFGATGTTLFMYMFPMMVVACLGCVYLHFKNRKSPHHIDRETKGGVWSKLRKPMLVKGPLGIVSVTEITFLAMFVALLLWCFITYLRNSFATITPKSAAAHDESLWQAKLESAALRLGLIGNICLAFLFLPVARGSSLLPAMGLTSESSIKYHIWLGHMVMALFTVHGLCYIIYWASMHEISQMIMWDTKGVSNLAGEIALAAGLVMWATTYPKIRRRFFEVFFYTHYLYIVFMLFFVLHVGISFSFIALPGFYIFLVDRFLRFLQSRENVRLLAARILPSDTMELTFSKNSKLVYSPTSIMFVNIPSISKLQWHPFTITSSSKLEPEKLSIVIKKEGKWSTKLHQRLSSSDQIDRLAVSVEGPYGPASADFLRHEALVMVCGGSGITPFISVIRDLIATSQKETCKIPKITLICAFKKSSEISMLDLVLPLSGLETELSSDINIKIEAFITRDNDAGDEAKAGKIKTLWFKPSLSDQSISSILGPNSWLWLGAILASSFLIFMIIIGIITRYYIYPIDHNTNKIYSLTSKTIIYILVISVSIMATCSAAMLWNKKKYGKVESKQVQNVDRPSPTSSPTSSWGYNSLREIESTPQESLVQRTNLHFGERPNLKKLLLDVEGSSVGVLVCGPKKMRQKVAEICSSGLAENLHFESISFSW"
.split(/(.{10})/gm).filter(Boolean).map(formatGffBlock).join('')
//>result
1 MEIEKSNNGG SNPSAGEEFK DMIKGVTKFL MMVIFLGTIM LWIMMPTLTY RTKWLPHLRI
61 KFGTSTYFGA TGTTLFMYMF PMMVVACLGC VYLHFKNRKS PHHIDRETKG GVWSKLRKPM
121 LVKGPLGIVS VTEITFLAMF VALLLWCFIT YLRNSFATIT PKSAAAHDES LWQAKLESAA
181 LRLGLIGNIC LAFLFLPVAR GSSLLPAMGL TSESSIKYHI WLGHMVMALF TVHGLCYIIY
241 WASMHEISQM IMWDTKGVSN LAGEIALAAG LVMWATTYPK IRRRFFEVFF YTHYLYIVFM
301 LFFVLHVGIS FSFIALPGFY IFLVDRFLRF LQSRENVRLL AARILPSDTM ELTFSKNSKL
361 VYSPTSIMFV NIPSISKLQW HPFTITSSSK LEPEKLSIVI KKEGKWSTKL HQRLSSSDQI
421 DRLAVSVEGP YGPASADFLR HEALVMVCGG SGITPFISVI RDLIATSQKE TCKIPKITLI
481 CAFKKSSEIS MLDLVLPLSG LETELSSDIN IKIEAFITRD NDAGDEAKAG KIKTLWFKPS
541 LSDQSISSIL GPNSWLWLGA ILASSFLIFM IIIGIITRYY IYPIDHNTNK IYSLTSKTII
601 YILVISVSIM ATCSAAMLWN KKKYGKVESK QVQNVDRPSP TSSPTSSWGY NSLREIESTP
661 QESLVQRTNL HFGERPNLKK LLLDVEGSSV GVLVCGPKKM RQKVAEICSS GLAENLHFES
721 ISFSW
//lo sauer, 2012 - free use
//analogous solution in C#, using LINQ to filter out empty elements
using System;
using System.Linq; // Where/ToArray below are LINQ extension methods
using System.Text.RegularExpressions;

// Cuts the sequence into pieces of 50 characters.
// Regex.Split includes the text captured by the (.{50}) group in its result, so every
// 50-character match comes back as an element; the empty strings it leaves between
// adjacent matches are removed with Where. The last element is the shorter remainder.
// RegexOptions.Multiline was dropped: it only changes how ^ and $ match, and the
// pattern uses neither, so the result is unchanged.
Regex.Split(
"MEIEKSNNGGSNPSAGEEFKDMIKGVTKFLMMVIFLGTIMLWIMMPTLTYRTKWLPHLRIKFGTSTYFGATGTTLFMYMFPMMVVACLGCVYLHFKNRKSPHHIDRETKGGVWSKLRKPMLVKGPLGIVSVTEITFLAMFVALLLWCFITYLRNSFATITPKSAAAHDESLWQAKLESAALRLGLIGNICLAFLFLPVARGSSLLPAMGLTSESSIKYHIWLGHMVMALFTVHGLCYIIYWASMHEISQMIMWDTKGVSNLAGEIALAAGLVMWATTYPKIRRRFFEVFFYTHYLYIVFMLFFVLHVGISFSFIALPGFYIFLVDRFLRFLQSRENVRLLAARILPSDTMELTFSKNSKLVYSPTSIMFVNIPSISKLQWHPFTITSSSKLEPEKLSIVIKKEGKWSTKLHQRLSSSDQIDRLAVSVEGPYGPASADFLRHEALVMVCGGSGITPFISVIRDLIATSQKETCKIPKITLICAFKKSSEISMLDLVLPLSGLETELSSDINIKIEAFITRDNDAGDEAKAGKIKTLWFKPSLSDQSISSILGPNSWLWLGAILASSFLIFMIIIGIITRYYIYPIDHNTNKIYSLTSKTIIYILVISVSIMATCSAAMLWNKKKYGKVESKQVQNVDRPSPTSSPTSSWGYNSLREIESTPQESLVQRTNLHFGERPNLKKLLLDVEGSSVGVLVCGPKKMRQKVAEICSSGLAENLHFESISFSW"
, "(.{50})")
.Where(s => !string.IsNullOrEmpty(s))
.ToArray();
//> Result (courtesy of the great REPL Mono CS Shell2: http://www.mono-project.com/CsharpRepl )
{ "MEIEKSNNGGSNPSAGEEFKDMIKGVTKFLMMVIFLGTIMLWIMMPTLTY", "RTKWLPHLRIKFGTSTYFGATGTTLFMYMFPMMVVACLGCVYLHFKNRKS",
"PHHIDRETKGGVWSKLRKPMLVKGPLGIVSVTEITFLAMFVALLLWCFIT", "YLRNSFATITPKSAAAHDESLWQAKLESAALRLGLIGNICLAFLFLPVAR",
"GSSLLPAMGLTSESSIKYHIWLGHMVMALFTVHGLCYIIYWASMHEISQM", "IMWDTKGVSNLAGEIALAAGLVMWATTYPKIRRRFFEVFFYTHYLYIVFM",
"LFFVLHVGISFSFIALPGFYIFLVDRFLRFLQSRENVRLLAARILPSDTM", "ELTFSKNSKLVYSPTSIMFVNIPSISKLQWHPFTITSSSKLEPEKLSIVI",
"KKEGKWSTKLHQRLSSSDQIDRLAVSVEGPYGPASADFLRHEALVMVCGG", "SGITPFISVIRDLIATSQKETCKIPKITLICAFKKSSEISMLDLVLPLSG",
"LETELSSDINIKIEAFITRDNDAGDEAKAGKIKTLWFKPSLSDQSISSIL", "GPNSWLWLGAILASSFLIFMIIIGIITRYYIYPIDHNTNKIYSLTSKTII",
"YILVISVSIMATCSAAMLWNKKKYGKVESKQVQNVDRPSPTSSPTSSWGY", "NSLREIESTPQESLVQRTNLHFGERPNLKKLLLDVEGSSVGVLVCGPKKM",
"RQKVAEICSSGLAENLHFESISFSW"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.