Skip to content

Instantly share code, notes, and snippets.

@lindenb
Created July 26, 2022 21:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lindenb/f519f56e6ba1aea7a0f6b8887f941610 to your computer and use it in GitHub Desktop.
Save lindenb/f519f56e6ba1aea7a0f6b8887f941610 to your computer and use it in GitHub Desktop.
Biostar9532375 Remove redundant nucleotide sequences in a FASTA file containing ambiguous N

Compile:

javac Biostar9532375.java

Execute:

$ java Biostar9532375 <  in.fa
>seq1
ACNTACNT
>seq5
TCGTACTT
import java.io.*;
import java.util.*;
/**
 * Removes redundant nucleotide sequences from a FASTA stream read on standard
 * input, treating the ambiguous base {@code N} as a wildcard that matches any
 * residue.
 *
 * <p>Two records are redundant when their sequences have the same length and
 * every pair of aligned residues is either identical or contains an {@code N}.
 * A record is compared only against the records already kept, so the first
 * record of a group of matching records is the one that is written out.
 * Records are printed to standard output in input order, each as its header
 * line followed by the whole sequence on a single upper-case line.
 */
public class Biostar9532375 {

    /** One FASTA record: its header line and its concatenated residues. */
    private static class Sequence {
        /** Header line exactly as read, including the leading {@code '>'}. */
        String name;
        /** Residues of every sequence line of the record, trimmed, upper-cased and joined. */
        StringBuilder seq = new StringBuilder();

        /**
         * Tells whether two residues are compatible.
         *
         * @param c1 first residue (expected upper-case)
         * @param c2 second residue (expected upper-case)
         * @return {@code true} if both are equal or at least one of them is {@code 'N'}
         */
        boolean same(char c1, char c2) {
            return c1 == c2 || c1 == 'N' || c2 == 'N';
        }

        /**
         * Tells whether this record and {@code o} carry compatible sequences.
         *
         * @param o the record to compare with
         * @return {@code true} if both sequences have the same length and all
         *         aligned residues are compatible according to {@link #same(char, char)}
         */
        boolean same(Sequence o) {
            final int length = seq.length();
            if (length != o.seq.length()) {
                return false;
            }
            for (int i = 0; i < length; i++) {
                if (!same(seq.charAt(i), o.seq.charAt(i))) {
                    return false;
                }
            }
            return true;
        }
    }

    /**
     * Reads FASTA records and keeps those not matched by an earlier kept record.
     *
     * <p>Lines found before the first header line are ignored.
     *
     * @param br source of FASTA text
     * @return the retained records, in input order
     * @throws IOException if reading from {@code br} fails
     */
    private static List<Sequence> readUnique(final BufferedReader br) throws IOException {
        final List<Sequence> kept = new ArrayList<>();
        Sequence current = null;
        String line;
        while ((line = br.readLine()) != null) {
            if (line.startsWith(">")) {
                // A new header closes the record being built.
                keepIfNovel(kept, current);
                current = new Sequence();
                current.name = line;
            } else if (current != null) {
                // Locale.ROOT keeps the case mapping independent of the JVM's
                // default locale (e.g. Turkish maps 'i' to a dotted capital 'İ').
                current.seq.append(line.trim().toUpperCase(Locale.ROOT));
            }
        }
        // End of input closes the last record.
        keepIfNovel(kept, current);
        return kept;
    }

    /** Appends {@code candidate} to {@code kept} unless it is null or matches a record already kept. */
    private static void keepIfNovel(final List<Sequence> kept, final Sequence candidate) {
        if (candidate != null && kept.stream().noneMatch(s -> s.same(candidate))) {
            kept.add(candidate);
        }
    }

    /**
     * Program entry point: filters the FASTA text on standard input and writes
     * the retained records to standard output.
     *
     * @param args ignored
     * @throws UncheckedIOException if standard input cannot be read; the failure
     *         is propagated so the JVM terminates with a non-zero exit status
     *         instead of reporting success
     */
    public static void main(final String[] args) {
        final List<Sequence> sequences;
        try (BufferedReader br = new BufferedReader(new InputStreamReader(System.in))) {
            sequences = readUnique(br);
        } catch (IOException err) {
            throw new UncheckedIOException("Cannot read FASTA from standard input", err);
        }
        for (Sequence s : sequences) {
            System.out.println(s.name);
            System.out.println(s.seq);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment