Skip to content

Instantly share code, notes, and snippets.

@Adrodoc
Created August 12, 2019 20:00
Show Gist options
  • Save Adrodoc/2f3739301b140b84a69cff4c50bd0291 to your computer and use it in GitHub Desktop.
Save Adrodoc/2f3739301b140b84a69cff4c50bd0291 to your computer and use it in GitHub Desktop.
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
public class Node {
int start;
int end;
Node suffixlink;
HashMap<String,Node> children = new HashMap<>();
public Node next;
boolean isleaf = false;
public Node(int start, int end) {
this.start = start;
this.end = end;
}
public String getlabel(String s ) {
if(this.start == -1) return "root";
String label = s.substring(this.start, this.end) ;
return label;
}
public Node() {
}
}
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
public class suffixTree {
private Node root ;
private Node suffixstate;
int suffixlink;
String s;
private ActivePoint activepoint;
private int remainder ;
public suffixTree() {
root = new Node(-1,-1);//split node
activepoint = new ActivePoint(root, null, 0); //(active_node,active_edge,active_length)
}
public void build(String s) {
this.s= s;
int stringlength = s.length();
for(int i=0 ;i<s.length();i++) {
// System.out.println("add: " + s.charAt(i));
insert(s.charAt(i), i, stringlength);
}
System.out.println("output: ");
// printTree(root);
}
public void printTree(Node n ) {
for(String key:n.children.keySet()) {
Node child = n.children.get(key);
System.out.println(n.getlabel(s)+ "-- " + key + " ---> " + child.getlabel(s));//
if(child.suffixlink != null) System.out.println(" suffixlink: " + child.getlabel(s)+ " --> " + child.suffixlink.getlabel(s));
printTree(child);
}
}
public int edgelength (Node node) {
if(node == root) {
return 0;
}
return node.end-node.start+1;
}
public void addsuffixlink(Node node) {
if(suffixstate!=null) {
suffixstate.suffixlink = node;
}
suffixstate = node;
}
//**creat a suffix tree*//
public void insert(Character c, int index, int stringlength) {
suffixstate = null;
remainder++; // reminder+1,activePoint.length+1
suffixlink = 0;
String ch = c.toString();
//HashMap<String, Node> child = root.children;
Node split ;
while(remainder > 0) {
System.out.println(" reaminder " + " "+remainder + " activepoint. edge " + activepoint.edge + " " + " character " + " " + c + " activelenth" + " " +activepoint.length );
if(activepoint.length==0) {
activepoint.edge = ch; //insert the first character
}
if(activepoint.point.children.containsKey(activepoint.edge)) { // if a suffix is found in this tree
Node next = activepoint.point.children.get(activepoint.edge);
//Character n = s.charAt((next.start + activepoint.length));
//String activel = n.toString();
//System.out.println("ap: "+activepoint.edge+" active l: "+activel);
if(activepoint.length >=next.end-next.start ) { //if activepoint.length is longer than edge
activepoint.length -= next.end-next.start;
activepoint.edge += index-activepoint.length; // ?
activepoint.point = next;
continue;
}
if(s.charAt(next.start+ activepoint.length) ==c) { //activepoint verschieben
activepoint.length++;
addsuffixlink(activepoint.point);
break;
}
//if(!activel.equals(ch)) { //***case1: innersplit** //
split = new Node(next.start, next.start+activepoint.length);
activepoint.point.children.put(activepoint.edge, split);
System.out.println("innersplit edge at " + activepoint.edge + ":" );
SimpleDateFormat formatter= new SimpleDateFormat("yyyy-MM-dd 'at' HH:mm:ss z");
Date date = new Date(System.currentTimeMillis());
System.out.println(formatter.format(date));
split.children.put(s.substring(index), next);
System.out.println("------ " + s.substring(index) + " " + index );
Node newleaf = new Node(index,stringlength);
split.children.put(ch, newleaf);// from split to new leaf
next.start += activepoint.length;
Character chara = s.charAt(next.start);
split.children.put(chara.toString(), next);
addsuffixlink(split);
}
else { // **case3 insert a new edge **//
Node newleaf= new Node(index,stringlength);
activepoint.point.children.put(activepoint.edge, newleaf);
addsuffixlink(activepoint.point);
}
remainder--;
if (activepoint.point == root && activepoint.length > 0) {
activepoint.length--;
Character nextchar = s.charAt(index-remainder+1);
activepoint.edge = nextchar.toString();
} else {
if(activepoint.point.suffixlink != null) {
activepoint.point = activepoint.point.suffixlink;
}else {
activepoint.point = root;
break;
}
}
}
System.out.println( " -----------> ");
}
private class ActivePoint {
public Node point;
public String edge;
public int length;
public ActivePoint(Node point, String edge, int length) {
this.point = point;
this.edge = edge;
this.length = length;
}
}
public boolean search (String str, Node n) {
//Node n = null;
//HashMap<String, Node> child = root.children;
//char[] c = str.toCharArray();
// int index ;
if(n.getlabel(s).startsWith(str)) {
System.out.println("finded" );
return true;
}
for(String key:n.children.keySet()) {
Node child = n.children.get(key);
System.out.println(n.getlabel(s)+ "-- " + key + " ---> " + child.getlabel(s));//
boolean re= search(str,child);
if(re) {
return true;
}
}
/*
while (root!=null) {
for(String key:n.children.keySet()) {
Node child = n.children.get(key);
System.out.println(n.getlabel(s)+ "-- " + key + " ---> " + child.getlabel(s));//
print(child);
}
for (int i=0;i<str.length();i++) {
//char ch = c[i];
//String r = String.valueOf(ch);
if (child.containsKey(str.substring(i, i+1)) ) {
cur = child.get(str.substring(i, i+1));
child = cur.children;
System.out.println(child);
// index = cur.start;
for (index = cur.start; index<cur.end;index++ ) {
if(s.charAt(index) !=str.charAt(i)) {
return false ;
}
}
}
}
}*/
return false;
}
public static String readFileAsString(String fileName)throws Exception {
String data = "";
data = new String(Files.readAllBytes(Paths.get(fileName)));
return data;
}
public static void main(String[] args) throws Exception {
suffixTree tree = new suffixTree();
//***test ***//
//String t = "abxcabdex$";
// String t = "axabxb";
//String t = "xabxa#babxba$";
//String t = "axabxbxba";
String t = "cocoa";
tree.build(t);
// System.out.println(tree.search("cocoa", tree.root));
//System.out.println(tree.search("axabxb"));
//System.out.println(tree.search("axabx"));
SimpleDateFormat formatter= new SimpleDateFormat("yyyy-MM-dd 'at' HH:mm:ss z");
Date date = new Date(System.currentTimeMillis());
System.out.println(formatter.format(date));
String data = readFileAsString("/Users/liweichen/Desktop/doc.txt");
byte[] bytes = data.getBytes("UTF-8");
String s2 = new String(bytes, "UTF-8");
//System.out.println(s2);
//System.out.println(data.length());
//tree.build(s2);
// 1.zeitmessung 2.search suffix 3.graph
//System.out.println(formatter.format(date));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment