libliflin/Searcher.java

## Searcher.java
import java.util.*;
import java.util.function.Function;

/**
 * Whole-word search over a fixed text, with per-character normalisation.
 *
 * <p>At construction time the text is split into words using the supplied
 * {@code isWhiteSpace} function. Every word is passed, character by character,
 * through {@code translate}, and the translated word becomes a key of an
 * in-memory index. Queries are translated the same way and looked up by exact
 * match, so only whole words can be found.
 *
 * <p>Open items:
 * <ul>
 *   <li>todo: not threadsafe</li>
 *   <li>todo: could be faster with regards to TLBs</li>
 *   <li>todo: could multi-thread the indexer.</li>
 *   <li>todo: index all the things; not just whole words.</li>
 *   <li>todo: if you index all the things; a prefix tree would probably help; ESPECIALLY with space.</li>
 *   <li>todo: charsequence: what's that?</li>
 * </ul>
 */
public class Searcher {

    /** Default separator test: any character that is not a letter, {@code '_'} or {@code '$'} separates words. */
    public static Function<Character, Boolean> TABLE_NAME = (c) -> !(Character.isLetter(c) || c == '_' || c == '$');
    /** Default translation: each character is replaced by its upper-case form. */
    public static Function<Character, char[]> UPPERCASE = (c) -> new char[]{Character.toUpperCase(c)};

    private final char[] rawText;
    private final Function<Character, Boolean> isWhiteSpace;
    private final Function<Character, char[]> translate;
    // translated word -> {begin, end} (end exclusive) of each occurrence in rawText, in text order
    private final Map<String, List<int[]>> index = new HashMap<>();

    /**
     * Indexes {@code text} using {@link #TABLE_NAME} as separator test and {@link #UPPERCASE} as translation.
     *
     * @param text the text to index
     * @throws NullPointerException if {@code text} is null
     */
    public Searcher(String text) {
        this(text, TABLE_NAME, UPPERCASE);
    }

    /**
     * Indexes {@code text} with a custom separator test and the {@link #UPPERCASE} translation.
     *
     * @param text         the text to index
     * @param isWhiteSpace returns true for characters that separate words
     * @throws NullPointerException if any argument is null
     */
    public Searcher(String text, Function<Character, Boolean> isWhiteSpace) {
        this(text, isWhiteSpace, UPPERCASE);
    }

    /**
     * Indexes {@code text} with a custom separator test and a custom translation.
     *
     * @param text         the text to index
     * @param isWhiteSpace returns true for characters that separate words
     * @param translate    maps one raw character to the characters used in the index key;
     *                     it is applied to indexed words and to queries alike
     * @throws NullPointerException if any argument is null
     */
    public Searcher(String text, Function<Character, Boolean> isWhiteSpace, Function<Character, char[]> translate) {
        // Fail at construction rather than on the first character or the first query.
        this.rawText = Objects.requireNonNull(text, "text").toCharArray();
        this.isWhiteSpace = Objects.requireNonNull(isWhiteSpace, "isWhiteSpace");
        this.translate = Objects.requireNonNull(translate, "translate");
        buildIndex();
    }

    /**
     * Counts the occurrences of {@code test} as a whole word.
     *
     * @param test the word to look for; it is translated before the lookup
     * @return the number of occurrences, {@code 0} if there are none,
     *         or {@code -1} if {@code test} is null or empty
     */
    public int count(String test) {
        if (test == null || test.isEmpty()) {
            return -1;
        }
        List<int[]> spans = index.get(getTranslated(0, test.length(), test.toCharArray()));
        return spans == null ? 0 : spans.size();
    }

    /**
     * Returns every occurrence of {@code test} together with its surrounding raw text.
     *
     * <p>Each returned string consists of up to {@code before} characters preceding the
     * matched word, the word as it appears in the raw text, and up to {@code after}
     * characters following it. The window is clipped to the bounds of the text. A negative
     * {@code before} or {@code after} shrinks the window; a window that would end before
     * it starts yields an empty string.
     *
     * @param test   the word to look for; it is translated before the lookup
     * @param before number of characters of context before the match
     * @param after  number of characters of context after the match
     * @return one context per occurrence in text order, an empty list if there is no
     *         occurrence, or {@code null} if {@code test} is null or empty
     */
    public List<String> contexts(String test, int before, int after) {
        if (test == null || test.isEmpty()) {
            return null;
        }
        List<String> contexts = new ArrayList<>();
        List<int[]> spans = index.get(getTranslated(0, test.length(), test.toCharArray()));
        if (spans == null) {
            return contexts;
        }
        final int length = rawText.length;
        for (int[] span : spans) {
            // long arithmetic: begin - before and end + after can exceed the int range
            long start = (long) span[0] - before;
            // span[1] is the end of the raw word, which may differ from
            // begin + test.length() when translate is not one-to-one
            long end = (long) span[1] + after;
            int realStart = (int) Math.max(0L, Math.min(start, (long) length));
            // never let the window end before it starts
            int realEnd = (int) Math.max((long) realStart, Math.min(end, (long) length));
            contexts.add(new String(rawText, realStart, realEnd - realStart));
        }
        return contexts;
    }

    /** Splits {@code rawText} into maximal runs of non-separator characters and indexes each run. */
    private void buildIndex() {
        final int max = rawText.length;
        int i = 0;
        while (i < max) {
            if (isWhiteSpace.apply(rawText[i])) {
                i++;
                continue;
            }
            int begin = i;
            do {
                i++;
            } while (i < max && !isWhiteSpace.apply(rawText[i]));
            addToIndex(begin, i);
            // rawText[i], if it exists, was just classified as a separator: step over it
            i++;
        }
    }

    /** Records the raw word {@code [beginIndex, endIndex)} under its translated key. */
    private void addToIndex(int beginIndex, int endIndex) {
        // todo: make so you can parallelize this with CAS.
        String translated = getTranslated(beginIndex, endIndex, rawText);
        index.computeIfAbsent(translated, key -> new ArrayList<>()).add(new int[]{beginIndex, endIndex});
    }

    /** Applies {@code translate} to {@code xlate[beginIndex..endIndex)} and concatenates the results. */
    private String getTranslated(int beginIndex, int endIndex, char[] xlate) {
        // todo: buffer me plz
        StringBuilder builder = new StringBuilder(Math.max(0, endIndex - beginIndex));
        for (int i = beginIndex; i < endIndex; i++) {
            builder.append(translate.apply(xlate[i]));
        }
        return builder.toString();
    }
}
	import java.util.*;
	import java.util.function.Function;

	/**
	 * Whole-word search over a fixed text, with per-character normalisation.
	 *
	 * <p>At construction time the text is split into words using the supplied
	 * {@code isWhiteSpace} function. Every word is passed, character by character,
	 * through {@code translate}, and the translated word becomes a key of an
	 * in-memory index. Queries are translated the same way and looked up by exact
	 * match, so only whole words can be found.
	 *
	 * <p>Open items:
	 * <ul>
	 *   <li>todo: not threadsafe</li>
	 *   <li>todo: could be faster with regards to TLBs</li>
	 *   <li>todo: could multi-thread the indexer.</li>
	 *   <li>todo: index all the things; not just whole words.</li>
	 *   <li>todo: if you index all the things; a prefix tree would probably help; ESPECIALLY with space.</li>
	 *   <li>todo: charsequence: what's that?</li>
	 * </ul>
	 */
	public class Searcher {

	    /** Default separator test: any character that is not a letter, {@code '_'} or {@code '$'} separates words. */
	    public static Function<Character, Boolean> TABLE_NAME = (c) -> !(Character.isLetter(c) || c == '_' || c == '$');
	    /** Default translation: each character is replaced by its upper-case form. */
	    public static Function<Character, char[]> UPPERCASE = (c) -> new char[]{Character.toUpperCase(c)};

	    private final char[] rawText;
	    private final Function<Character, Boolean> isWhiteSpace;
	    private final Function<Character, char[]> translate;
	    // translated word -> {begin, end} (end exclusive) of each occurrence in rawText, in text order
	    private final Map<String, List<int[]>> index = new HashMap<>();

	    /**
	     * Indexes {@code text} using {@link #TABLE_NAME} as separator test and {@link #UPPERCASE} as translation.
	     *
	     * @param text the text to index
	     * @throws NullPointerException if {@code text} is null
	     */
	    public Searcher(String text) {
	        this(text, TABLE_NAME, UPPERCASE);
	    }

	    /**
	     * Indexes {@code text} with a custom separator test and the {@link #UPPERCASE} translation.
	     *
	     * @param text         the text to index
	     * @param isWhiteSpace returns true for characters that separate words
	     * @throws NullPointerException if any argument is null
	     */
	    public Searcher(String text, Function<Character, Boolean> isWhiteSpace) {
	        this(text, isWhiteSpace, UPPERCASE);
	    }

	    /**
	     * Indexes {@code text} with a custom separator test and a custom translation.
	     *
	     * @param text         the text to index
	     * @param isWhiteSpace returns true for characters that separate words
	     * @param translate    maps one raw character to the characters used in the index key;
	     *                     it is applied to indexed words and to queries alike
	     * @throws NullPointerException if any argument is null
	     */
	    public Searcher(String text, Function<Character, Boolean> isWhiteSpace, Function<Character, char[]> translate) {
	        // Fail at construction rather than on the first character or the first query.
	        this.rawText = Objects.requireNonNull(text, "text").toCharArray();
	        this.isWhiteSpace = Objects.requireNonNull(isWhiteSpace, "isWhiteSpace");
	        this.translate = Objects.requireNonNull(translate, "translate");
	        buildIndex();
	    }

	    /**
	     * Counts the occurrences of {@code test} as a whole word.
	     *
	     * @param test the word to look for; it is translated before the lookup
	     * @return the number of occurrences, {@code 0} if there are none,
	     *         or {@code -1} if {@code test} is null or empty
	     */
	    public int count(String test) {
	        if (test == null || test.isEmpty()) {
	            return -1;
	        }
	        List<int[]> spans = index.get(getTranslated(0, test.length(), test.toCharArray()));
	        return spans == null ? 0 : spans.size();
	    }

	    /**
	     * Returns every occurrence of {@code test} together with its surrounding raw text.
	     *
	     * <p>Each returned string consists of up to {@code before} characters preceding the
	     * matched word, the word as it appears in the raw text, and up to {@code after}
	     * characters following it. The window is clipped to the bounds of the text. A negative
	     * {@code before} or {@code after} shrinks the window; a window that would end before
	     * it starts yields an empty string.
	     *
	     * @param test   the word to look for; it is translated before the lookup
	     * @param before number of characters of context before the match
	     * @param after  number of characters of context after the match
	     * @return one context per occurrence in text order, an empty list if there is no
	     *         occurrence, or {@code null} if {@code test} is null or empty
	     */
	    public List<String> contexts(String test, int before, int after) {
	        if (test == null || test.isEmpty()) {
	            return null;
	        }
	        List<String> contexts = new ArrayList<>();
	        List<int[]> spans = index.get(getTranslated(0, test.length(), test.toCharArray()));
	        if (spans == null) {
	            return contexts;
	        }
	        final int length = rawText.length;
	        for (int[] span : spans) {
	            // long arithmetic: begin - before and end + after can exceed the int range
	            long start = (long) span[0] - before;
	            // span[1] is the end of the raw word, which may differ from
	            // begin + test.length() when translate is not one-to-one
	            long end = (long) span[1] + after;
	            int realStart = (int) Math.max(0L, Math.min(start, (long) length));
	            // never let the window end before it starts
	            int realEnd = (int) Math.max((long) realStart, Math.min(end, (long) length));
	            contexts.add(new String(rawText, realStart, realEnd - realStart));
	        }
	        return contexts;
	    }

	    /** Splits {@code rawText} into maximal runs of non-separator characters and indexes each run. */
	    private void buildIndex() {
	        final int max = rawText.length;
	        int i = 0;
	        while (i < max) {
	            if (isWhiteSpace.apply(rawText[i])) {
	                i++;
	                continue;
	            }
	            int begin = i;
	            do {
	                i++;
	            } while (i < max && !isWhiteSpace.apply(rawText[i]));
	            addToIndex(begin, i);
	            // rawText[i], if it exists, was just classified as a separator: step over it
	            i++;
	        }
	    }

	    /** Records the raw word {@code [beginIndex, endIndex)} under its translated key. */
	    private void addToIndex(int beginIndex, int endIndex) {
	        // todo: make so you can parallelize this with CAS.
	        String translated = getTranslated(beginIndex, endIndex, rawText);
	        index.computeIfAbsent(translated, key -> new ArrayList<>()).add(new int[]{beginIndex, endIndex});
	    }

	    /** Applies {@code translate} to {@code xlate[beginIndex..endIndex)} and concatenates the results. */
	    private String getTranslated(int beginIndex, int endIndex, char[] xlate) {
	        // todo: buffer me plz
	        StringBuilder builder = new StringBuilder(Math.max(0, endIndex - beginIndex));
	        for (int i = beginIndex; i < endIndex; i++) {
	            builder.append(translate.apply(xlate[i]));
	        }
	        return builder.toString();
	    }
	}