flisboac/Tokenizer.java

## Tokenizer.java
package flisboac;

import java.io.IOException;
import java.util.Scanner;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.regex.Pattern;

/**
 * Splits a simple arithmetic expression into {@link Token}s.
 *
 * <p>Recognized token classes are listed in {@link TokenType}. Whitespace
 * between tokens is skipped; any other text raises an
 * {@link AlphabetException}.
 */
public class Tokenizer {

    /** Base class for every checked error reported while tokenizing. */
    public static class TokenizerException extends Exception {

        private static final long serialVersionUID = 1L;

        /** Creates an exception without a detail message. */
        public TokenizerException() {
            super();
        }

        /**
         * Creates an exception with a detail message.
         *
         * @param message description of what went wrong
         */
        public TokenizerException(String message) {
            super(message);
        }
    }

    /** Raised when the input contains text that no {@link TokenType} matches. */
    public static class AlphabetException extends TokenizerException {

        private static final long serialVersionUID = 1L;

        /** Creates an exception without a detail message. */
        public AlphabetException() {
            super();
        }

        /**
         * Creates an exception with a detail message.
         *
         * @param message description of the unrecognized input
         */
        public AlphabetException(String message) {
            super(message);
        }
    }

    /** Regex fragment matching zero or more Java whitespace characters. */
    public static final String AnyWhitespaceRegex = "\\p{javaWhitespace}*";

    /** Compiled form of {@link #AnyWhitespaceRegex}, used to skip token separators. */
    private static final Pattern LEADING_WHITESPACE = Pattern.compile(AnyWhitespaceRegex);

    /** Token classes, each with a short display name and the regex that matches it. */
    public static enum TokenType {

        Identifier("id", "[a-zA-Z_][a-zA-Z_0-9]*"),
        // The '-' is escaped on purpose: an unescaped "+-/" inside a character
        // class is the range '+'..'/', which would also accept ',' and '.'.
        ArithmeticOperator("op", "[=+\\-/*]"),
        OpeningParens("par", "\\("),
        ClosingParens("par", "\\)"),
        NumericLiteral("lit", "[0-9]+");

        private final String name;
        private final String regex;
        private final Pattern pattern;

        private TokenType(String name, String regex) {
            this.name = name;
            this.regex = regex;
            this.pattern = Pattern.compile(regex);
        }

        /** @return the short name printed for tokens of this type */
        public String getName() {
            return name;
        }

        /** @return the regex source that matches one token of this type */
        public String getRegex() {
            return regex;
        }

        /** @return the compiled form of {@link #getRegex()} */
        public Pattern getPattern() {
            return pattern;
        }
    }

    /** One lexical unit: its type, its text and, for identifiers, its index. */
    public static class Token {

        private final TokenType tokenType;
        private final String parsedText;
        private final Integer identifierIndex;
        private final String asString;
        private final Double asDouble;

        /** Creates an empty token; every getter returns {@code null}. */
        public Token() {
            this.tokenType = null;
            this.parsedText = null;
            this.identifierIndex = null;
            this.asString = null;
            this.asDouble = null;
        }

        /**
         * Creates a token that has no identifier index.
         *
         * @param tokenType  type of the token; must not be {@code null}
         * @param parsedText text of the token
         */
        public Token(TokenType tokenType, String parsedText) {
            this(tokenType, parsedText, null);
        }

        /**
         * Creates a token.
         *
         * @param tokenType       type of the token; must not be {@code null}
         * @param parsedText      text of the token
         * @param identifierIndex index assigned to an identifier, or {@code null}
         * @throws NullPointerException  if {@code tokenType} is {@code null}
         * @throws NumberFormatException if the type is {@code NumericLiteral} and
         *                               {@code parsedText} is not a parsable number
         */
        public Token(TokenType tokenType, String parsedText, Integer identifierIndex) {
            this.tokenType = tokenType;
            this.parsedText = parsedText;
            this.identifierIndex = identifierIndex;
            this.asString = parsedText;
            // Only numeric literals carry a numeric value.
            switch (tokenType) {
                case NumericLiteral:
                    this.asDouble = Double.parseDouble(parsedText);
                    break;
                default:
                    this.asDouble = null;
                    break;
            }
        }

        /** @return the type of this token */
        public TokenType getTokenType() {
            return tokenType;
        }

        /** @return the text this token was built from */
        public String getParsedText() {
            return parsedText;
        }

        /** @return the identifier index, or {@code null} if none was given */
        public Integer getIdentifierIndex() {
            return identifierIndex;
        }

        /** @return the token text */
        public String getAsString() {
            return asString;
        }

        /** @return the numeric value of a numeric literal, otherwise {@code null} */
        public Double getAsDouble() {
            return asDouble;
        }
    }

    /**
     * Among the scanners whose last match starts at offset 0, returns the entry
     * whose match ends earliest.
     *
     * @param matches scanners that each hold a successful match result
     * @param <K>     key type of the map
     * @param <V>     scanner type of the map
     * @return the entry with the shortest match anchored at offset 0, or
     *         {@code null} if no scanner matched at offset 0
     * @throws IllegalStateException if a scanner has no match result available
     */
    public <K, V extends Scanner> Map.Entry<K, V> findSmallestMatch(Map<K, V> matches) {
        Map.Entry<K, V> smallest = null;

        for (Map.Entry<K, V> match : matches.entrySet()) {
            if (match.getValue().match().start() == 0) {
                if (smallest == null
                        || smallest.getValue().match().end() > match.getValue().match().end()) {
                    smallest = match;
                }
            }
        }

        return smallest;
    }

    /**
     * Tokenizes an expression.
     *
     * <p>The input is trimmed, whitespace between tokens is skipped, and at each
     * position the token type whose anchored match ends earliest is chosen.
     * Distinct identifiers are numbered from 1 in order of first appearance;
     * repeated identifiers reuse their number.
     *
     * @param expression text to tokenize; must not be {@code null}
     * @return the tokens in input order (empty for blank input)
     * @throws AlphabetException    if some text matches no token type
     * @throws TokenizerException   declared for other tokenizing errors
     * @throws IOException          declared for interface compatibility
     * @throws NullPointerException if {@code expression} is {@code null}
     */
    public List<Token> scan(String expression) throws TokenizerException, IOException {
        final String input = expression.trim();
        final int length = input.length();

        // One reusable matcher per token type; region() re-anchors it each step,
        // so the input is never copied or re-searched from the start.
        final TokenType[] types = TokenType.values();
        final java.util.regex.Matcher[] matchers = new java.util.regex.Matcher[types.length];
        for (int i = 0; i < types.length; i++) {
            matchers[i] = types[i].getPattern().matcher(input);
        }
        final java.util.regex.Matcher whitespace = LEADING_WHITESPACE.matcher(input);

        List<Token> tokens = new ArrayList<>();
        Map<String, Integer> identifiers = new HashMap<>();
        int inputIndex = 0;

        while (inputIndex < length) {
            // Skip separators explicitly instead of trimming the matched text:
            // String.trim() does not remove every character that
            // \p{javaWhitespace} accepts (e.g. U+2003).
            whitespace.region(inputIndex, length);
            if (whitespace.lookingAt()) {
                inputIndex = whitespace.end();
            }
            if (inputIndex >= length) {
                break;
            }

            TokenType type = null;
            int tokenEnd = -1;
            for (int i = 0; i < types.length; i++) {
                java.util.regex.Matcher candidate = matchers[i].region(inputIndex, length);
                if (candidate.lookingAt() && (type == null || candidate.end() < tokenEnd)) {
                    type = types[i];
                    tokenEnd = candidate.end();
                }
            }

            if (type == null) {
                throw new AlphabetException("No token matches the trimmed input at offset "
                        + inputIndex + " (character '" + input.charAt(inputIndex) + "')");
            }

            String tokenText = input.substring(inputIndex, tokenEnd);
            inputIndex = tokenEnd;

            Integer identifierIndex = null;
            if (type == TokenType.Identifier) {
                identifierIndex = identifiers.get(tokenText);
                if (identifierIndex == null) {
                    identifierIndex = identifiers.size() + 1;
                    identifiers.put(tokenText, identifierIndex);
                }
            }

            tokens.add(new Token(type, tokenText, identifierIndex));
        }

        return tokens;
    }

    /**
     * Joins the command-line arguments with spaces, tokenizes the result and
     * prints each token as {@code <name:value>} on one line.
     *
     * <p>Exit status: 0 on success, 1 on a tokenizing error, 2 on an I/O error.
     *
     * @param args pieces of the expression to tokenize
     */
    public static void main(String[] args) {
        StringBuilder buffer = new StringBuilder();

        for (String arg : args) {
            buffer.append(arg);
            buffer.append(" ");
        }

        try {
            Tokenizer tokenizer = new Tokenizer();
            List<Token> tokens = tokenizer.scan(buffer.toString());

            for (Token token : tokens) {
                System.out.print("<");
                System.out.print(token.getTokenType().getName());
                System.out.print(":");
                Integer identifier = token.getIdentifierIndex();

                // Identifiers are shown by index, everything else by its text.
                if (identifier != null) {
                    System.out.print(identifier);
                } else {
                    System.out.print(token.getAsString());
                }

                System.out.print(">");
            }

            System.out.println();

        } catch (TokenizerException ex) {
            System.err.println("Invalid syntax.");
            System.exit(1);

        } catch (IOException ex) {
            System.err.println("Error while reading input.");
            System.exit(2);
        }

        System.exit(0);
    }

}
	package flisboac;

	import java.io.IOException;
	import java.util.Scanner;
	import java.util.List;
	import java.util.Map;
	import java.util.HashMap;
	import java.util.ArrayList;
	import java.util.regex.Pattern;

	/**
	 * Splits a simple arithmetic expression into {@link Token}s.
	 *
	 * <p>Recognized token classes are listed in {@link TokenType}. Whitespace
	 * between tokens is skipped; any other text raises an
	 * {@link AlphabetException}.
	 */
	public class Tokenizer {

	    /** Base class for every checked error reported while tokenizing. */
	    public static class TokenizerException extends Exception {

	        private static final long serialVersionUID = 1L;

	        /** Creates an exception without a detail message. */
	        public TokenizerException() {
	            super();
	        }

	        /**
	         * Creates an exception with a detail message.
	         *
	         * @param message description of what went wrong
	         */
	        public TokenizerException(String message) {
	            super(message);
	        }
	    }

	    /** Raised when the input contains text that no {@link TokenType} matches. */
	    public static class AlphabetException extends TokenizerException {

	        private static final long serialVersionUID = 1L;

	        /** Creates an exception without a detail message. */
	        public AlphabetException() {
	            super();
	        }

	        /**
	         * Creates an exception with a detail message.
	         *
	         * @param message description of the unrecognized input
	         */
	        public AlphabetException(String message) {
	            super(message);
	        }
	    }

	    /** Regex fragment matching zero or more Java whitespace characters. */
	    public static final String AnyWhitespaceRegex = "\\p{javaWhitespace}*";

	    /** Compiled form of {@link #AnyWhitespaceRegex}, used to skip token separators. */
	    private static final Pattern LEADING_WHITESPACE = Pattern.compile(AnyWhitespaceRegex);

	    /** Token classes, each with a short display name and the regex that matches it. */
	    public static enum TokenType {

	        Identifier("id", "[a-zA-Z_][a-zA-Z_0-9]*"),
	        // The '-' is escaped on purpose: an unescaped "+-/" inside a character
	        // class is the range '+'..'/', which would also accept ',' and '.'.
	        ArithmeticOperator("op", "[=+\\-/*]"),
	        OpeningParens("par", "\\("),
	        ClosingParens("par", "\\)"),
	        NumericLiteral("lit", "[0-9]+");

	        private final String name;
	        private final String regex;
	        private final Pattern pattern;

	        private TokenType(String name, String regex) {
	            this.name = name;
	            this.regex = regex;
	            this.pattern = Pattern.compile(regex);
	        }

	        /** @return the short name printed for tokens of this type */
	        public String getName() {
	            return name;
	        }

	        /** @return the regex source that matches one token of this type */
	        public String getRegex() {
	            return regex;
	        }

	        /** @return the compiled form of {@link #getRegex()} */
	        public Pattern getPattern() {
	            return pattern;
	        }
	    }

	    /** One lexical unit: its type, its text and, for identifiers, its index. */
	    public static class Token {

	        private final TokenType tokenType;
	        private final String parsedText;
	        private final Integer identifierIndex;
	        private final String asString;
	        private final Double asDouble;

	        /** Creates an empty token; every getter returns {@code null}. */
	        public Token() {
	            this.tokenType = null;
	            this.parsedText = null;
	            this.identifierIndex = null;
	            this.asString = null;
	            this.asDouble = null;
	        }

	        /**
	         * Creates a token that has no identifier index.
	         *
	         * @param tokenType  type of the token; must not be {@code null}
	         * @param parsedText text of the token
	         */
	        public Token(TokenType tokenType, String parsedText) {
	            this(tokenType, parsedText, null);
	        }

	        /**
	         * Creates a token.
	         *
	         * @param tokenType       type of the token; must not be {@code null}
	         * @param parsedText      text of the token
	         * @param identifierIndex index assigned to an identifier, or {@code null}
	         * @throws NullPointerException  if {@code tokenType} is {@code null}
	         * @throws NumberFormatException if the type is {@code NumericLiteral} and
	         *                               {@code parsedText} is not a parsable number
	         */
	        public Token(TokenType tokenType, String parsedText, Integer identifierIndex) {
	            this.tokenType = tokenType;
	            this.parsedText = parsedText;
	            this.identifierIndex = identifierIndex;
	            this.asString = parsedText;
	            // Only numeric literals carry a numeric value.
	            switch (tokenType) {
	                case NumericLiteral:
	                    this.asDouble = Double.parseDouble(parsedText);
	                    break;
	                default:
	                    this.asDouble = null;
	                    break;
	            }
	        }

	        /** @return the type of this token */
	        public TokenType getTokenType() {
	            return tokenType;
	        }

	        /** @return the text this token was built from */
	        public String getParsedText() {
	            return parsedText;
	        }

	        /** @return the identifier index, or {@code null} if none was given */
	        public Integer getIdentifierIndex() {
	            return identifierIndex;
	        }

	        /** @return the token text */
	        public String getAsString() {
	            return asString;
	        }

	        /** @return the numeric value of a numeric literal, otherwise {@code null} */
	        public Double getAsDouble() {
	            return asDouble;
	        }
	    }

	    /**
	     * Among the scanners whose last match starts at offset 0, returns the entry
	     * whose match ends earliest.
	     *
	     * @param matches scanners that each hold a successful match result
	     * @param <K>     key type of the map
	     * @param <V>     scanner type of the map
	     * @return the entry with the shortest match anchored at offset 0, or
	     *         {@code null} if no scanner matched at offset 0
	     * @throws IllegalStateException if a scanner has no match result available
	     */
	    public <K, V extends Scanner> Map.Entry<K, V> findSmallestMatch(Map<K, V> matches) {
	        Map.Entry<K, V> smallest = null;

	        for (Map.Entry<K, V> match : matches.entrySet()) {
	            if (match.getValue().match().start() == 0) {
	                if (smallest == null
	                        || smallest.getValue().match().end() > match.getValue().match().end()) {
	                    smallest = match;
	                }
	            }
	        }

	        return smallest;
	    }

	    /**
	     * Tokenizes an expression.
	     *
	     * <p>The input is trimmed, whitespace between tokens is skipped, and at each
	     * position the token type whose anchored match ends earliest is chosen.
	     * Distinct identifiers are numbered from 1 in order of first appearance;
	     * repeated identifiers reuse their number.
	     *
	     * @param expression text to tokenize; must not be {@code null}
	     * @return the tokens in input order (empty for blank input)
	     * @throws AlphabetException    if some text matches no token type
	     * @throws TokenizerException   declared for other tokenizing errors
	     * @throws IOException          declared for interface compatibility
	     * @throws NullPointerException if {@code expression} is {@code null}
	     */
	    public List<Token> scan(String expression) throws TokenizerException, IOException {
	        final String input = expression.trim();
	        final int length = input.length();

	        // One reusable matcher per token type; region() re-anchors it each step,
	        // so the input is never copied or re-searched from the start.
	        final TokenType[] types = TokenType.values();
	        final java.util.regex.Matcher[] matchers = new java.util.regex.Matcher[types.length];
	        for (int i = 0; i < types.length; i++) {
	            matchers[i] = types[i].getPattern().matcher(input);
	        }
	        final java.util.regex.Matcher whitespace = LEADING_WHITESPACE.matcher(input);

	        List<Token> tokens = new ArrayList<>();
	        Map<String, Integer> identifiers = new HashMap<>();
	        int inputIndex = 0;

	        while (inputIndex < length) {
	            // Skip separators explicitly instead of trimming the matched text:
	            // String.trim() does not remove every character that
	            // \p{javaWhitespace} accepts (e.g. U+2003).
	            whitespace.region(inputIndex, length);
	            if (whitespace.lookingAt()) {
	                inputIndex = whitespace.end();
	            }
	            if (inputIndex >= length) {
	                break;
	            }

	            TokenType type = null;
	            int tokenEnd = -1;
	            for (int i = 0; i < types.length; i++) {
	                java.util.regex.Matcher candidate = matchers[i].region(inputIndex, length);
	                if (candidate.lookingAt() && (type == null || candidate.end() < tokenEnd)) {
	                    type = types[i];
	                    tokenEnd = candidate.end();
	                }
	            }

	            if (type == null) {
	                throw new AlphabetException("No token matches the trimmed input at offset "
	                        + inputIndex + " (character '" + input.charAt(inputIndex) + "')");
	            }

	            String tokenText = input.substring(inputIndex, tokenEnd);
	            inputIndex = tokenEnd;

	            Integer identifierIndex = null;
	            if (type == TokenType.Identifier) {
	                identifierIndex = identifiers.get(tokenText);
	                if (identifierIndex == null) {
	                    identifierIndex = identifiers.size() + 1;
	                    identifiers.put(tokenText, identifierIndex);
	                }
	            }

	            tokens.add(new Token(type, tokenText, identifierIndex));
	        }

	        return tokens;
	    }

	    /**
	     * Joins the command-line arguments with spaces, tokenizes the result and
	     * prints each token as {@code <name:value>} on one line.
	     *
	     * <p>Exit status: 0 on success, 1 on a tokenizing error, 2 on an I/O error.
	     *
	     * @param args pieces of the expression to tokenize
	     */
	    public static void main(String[] args) {
	        StringBuilder buffer = new StringBuilder();

	        for (String arg : args) {
	            buffer.append(arg);
	            buffer.append(" ");
	        }

	        try {
	            Tokenizer tokenizer = new Tokenizer();
	            List<Token> tokens = tokenizer.scan(buffer.toString());

	            for (Token token : tokens) {
	                System.out.print("<");
	                System.out.print(token.getTokenType().getName());
	                System.out.print(":");
	                Integer identifier = token.getIdentifierIndex();

	                // Identifiers are shown by index, everything else by its text.
	                if (identifier != null) {
	                    System.out.print(identifier);
	                } else {
	                    System.out.print(token.getAsString());
	                }

	                System.out.print(">");
	            }

	            System.out.println();

	        } catch (TokenizerException ex) {
	            System.err.println("Invalid syntax.");
	            System.exit(1);

	        } catch (IOException ex) {
	            System.err.println("Error while reading input.");
	            System.exit(2);
	        }

	        System.exit(0);
	    }

	}