Skip to content

Instantly share code, notes, and snippets.

@dizzi
Created February 8, 2011 11:41
Show Gist options
  • Save dizzi/816303 to your computer and use it in GitHub Desktop.
Save dizzi/816303 to your computer and use it in GitHub Desktop.
package pal;
public class LexAnal {
String line = null;
int index = 0;
int marker = 0;
String error = "";
String name = "";
String propval = "";
static int EOF = -1;
static final int R1 = 1, R2 = 2, R3 = 3, R4 = 4, R5 = 5, R6 = 6;
LexAnal(String line) {
line = line.replaceAll("\\(\\\\(?=\\d)", "\u0008");
line = line.replaceAll("\\\\\\.", "\u0001");
line = line.replaceAll("\\\\\\*", "\u0002");
line = line.replaceAll("\\\\\\+", "\u0003");
line = line.replaceAll("\\\\\\(", "\u0004");
line = line.replaceAll("\\\\\\)", "\u0005");
line = line.replaceAll("\\\\\\|", "\u0006");
line = line.replaceAll("\\\\\\\\", "\u0007");
this.line = line;
}
protected static boolean isdigit(char X) {
return (X >= '0' && X <= '9');
}
protected static boolean isSpecialChar(char X) {
return ((X == '.') || (X == '*') || (X == '+'));
}
protected static boolean isReducedASCII(char X) {
return ((X >= 0x01 && X <= 0x0FF) && (X != '\u0008') && (X != '.') && (X != '*') && (X != '+') && (X != '(')
&& (X != ')') && (X != '|') && (X != '\\'));
}
// set marker and read next character
private int getChar() {
if (index >= line.length())
return EOF;
marker = index;
return line.charAt(index++);
}
// set pointer one char back (to marked position)
private void pushBackChar() {
index = marker;
}
public void getToken() throws Exception {
char[] tmp = new char[1];
int myChar = 0;
int state = R1;
// String prop = "";
name = "";
propval = "";
// read chars and create tokens
try {
do {
// ///////////////////////////////////////////////////////////
myChar = getChar();
if (myChar == EOF && state != R1)
return;
else if (myChar == EOF) {
name = "finished";
return;
}
// throw new Exception("finished");
switch (state) {
case R1:
if (myChar == '(') { // rule 2
name = "oBracket";
state = R1;
return;
}
if (myChar == ')') { // rule 2
name = "cBracket";
state = R1;
return;
}
if (myChar == 0x08) { // rule 4
name = "brefNum";
state = R2;
pushBackChar();
break;
}
if (isReducedASCII((char) myChar)) { // rule 5
name = "redASCII";
state = R3;
pushBackChar();
break;
}
if (isSpecialChar((char) myChar)) { // rule 6
name = "specChar";
tmp[0] = (char) myChar;
propval = "" + new String(tmp);
state = R1;
return;
}
if (myChar == '|') {
name = "or";
state = R1;
return;
} else {
throw new Exception("bad char");
}
case R2:
if (isdigit((char) myChar)) {
tmp[0] = (char) myChar;
propval = propval + new String(tmp);
break;
} else if (myChar == ')') {
state = R1;
return;
} else {
new Exception("lexerror");
}
break;
case R3:
if (isReducedASCII((char) myChar)) {
tmp[0] = (char) myChar;
propval = propval + new String(tmp);
break;
} else {
pushBackChar();
state = R1;
return;
}
}
// ///////////////////////////////////////////////////////////
} while (true);
} catch (Exception e) {
if (e.getMessage().equals("finished"))
throw new Exception("finished");
tmp[0]=(char)myChar;
error = "Found \"" + new String(tmp) + "\" expected something else";
System.out.println(error);
throw new Exception("lexerror");
}
}
public static void main(String[] args) {
// LexAnal la = new LexAnal("\\. \\. \\| j\\\\ \\+ \\(");
// LexAnal la = new LexAnal("((xdss|y)+)=(\\2)");
LexAnal la = new LexAnal("***");
// LexAnal la = new LexAnal("(((\\(1\\))))");
try {
while (true) {
la.getToken();
System.out.println(String.format("%s %s", la.name, la.propval));
if(la.name.equals("finished")){
break;
}
}
} catch (Exception e) {
System.out.println(e.getMessage());
}
}
}
package pal;
public class SynAnal {
LexAnal la = null;
SynAnal(LexAnal lea) {
la = lea;
}
public void start() throws Exception {
la.getToken();
RULE1();
}
// 1. S --> E
private void RULE1() throws Exception {
if (la.name.equals("oBracket")) {
RULE2();
} else if (la.name.equals("specChar")) {
RULE6();
} else if (la.name.equals("redASCII")) {
RULE5();
} else if (la.name.equals("brefNum")) {
RULE4();
} else if (la.name.equals("cBracket")) {
throw new Exception("Wrong initial rule symbol )");
} else if (la.name.equals("or")) {
RULE3();
} else if (la.name.equals("finished")) {
return;
} else {
RULE1();
}
}
// 2. E --> "("E")"E
private void RULE2() throws Exception {
la.getToken();
RULE1();
if (la.name.equals("cBracket")) {
la.getToken();
if(la.name.equals("cBracket")||la.name.equals("finished"))
return;
RULE1();
} else {
throw new Exception(") expected");
}
}
// 3. E --> E"|"E
private void RULE3() throws Exception{
la.getToken();
if(la.name.equals("cBracket")||la.name.equals("finished"))
return;
RULE1();
}
// 4. E --> "(\"CN")"E
// 11. N --> CN
// 12. N --> eplsilon
// 13. C --> libovolná císlice
private void RULE4() throws Exception {
la.getToken();
if(la.name.equals("cBracket")||la.name.equals("finished"))
return;
RULE1();
}
// 5. E --> ZE
// 8. Z --> libovolný ASCII znak mimo: .,*,+,(,),|,\
private void RULE5() throws Exception {
la.getToken();
if(la.name.equals("cBracket")||la.name.equals("finished"))
return;
RULE1();
}
// 6. E --> XE
// 9. X --> "." | "*" | "+"
// 10. X --> "\." | "\*" | "\+" | "\(" | "\)" | "\|" | "\\"
private void RULE6() throws Exception {
la.getToken();
if(la.name.equals("cBracket")||la.name.equals("finished"))
return;
RULE1();
}
public static void main(String[] args) {
LexAnal la = new LexAnal("((x((x(((x)xx))))))");
SynAnal sa = new SynAnal(la);
try {
sa.start();
System.out.println("Finished");
} catch (Exception e) {
System.out.println(e.getMessage());
}
}
}
package pal;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
public class Main {
public static void main(String[] args) throws Exception {
BufferedReader br = null;
if (args.length>0){
if(args[0].equals("xxx"))
br = new BufferedReader(new FileReader("test.in"));
else
br = new BufferedReader(new FileReader(args[0]));
} else
br = new BufferedReader(new InputStreamReader(System.in));
String exp = br.readLine();
LexAnal la = new LexAnal(exp);
SynAnal sa = new SynAnal(la);
try{
sa.start();
} catch (Exception e){
System.out.println("0");
return;
}
exp = cripleString(exp);
boolean nonMatchingFlag = false;
Pattern p = null;
try{
p = Pattern.compile(exp);
} catch (PatternSyntaxException e){
nonMatchingFlag = true;
}
System.out.println("1");
String text = br.readLine();
while(text!=null && !text.equals("")){
if(nonMatchingFlag)
System.out.println("0");
else{
Matcher m = p.matcher(text);
if(m.matches()){
System.out.println("1");
} else {
System.out.println("0");
}
}
text = br.readLine();
}
System.out.println();
}
private static String cripleString(String exp) {
// 1. escape unwanted chars $ ^ [ ] { } \
exp = exp.replaceAll("\\[", "\\\\[");
exp = exp.replaceAll("\\]", "\\\\]");
exp = exp.replaceAll("\\^", "\\\\^");
exp = exp.replaceAll("\\{", "\\\\{");
exp = exp.replaceAll("\\}", "\\\\}");
exp = exp.replaceAll("\\$", "\\\\\\$");
// 2. add ( ) to + and * groups for backreferences
exp = bracketing(exp, '+');
exp = bracketing(exp, '*');
return exp;
}
private static String bracketing(String exp, char bracketChar) {
int close;
int open;
boolean check;
// boolean hasCandidate;
int counter;
for(int nextStart = exp.length()-1; nextStart!=0; ){
check = true; counter = 0;
// hasCandidate = false;
close = open = -1;
for(int i = nextStart; i>=0; i--){ // for every special character
if(exp.charAt(i)==bracketChar && i-1>=0 && exp.charAt(i-1)==')' && check){ // if firste checked specChar preceed by )
check = false;
nextStart=i-1; // shift next pass
close = i + 1; // set closing position
// if(i+1<exp.length() && exp.charAt(i+1)==')'){ // if ) follows it can be already bracketed
// hasCandidate = true; // its only candidate
// }
} else if(!check){ // ok count closing and opening brackets
if(exp.charAt(i)==')')
counter++;
if(exp.charAt(i)=='(')
counter--;
if(counter==0){ // its zero, we should place bracket here
// if(hasCandidate){ // if this is candidate check if it has closing bracket if yes, forget it
// if((i-1>=0)&&exp.charAt(i-1)=='(')
// break;
// }
open = i;
break;
}
}
if(i==0 && check){ // if we processed all the specchars and our index is 0, just break;
nextStart=0;
}
}
if(open >= 0 && close>= 0){ // process only non negative intervals
// System.out.println(String.format("%d %d", open, close));
StringBuffer sb = new StringBuffer(exp);
sb.insert(close, ')');
sb.insert(open, '(');
exp = sb.toString();
}
}
return exp;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment