# Source: GitHub gist onzag/f48cc97395ccc638ee43 by @onzag,
# created November 26, 2015 19:21.
from os import path
import json
import re
try:
from .Psy import *
from .PsyElem import *
from .psyutil import *
except:
from Psy import *
from PsyElem import *
from psyutil import *
class PsySyntaxError(Exception):
    """Raised when a Psy grammar file contains a line that cannot be parsed."""
class PsyCheckerError(Exception):
    """Error type reserved for checker failures (not raised in this module)."""
class PsyParser():
    """Parser for Psy grammar files.

    The constructor reads ``control.txt`` from a directory and loads every
    grammar file it lists.  Loaded expression groups are stored, in load
    order, in ``self.data``; ``self.indexof`` maps a group name to its index
    in that list.

    File format as handled here: ``###`` starts a comment, an unindented
    line is an expression group name, and tab-indented lines below it form
    the body of that group.  A body line is either a ``&``-separated list of
    checkers or a special setter starting with ``&`` (``&META``, ``&ATTR``,
    ``&TYPE``).
    """

    # Leading modifier character -> name of the PsyChecker method it triggers.
    _MODIFIERS = {
        '?': 'setMayExist',
        '*': 'setZeroOrMore',
        '+': 'setOneOrMore',
        '~': 'setDoNot',
    }

    # '&ATTR' factor keywords (with their mandatory trailing space) paired
    # with the internal factor name.
    _ATTR_FACTORS = (
        ('ALL ', 'all'),
        ('FIRST ', 'first'),
        ('LAST ', 'last'),
        ('VALUE ', 'value'),
        ('IF ', 'if'),
    )

    def __init__(self,location):
        """Load every grammar file named in ``control.txt`` under *location*.

        Each non-empty line of the control file (after removing a ``###``
        comment and surrounding whitespace) is a file name relative to
        *location*.

        Raises:
            PsySyntaxError: if one of the listed files is malformed.
        """
        self.data = []
        self.indexof = {}
        self.__header = re.compile('^[a-zA-Z0-9_]+')
        self.__miniRegex = re.compile('^[a-zA-Z0-9_\\*\\?\\+\\~\\(\\)\\[\\]\\.\\{\\}\\@\\#\\\\]+')
        # 'with' guarantees the control file is closed even if a load fails
        with open(path.join(location,'control.txt'),'r') as control:
            for line in control:
                psyfile = line.split('###')[0].strip()
                if psyfile:
                    self.__loadFile(path.join(location,psyfile))

    def extract(self,elems):
        """Run *elems* through every loaded group and wrap the result.

        Each group's ``extract`` receives the output of the previous one.
        The final list is wrapped in a ``PsyElem`` tagged as a sentence.
        """
        for exprgroup in self.data:
            elems = exprgroup.extract(elems)
        return PsyElem(elems,'SENTENCE','sentence',{'components':elems})

    @staticmethod
    def __syntaxError(filename,lineNumber,line,detail):
        """Build (not raise) a PsySyntaxError with the standard location prefix."""
        return PsySyntaxError('Syntax Error at ' + filename + ', line ' +
            str(lineNumber) + ' at \n' + line + '\n' + detail)

    def __checkerError(self,data,line,lineNumber,filename,detail):
        """Build a PsySyntaxError that also quotes the offending checker text."""
        return self.__syntaxError(filename,lineNumber,line,'in ' + data + ' ' + detail)

    @staticmethod
    def __skipSpaces(chars):
        """Drop leading spaces from the character list *chars* in place."""
        while chars and chars[0] == ' ':
            chars.pop(0)

    def __loadFile(self,filename):
        """Read *filename* and register every expression group it defines.

        Raises:
            PsySyntaxError: on an indented line before any group name, on an
                invalid, reserved (``__`` prefix) or duplicated group name,
                or on any error inside a group body.
        """
        print('loading ' + filename)
        #The current expression group name
        curExprGroupName = None
        #the accumulated (line, line number) pairs for that group
        curExprGroupData = []
        with open(filename,'r') as f:
            for curLineNumber, line in enumerate(f, 1):
                #this is the real line without comments
                curLine = line.split('###')[0].rstrip()
                if not curLine:
                    continue
                #an indented line belongs to the active group
                if curExprGroupName is not None and curLine[0] == '\t':
                    curExprGroupData.append((curLine,curLineNumber))
                    continue
                #anything else has to be a group name covering the whole line
                match = self.__header.match(curLine)
                isHeader = match is not None and match.end() == len(curLine)
                if curExprGroupName is None:
                    if not isHeader:
                        raise self.__syntaxError(filename,curLineNumber,line,'Invalid Identation')
                else:
                    if not isHeader:
                        raise self.__syntaxError(filename,curLineNumber,line,'Invalid Expression Group Name')
                    #a new name ends the previous group, so store it first;
                    #this also lets the duplicate check below see it
                    self.__loadExprGroup(curExprGroupName,curExprGroupData,filename)
                    curExprGroupData = []
                #the same name rules apply to every group, not only the first
                if curLine.startswith('__'):
                    raise self.__syntaxError(filename,curLineNumber,line,
                        'Invalid name, cannot start with __')
                if curLine in self.indexof:
                    raise self.__syntaxError(filename,curLineNumber,line,
                        'Expression group was already defined')
                curExprGroupName = curLine
        #whatever is still pending after the last line belongs to the last group
        if curExprGroupData:
            self.__loadExprGroup(curExprGroupName,curExprGroupData,filename)

    def __buildExpr(self,text,line,lineNumber,filename):
        """Turn the ``&``-separated checker list *text* into a PsyExpr.

        *line* is only used for error messages.

        Raises:
            PsySyntaxError: if a checker is empty or cannot be parsed.
        """
        expr = PsyExpr()
        for checker in json_split(text,'&'):
            checker_stripped = checker.strip()
            if not checker_stripped:
                raise self.__syntaxError(filename,lineNumber,line,'Empty Checker')
            expr.addChecker(self.__getChecker(checker_stripped,line,lineNumber,filename))
        return expr

    def __loadMeta(self,name,line,lineNumber,filename):
        """Handle ``&META <metaname> <checkers>`` for the group called *name*.

        The expression is added to a separate group named
        ``__META__<name>_<metaname>``, created on first use.
        """
        match = self.__miniRegex.search(line[6:])
        if match is None:
            raise self.__syntaxError(filename,lineNumber,line,'Invalid Metaname')
        metaname = '__META__' + name + '_' + match.group(0)
        rest = line[match.end()+6:].strip()
        if metaname not in self.indexof:
            self.data.append(PsyExprGroup(metaname))
            self.indexof[metaname] = len(self.data) - 1
        index = self.indexof[metaname]
        self.data[index].addExpr(self.__buildExpr(rest,line,lineNumber,filename))

    def __loadAttr(self,exprGroup,line,lineNumber,filename):
        """Handle ``&ATTR <name> <factor> <payload>`` on *exprGroup*.

        For the ``VALUE`` factor the payload is a JSON value stored with
        ``addFixedAttr``; for every other factor it is a ``|``-separated list
        of checkers (no modifiers, no conditions) stored with ``addAttr``.
        """
        match = self.__header.search(line[6:])
        if match is None:
            raise self.__syntaxError(filename,lineNumber,line,'Invalid Attribute Name')
        attrname = match.group(0)
        rest = line[match.end()+6:].strip()
        for prefix, factor in self._ATTR_FACTORS:
            if rest.startswith(prefix):
                rest = rest[len(prefix):].strip()
                break
        else:
            raise self.__syntaxError(filename,lineNumber,line,'Invalid Attribute Factor')
        if factor == 'value':
            try:
                value = json.loads(rest)
            except ValueError:
                raise self.__syntaxError(filename,lineNumber,line,'Invalid JSON value')
            exprGroup.addFixedAttr(attrname,value)
            return
        checkersList = [
            self.__getChecker(checker.strip(),line,lineNumber,filename,False,False)
            for checker in json_split(rest,'|')
        ]
        exprGroup.addAttr(attrname,checkersList,factor)

    def __loadType(self,exprGroup,line,lineNumber,filename):
        """Handle ``&TYPE <name>``; the name must fill the rest of the line."""
        match = self.__header.search(line[6:])
        if match is None or match.end() != len(line) - 6:
            raise self.__syntaxError(filename,lineNumber,line,'Invalid Type Name')
        exprGroup.setType(match.group(0))

    def __loadExprGroup(self,name,data,filename):
        """Build the group *name* from its body lines and register it.

        *data* is a list of ``(line, lineNumber)`` pairs, where each line
        still carries its leading tab.

        Raises:
            PsySyntaxError: if a body line is malformed, or if the body is
                non-empty and never sets a type through ``&TYPE``.
        """
        exprGroup = PsyExprGroup(name)
        for rawLine, lineNumber in data:
            #drop the leading tab that marked the line as part of the body
            line = rawLine[1:]
            if line[0] != '&':
                exprGroup.addExpr(self.__buildExpr(line,line,lineNumber,filename))
                continue
            directive = line[1:6]
            if directive == 'META ':
                self.__loadMeta(name,line,lineNumber,filename)
            elif directive == 'ATTR ':
                self.__loadAttr(exprGroup,line,lineNumber,filename)
            elif directive == 'TYPE ':
                self.__loadType(exprGroup,line,lineNumber,filename)
            # NOTE(review): any other '&' line is silently ignored, as in the
            # original code - confirm whether that should be a syntax error.
        if exprGroup.type is None and len(data) > 0:
            raise self.__syntaxError(filename,data[0][1],str(data[0][0]),
                'Special setter &TYPE not found for ' + name)
        self.data.append(exprGroup)
        self.indexof[name] = len(self.data) - 1

    def __consumeString(self,dataConsume,data,line,lineNumber,filename):
        """Decode the JSON string at the front of *dataConsume* and remove it.

        Relies on ``json_str_consume`` returning the index of the closing
        quote, or ``None`` when the string is not terminated.
        """
        endind = json_str_consume(dataConsume)
        if endind is None:
            raise self.__checkerError(data,line,lineNumber,filename,'Unfinished string')
        try:
            value = json.loads(''.join(dataConsume[:endind+1]))
        except ValueError:
            raise self.__checkerError(data,line,lineNumber,filename,'Invalid json string')
        del dataConsume[:endind+1]
        return value

    def __getChecker(self,data,line,lineNumber,filename,allowModifier=True,allowConditions=True):
        """Parse the checker text *data* into a PsyChecker.

        Accepted layout, every part optional: a modifier (``?``, ``*``,
        ``+`` or ``~``, only when *allowModifier*), a JSON string value, a
        tag, and (only when *allowConditions*) any number of conditions.  A
        condition is a run of ``<`` / ``>`` followed by a JSON string and/or
        a tag; each ``>`` adds one to the relative index passed to
        ``setCondition`` and each ``<`` subtracts one.

        *line*, *lineNumber* and *filename* are only used in error messages.

        Raises:
            PsySyntaxError: if *data* is empty, malformed, or leaves text
                that cannot be parsed.
        """
        checker = PsyChecker()
        dataConsume = list(data)
        if not dataConsume:
            #reachable from &ATTR lists such as 'a||b'; used to be an IndexError
            raise self.__syntaxError(filename,lineNumber,line,'Empty Checker')
        if allowModifier:
            setter = self._MODIFIERS.get(dataConsume[0])
            if setter is not None:
                getattr(checker,setter)()
                dataConsume.pop(0)
                if not dataConsume:
                    raise self.__checkerError(data,line,lineNumber,filename,
                        'Cannot find anything to apply the modifier to')
                #all four modifier characters count as a second modifier
                if dataConsume[0] in self._MODIFIERS:
                    raise self.__checkerError(data,line,lineNumber,filename,
                        'More than one modifier')
        self.__skipSpaces(dataConsume)
        #optional value, written as a JSON string
        if dataConsume and dataConsume[0] == '"':
            value = self.__consumeString(dataConsume,data,line,lineNumber,filename)
            try:
                checker.setValue(value)
            except Exception:
                raise self.__checkerError(data,line,lineNumber,filename,'Invalid regex string')
            if not dataConsume:
                return checker
            self.__skipSpaces(dataConsume)
        #optional tag
        match = self.__miniRegex.search(''.join(dataConsume))
        if match is not None:
            try:
                checker.setTag(match.group(0))
            except Exception:
                raise self.__checkerError(data,line,lineNumber,filename,'Invalid minimal regex tag')
            del dataConsume[:match.end()]
            if not dataConsume:
                return checker
            self.__skipSpaces(dataConsume)
        if allowConditions:
            while dataConsume:
                self.__skipSpaces(dataConsume)
                if not dataConsume or dataConsume[0] not in '<>':
                    break
                relativeInd = 0
                #the emptiness guard keeps a trailing '<' or '>' from raising
                #IndexError; it ends up reported as a useless rule below
                while dataConsume and dataConsume[0] in '<>':
                    if dataConsume[0] == '>':
                        relativeInd += 1
                    else:
                        relativeInd -= 1
                    dataConsume.pop(0)
                self.__skipSpaces(dataConsume)
                value = None
                if dataConsume and dataConsume[0] == '"':
                    value = self.__consumeString(dataConsume,data,line,lineNumber,filename)
                tag = None
                match = self.__miniRegex.search(''.join(dataConsume))
                if match is not None:
                    tag = match.group(0)
                    del dataConsume[:match.end()]
                if tag is None and value is None:
                    raise self.__checkerError(data,line,lineNumber,filename,'Useless rule')
                try:
                    checker.setCondition(relativeInd,value,tag)
                except Exception:
                    raise self.__checkerError(data,line,lineNumber,filename,'Invalid regexs')
        if dataConsume:
            #report what is actually left over, not an earlier snapshot
            raise self.__checkerError(data,line,lineNumber,filename,
                'Cannot parse ' + ''.join(dataConsume))
        return checker
# (End of gist; the GitHub sign-up/sign-in footer is not part of the program.)