Created
November 26, 2015 19:21
-
-
Save onzag/f48cc97395ccc638ee43 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os import path | |
import json | |
import re | |
try: | |
from .Psy import * | |
from .PsyElem import * | |
from .psyutil import * | |
except: | |
from Psy import * | |
from PsyElem import * | |
from psyutil import * | |
class PsySyntaxError(Exception):
    """Raised when a Psy grammar file cannot be parsed.

    The message carries the file name, the line number, the offending line
    and a short reason.
    """
class PsyCheckerError(Exception):
    """Error type reserved for checker failures.

    It is only declared here; nothing in this module raises it.
    """
class PsyParser():
    """Load Psy grammar files and run their expression groups over elements.

    A grammar directory holds a ``control.txt`` that lists grammar files, one
    per line.  In every file ``###`` starts a comment.  A grammar file is a
    sequence of expression groups: a header line holding only the group name
    (letters, digits, underscores), followed by tab-indented body lines.

    Attributes:
        data: the loaded expression groups (PsyExprGroup), in load order.
        indexof: maps a group name to its position in ``data``.
    """

    # Checker prefix characters mapped to the PsyChecker method they trigger.
    _MODIFIERS = {
        '?': 'setMayExist',
        '*': 'setZeroOrMore',
        '+': 'setOneOrMore',
        '~': 'setDoNot',
    }

    # Keywords accepted after an &ATTR name, mapped to the factor they select.
    _ATTR_FACTORS = (
        ('ALL ', 'all'),
        ('FIRST ', 'first'),
        ('LAST ', 'last'),
        ('VALUE ', 'value'),
        ('IF ', 'if'),
    )

    def __init__(self, location):
        """Load every grammar file listed in ``<location>/control.txt``.

        Args:
            location: directory containing ``control.txt`` and the grammar
                files it names (paths are joined onto this directory).

        Raises:
            PsySyntaxError: if any listed grammar file is malformed.
        """
        self.data = []
        self.indexof = {}
        self.__header = re.compile('^[a-zA-Z0-9_]+')
        self.__miniRegex = re.compile('^[a-zA-Z0-9_\\*\\?\\+\\~\\(\\)\\[\\]\\.\\{\\}\\@\\#\\\\]+')
        # "with" guarantees the control file is closed even if a grammar
        # file fails to load.
        with open(path.join(location, 'control.txt'), 'r') as control:
            for line in control:
                psyfile = line.split('###')[0].strip()
                if psyfile:
                    self.__loadFile(path.join(location, psyfile))

    def extract(self, elems):
        """Apply every loaded expression group to ``elems`` in load order.

        Each group's ``extract`` result is fed to the next group.  The final
        result is wrapped in a PsyElem built with the arguments
        ``'SENTENCE'``, ``'sentence'`` and ``{'components': elems}``.
        """
        for exprgroup in self.data:
            elems = exprgroup.extract(elems)
        return PsyElem(elems, 'SENTENCE', 'sentence', {'components': elems})

    @staticmethod
    def __syntaxError(filename, lineNumber, line, reason, context=None):
        """Build (not raise) a PsySyntaxError with the module's message layout.

        ``context`` is the checker text being parsed, when there is one; it
        is rendered as ``in <context> <reason>``.
        """
        message = ('Syntax Error at ' + filename + ', line ' +
                   str(lineNumber) + ' at \n' + line + '\n')
        if context is not None:
            message += 'in ' + context + ' '
        return PsySyntaxError(message + reason)

    @staticmethod
    def __skipSpaces(chars):
        """Remove leading spaces from the character list ``chars`` in place.

        Safe on an empty or all-space list (leaves it empty).
        """
        count = 0
        while count < len(chars) and chars[count] == ' ':
            count += 1
        del chars[:count]

    def __loadFile(self, filename):
        """Parse one grammar file and register its expression groups.

        Blank lines and ``###`` comments are ignored.  A line starting with a
        tab belongs to the current group; any other line must be a group
        header consisting solely of a valid name.

        Raises:
            PsySyntaxError: on a body line before any header, a malformed
                header, a name starting with ``__``, a duplicated group name,
                or any error inside a group body.
        """
        print('loading ' + filename)
        # The current expression group name and its accumulated body lines.
        curExprGroupName = None
        curExprGroupData = []
        with open(filename, 'r') as f:
            for curLineNumber, line in enumerate(f, 1):
                # The real line, without comments or trailing whitespace.
                curLine = line.split('###')[0].rstrip()
                if not curLine:
                    continue
                # Tab-indented lines are body lines of the active group.
                if curExprGroupName is not None and curLine[0] == '\t':
                    curExprGroupData.append((curLine, curLineNumber))
                    continue
                # Anything else has to be a header made of the name alone.
                match = self.__header.search(curLine)
                if match is None or match.end() != len(curLine):
                    if curExprGroupName is None:
                        reason = 'Invalid Identation'
                    else:
                        reason = 'Invalid Expression Group Name'
                    raise self.__syntaxError(filename, curLineNumber, line, reason)
                # A new header closes the previous group; register it first
                # so the duplicate check below also sees it.
                if curExprGroupName is not None:
                    self.__loadExprGroup(curExprGroupName, curExprGroupData, filename)
                    curExprGroupData = []
                # The same name rules apply to every header, not just the
                # first one of the file.
                if curLine[:2] == '__':
                    raise self.__syntaxError(filename, curLineNumber, line,
                                             'Invalid name, cannot start with __')
                if curLine in self.indexof:
                    raise self.__syntaxError(filename, curLineNumber, line,
                                             'Expression group was already defined')
                curExprGroupName = curLine
        # Register the last group of the file.
        # NOTE(review): a trailing header with no body lines is dropped here,
        # while an empty group in the middle of a file is registered; this
        # keeps the original behaviour - confirm which one is intended.
        if curExprGroupData:
            self.__loadExprGroup(curExprGroupName, curExprGroupData, filename)

    def __buildExpr(self, text, line, lineNumber, filename):
        """Build a PsyExpr from ``text``, a list of checkers separated by ``&``.

        ``line`` is only used for error messages.

        Raises:
            PsySyntaxError: if a checker is empty or malformed.
        """
        expr = PsyExpr()
        for checker in json_split(text, '&'):
            checker_stripped = checker.strip()
            if not checker_stripped:
                raise self.__syntaxError(filename, lineNumber, line, 'Empty Checker')
            expr.addChecker(self.__getChecker(checker_stripped, line, lineNumber, filename))
        return expr

    def __loadMeta(self, name, line, lineNumber, filename):
        """Handle an ``&META <metaname> <checkers>`` line of group ``name``.

        The expression is added to the group ``__META__<name>_<metaname>``,
        which is created and registered on first use.
        """
        match = self.__miniRegex.search(line[6:])
        if match is None:
            raise self.__syntaxError(filename, lineNumber, line, 'Invalid Metaname')
        metaname = '__META__' + name + '_' + match.group()
        rest = line[6 + match.end():].strip()
        if metaname not in self.indexof:
            self.data.append(PsyExprGroup(metaname))
            self.indexof[metaname] = len(self.data) - 1
        expr = self.__buildExpr(rest, line, lineNumber, filename)
        self.data[self.indexof[metaname]].addExpr(expr)

    def __loadAttr(self, exprGroup, line, lineNumber, filename):
        """Handle an ``&ATTR <name> <FACTOR> <payload>`` line.

        For ``VALUE`` the payload is a JSON value stored with
        ``addFixedAttr``.  For ``ALL``, ``FIRST``, ``LAST`` and ``IF`` the
        payload is a ``|``-separated list of checkers (parsed without
        modifiers or conditions) stored with ``addAttr``.
        """
        match = self.__header.search(line[6:])
        if match is None:
            raise self.__syntaxError(filename, lineNumber, line, 'Invalid Attribute Name')
        attrname = match.group()
        rest = line[6 + match.end():].strip()
        factor = None
        for keyword, candidate in self._ATTR_FACTORS:
            if rest.startswith(keyword):
                factor = candidate
                rest = rest[len(keyword):].strip()
                break
        if factor is None:
            raise self.__syntaxError(filename, lineNumber, line, 'Invalid Attribute Factor')
        if factor == 'value':
            try:
                value = json.loads(rest)
            except ValueError:
                raise self.__syntaxError(filename, lineNumber, line, 'Invalid JSON value')
            exprGroup.addFixedAttr(attrname, value)
            return
        checkersList = []
        for checker in json_split(rest, '|'):
            checker_stripped = checker.strip()
            # An empty alternative would otherwise crash the checker parser.
            if not checker_stripped:
                raise self.__syntaxError(filename, lineNumber, line, 'Empty Checker')
            checkersList.append(
                self.__getChecker(checker_stripped, line, lineNumber, filename, False, False))
        exprGroup.addAttr(attrname, checkersList, factor)

    def __loadExprGroup(self, name, data, filename):
        """Build the expression group ``name`` from its body lines and register it.

        Args:
            name: the group name taken from the header line.
            data: list of ``(line, lineNumber)`` tuples; every line still
                carries its leading tab.
            filename: used for error messages only.

        Raises:
            PsySyntaxError: on a malformed body line, or when the group has
                body lines but none of them sets the type with ``&TYPE``.
        """
        exprGroup = PsyExprGroup(name)
        for rawLine, lineNumber in data:
            # Drop the leading tab.
            line = rawLine[1:]
            if not line.startswith('&'):
                exprGroup.addExpr(self.__buildExpr(line, line, lineNumber, filename))
            elif line[1:6] == 'META ':
                self.__loadMeta(name, line, lineNumber, filename)
            elif line[1:6] == 'ATTR ':
                self.__loadAttr(exprGroup, line, lineNumber, filename)
            elif line[1:6] == 'TYPE ':
                match = self.__header.search(line[6:])
                # The type name must be the only thing on the line.
                if match is None or match.end() != len(line) - 6:
                    raise self.__syntaxError(filename, lineNumber, line, 'Invalid Type Name')
                exprGroup.setType(match.group())
            # NOTE(review): any other "&..." line is silently ignored, as in
            # the original code - confirm whether it should be an error.
        if exprGroup.type is None and len(data) > 0:
            raise self.__syntaxError(filename, data[0][1], str(data[0][0]),
                                     'Special setter &TYPE not found for ' + name)
        self.data.append(exprGroup)
        self.indexof[name] = len(self.data) - 1

    def __consumeJsonString(self, chars, fail):
        """Parse and remove the JSON string at the front of ``chars``.

        ``chars`` must start with a double quote.  ``fail(reason)`` builds the
        exception to raise.  Returns the decoded string value.
        """
        endind = json_str_consume(chars)
        if endind is None:
            raise fail('Unfinished string')
        try:
            value = json.loads(''.join(chars[:endind + 1]))
        except ValueError:
            raise fail('Invalid json string')
        del chars[:endind + 1]
        return value

    def __getChecker(self, data, line, lineNumber, filename, allowModifier=True, allowConditions=True):
        """Parse one checker description into a PsyChecker.

        The accepted layout is::

            [modifier] ["json string"] [tag] { (<|>)+ ["json string"] [tag] }

        where the modifier is one of ``? * + ~`` and every ``<``/``>`` run
        introduces a condition whose relative index is the number of ``>``
        minus the number of ``<``.

        Args:
            data: the checker text.
            line: the full source line, for error messages.
            lineNumber: its line number, for error messages.
            filename: its file, for error messages.
            allowModifier: whether a leading modifier is recognised.
            allowConditions: whether trailing conditions are recognised.

        Returns:
            The configured PsyChecker.

        Raises:
            PsySyntaxError: on any malformed or unparseable input.
        """
        def fail(reason):
            return self.__syntaxError(filename, lineNumber, line, reason, data)

        checker = PsyChecker()
        dataConsume = list(data)
        if not dataConsume:
            raise self.__syntaxError(filename, lineNumber, line, 'Empty Checker')

        if allowModifier and dataConsume[0] in self._MODIFIERS:
            getattr(checker, self._MODIFIERS[dataConsume.pop(0)])()
            if not dataConsume:
                raise fail('Cannot find anything to apply the modifier to')
            # Checked before skipping spaces: only an immediately adjacent
            # second modifier is rejected.
            if dataConsume[0] in self._MODIFIERS:
                raise fail('More than one modifier')

        self.__skipSpaces(dataConsume)
        if not dataConsume:
            raise fail('Empty Checker')

        # Optional value: a JSON string handed to the checker.
        if dataConsume[0] == '"':
            value = self.__consumeJsonString(dataConsume, fail)
            try:
                checker.setValue(value)
            except Exception:
                raise fail('Invalid regex string')
            if not dataConsume:
                return checker
            self.__skipSpaces(dataConsume)

        # Optional tag: the longest mini-regex prefix of what is left.
        match = self.__miniRegex.search(''.join(dataConsume))
        if match is not None:
            try:
                checker.setTag(match.group())
            except Exception:
                raise fail('Invalid minimal regex tag')
            del dataConsume[:match.end()]
            if not dataConsume:
                return checker
            self.__skipSpaces(dataConsume)

        if allowConditions:
            while dataConsume and dataConsume[0] in '<>':
                relativeInd = 0
                while dataConsume and dataConsume[0] in '<>':
                    if dataConsume[0] == '>':
                        relativeInd += 1
                    else:
                        relativeInd -= 1
                    dataConsume.pop(0)
                self.__skipSpaces(dataConsume)
                value = None
                if dataConsume and dataConsume[0] == '"':
                    value = self.__consumeJsonString(dataConsume, fail)
                    # Allow spaces between value and tag, as in the main form.
                    self.__skipSpaces(dataConsume)
                tag = None
                match = self.__miniRegex.search(''.join(dataConsume))
                if match is not None:
                    tag = match.group()
                    del dataConsume[:match.end()]
                if tag is None and value is None:
                    raise fail('Useless rule')
                try:
                    checker.setCondition(relativeInd, value, tag)
                except Exception:
                    raise fail('Invalid regexs')
                self.__skipSpaces(dataConsume)

        if dataConsume:
            # Report exactly the text that could not be consumed.
            raise fail('Cannot parse ' + ''.join(dataConsume))
        return checker
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment