## onzag/notcute.py

## notcute.py
from os import path
import json
import re

try:
	from .Psy import *
	from .PsyElem import *
	from .psyutil import *
except:
	from Psy import *
	from PsyElem import *
	from psyutil import *

class PsySyntaxError(Exception):
	"""Raised when a psy grammar file cannot be parsed."""

class PsyCheckerError(Exception):
	"""Checker related error; nothing in the visible part of this file raises it."""

class PsyParser():
	"""Parser for the psy grammar files found in a directory.

	The directory must contain a control.txt naming one grammar file per
	line; everything after ### on a line is a comment. A grammar file is a
	list of expression groups: a header line holding the group name,
	followed by tab-indented lines that are either expressions (checkers
	joined by &) or special setters (&META, &ATTR, &TYPE).

	Parsed groups are stored in self.data in load order and self.indexof
	maps every group name to its position in self.data.
	"""

	def __init__(self,location):
		"""Load every grammar file listed in location/control.txt.

		Raises PsySyntaxError when a grammar file is malformed.
		"""
		#expression groups in load order
		self.data = []
		#group name -> index of the group inside self.data
		self.indexof = {}

		#a group, attribute or type name
		self.__header = re.compile('^[a-zA-Z0-9_]+')
		#a tag or metaname written as a minimal regex
		self.__miniRegex = re.compile('^[a-zA-Z0-9_\\*\\?\\+\\~\\(\\)\\[\\]\\.\\{\\}\\@\\#\\\\]+')

		#read the names first so control.txt is closed even if a grammar
		#file fails to load
		with open(path.join(location,'control.txt'),'r') as control:
			psyfiles = [line.split('###')[0].strip() for line in control]

		for psyfile in psyfiles:
			if psyfile:
				self.__loadFile(path.join(location,psyfile))

	def extract(self,elems):
		"""Run elems through every expression group and wrap the result.

		Groups are applied in load order, each one receiving what the
		previous one returned; the final list becomes both the content and
		the 'components' attribute of a SENTENCE PsyElem.
		"""
		for exprgroup in self.data:
			elems = exprgroup.extract(elems)
		return PsyElem(elems,'SENTENCE','sentence',{'components':elems})

	def __syntaxError(self,filename,lineNumber,line,message,data=None):
		"""Build (without raising) a PsySyntaxError located at filename/lineNumber.

		line is the offending text and data, when given, the checker inside
		that line that was being parsed.
		"""
		text = 'Syntax Error at ' + filename + ', line ' + str(lineNumber) + ' at \n' + line + '\n'
		if data is not None:
			text += 'in ' + data + ' '
		return PsySyntaxError(text + message)

	def __loadFile(self,filename):
		"""Parse one grammar file and register its expression groups.

		Raises PsySyntaxError on a bad header (wrong characters, leading __,
		name already defined), on content found before the first header and
		on anything __loadExprGroup rejects.
		"""
		print('loading ' + filename)

		#name of the group being read, None until the first header shows up
		curExprGroupName = None
		#(line, lineNumber) pairs accumulated for that group
		curExprGroupData = []

		with open(filename,'r') as f:
			for curLineNumber, line in enumerate(f,1):
				#the real line, without comment and trailing blanks
				curLine = line.split('###')[0].rstrip()
				if not curLine:
					continue

				#indented lines belong to the group being read
				if curExprGroupName is not None and curLine[0] == '\t':
					curExprGroupData.append((curLine,curLineNumber))
					continue

				#anything else has to be a header, and the name has to span
				#the whole line for the first group as well as for later ones
				match = self.__header.search(curLine)
				if match is None or match.end() != len(curLine):
					if curExprGroupName is None:
						reason = 'Invalid Identation'
					else:
						reason = 'Invalid Expression Group Name'
					raise self.__syntaxError(filename,curLineNumber,line,reason)
				if curLine[:2] == '__':
					raise self.__syntaxError(filename,curLineNumber,line,
						'Invalid name, cannot start with __')

				#the previous group ends here; it is stored first so that the
				#duplicate check below also sees it
				if curExprGroupName is not None:
					self.__loadExprGroup(curExprGroupName,curExprGroupData,filename)
				if curLine in self.indexof:
					raise self.__syntaxError(filename,curLineNumber,line,
						'Expression group was already defined')

				curExprGroupName = curLine
				curExprGroupData = []

		#store the last group; a trailing header without any line is
		#dropped, exactly as it always was
		if curExprGroupData:
			self.__loadExprGroup(curExprGroupName,curExprGroupData,filename)

	def __loadExprGroup(self,name,data,filename):
		"""Build the expression group name out of its lines and register it.

		data is a list of (line, lineNumber) pairs whose lines still carry
		their leading tab. Raises PsySyntaxError when a line is malformed or
		when the group has lines but no &TYPE setter.
		"""
		exprGroup = PsyExprGroup(name)
		for rawLine, lineNumber in data:
			#drop the leading \t
			line = rawLine[1:]

			if line[0] != '&':
				exprGroup.addExpr(self.__buildExpr(line,line,lineNumber,filename))
			elif line[1:6] == 'META ':
				self.__loadMeta(name,line,lineNumber,filename)
			elif line[1:6] == 'ATTR ':
				self.__loadAttr(exprGroup,line,lineNumber,filename)
			elif line[1:6] == 'TYPE ':
				#the type name has to be everything after the keyword
				match = self.__header.search(line[6:])
				if match is None or match.end() != len(line) - 6:
					raise self.__syntaxError(filename,lineNumber,line,'Invalid Type Name')
				exprGroup.setType(line[6:match.end()+6])
			#NOTE(review): any other & line is silently ignored, as it
			#always was - confirm whether it should be an error

		if exprGroup.type is None and data:
			raise self.__syntaxError(filename,data[0][1],str(data[0][0]),
				'Special setter &TYPE not found for ' + name)

		self.data.append(exprGroup)
		self.indexof[name] = len(self.data) - 1

	def __buildExpr(self,text,line,lineNumber,filename):
		"""Turn text, a list of checkers joined by &, into a PsyExpr.

		line is the full source line, used for error messages only.
		"""
		expr = PsyExpr()
		for checker in json_split(text,'&'):
			checker_stripped = checker.strip()
			if not checker_stripped:
				raise self.__syntaxError(filename,lineNumber,line,'Empty Checker')
			expr.addChecker(self.__getChecker(checker_stripped,line,lineNumber,filename))
		return expr

	def __loadMeta(self,name,line,lineNumber,filename):
		"""Handle an &META line: add its expression to the group's meta group.

		The meta group is called '__META__' + name + '_' + metaname and is
		created (and appended to self.data) the first time it is used.
		"""
		match = self.__miniRegex.search(line[6:])
		if match is None:
			raise self.__syntaxError(filename,lineNumber,line,'Invalid Metaname')

		metaname = '__META__' + name + '_' + line[6:match.end()+6]
		rest = line[match.end()+6:].strip()

		if metaname not in self.indexof:
			self.data.append(PsyExprGroup(metaname))
			self.indexof[metaname] = len(self.data) - 1

		expr = self.__buildExpr(rest,line,lineNumber,filename)
		self.data[self.indexof[metaname]].addExpr(expr)

	def __loadAttr(self,exprGroup,line,lineNumber,filename):
		"""Handle an &ATTR line: name, factor keyword and the factor argument.

		The factor is one of ALL, FIRST, LAST, IF (followed by checkers
		joined by |) or VALUE (followed by a JSON value).
		"""
		match = self.__header.search(line[6:])
		if match is None:
			raise self.__syntaxError(filename,lineNumber,line,'Invalid Attribute Name')

		attrname = line[6:match.end()+6]
		rest = line[match.end()+6:].strip()

		factor = None
		for keyword in ('ALL ','FIRST ','LAST ','VALUE ','IF '):
			if rest.startswith(keyword):
				factor = keyword.strip().lower()
				rest = rest[len(keyword):].strip()
				break
		if factor is None:
			raise self.__syntaxError(filename,lineNumber,line,'Invalid Attribute Factor')

		if factor == 'value':
			try:
				value = json.loads(rest)
			except ValueError:
				raise self.__syntaxError(filename,lineNumber,line,'Invalid JSON value')
			exprGroup.addFixedAttr(attrname,value)
			return

		checkersList = []
		for checker in json_split(rest,'|'):
			checker_stripped = checker.strip()
			#an empty alternative used to crash __getChecker with an IndexError
			if not checker_stripped:
				raise self.__syntaxError(filename,lineNumber,line,'Empty Checker')
			checkersList.append(self.__getChecker(checker_stripped,line,lineNumber,filename,False,False))
		exprGroup.addAttr(attrname,checkersList,factor)

	def __getChecker(self,data,line,lineNumber,filename,allowModifier=True,allowConditions=True):
		"""Parse one checker description into a PsyChecker.

		data is laid out as
			[modifier] ["value"] [tag] [conditions]
		where modifier is one of ? * + ~ (only when allowModifier), value is
		a JSON string, tag is a minimal regex and every condition (only when
		allowConditions) is a run of < and > followed by a "value" and/or a
		tag. Each > adds one and each < subtracts one from the relative
		index handed to setCondition.

		line, lineNumber and filename are used for error messages only.
		Raises PsySyntaxError when data cannot be parsed completely.
		"""
		checker = PsyChecker()
		dataConsume = list(data)

		def fail(message):
			return self.__syntaxError(filename,lineNumber,line,message,data)

		def skipSpaces():
			while dataConsume and dataConsume[0] == ' ':
				dataConsume.pop(0)

		def consumeString():
			#as used here json_str_consume gives the index where the string
			#opened at dataConsume[0] ends, or None if it never does
			endind = json_str_consume(dataConsume)
			if endind is None:
				raise fail('Unfinished string')
			try:
				value = json.loads(''.join(dataConsume[:endind+1]))
			except ValueError:
				raise fail('Invalid json string')
			del dataConsume[:endind+1]
			return value

		if not dataConsume:
			raise fail('Empty Checker')

		if allowModifier:
			modifiers = {'?':'setMayExist','*':'setZeroOrMore','+':'setOneOrMore','~':'setDoNot'}
			if dataConsume[0] in modifiers:
				getattr(checker,modifiers[dataConsume.pop(0)])()

			if not dataConsume:
				raise fail('Cannot find anything to apply the modifier to')

			#NOTE(review): ? is not rejected here although it is a modifier
			#too; left untouched because ? is also a valid tag character
			if dataConsume[0] in '+~*':
				raise fail('More than one modifier')

		skipSpaces()

		#the optional value
		if dataConsume and dataConsume[0] == '"':
			value = consumeString()
			try:
				checker.setValue(value)
			except Exception:
				raise fail('Invalid regex string')
			skipSpaces()

		#the optional tag
		match = self.__miniRegex.search(''.join(dataConsume))
		if match is not None:
			try:
				checker.setTag(match.group(0))
			except Exception:
				raise fail('Invalid minimal regex tag')
			del dataConsume[:match.end()]
			skipSpaces()

		if allowConditions:
			while dataConsume and dataConsume[0] in '<>':
				relativeInd = 0
				while dataConsume and dataConsume[0] in '<>':
					if dataConsume.pop(0) == '>':
						relativeInd += 1
					else:
						relativeInd -= 1
				skipSpaces()

				value = None
				if dataConsume and dataConsume[0] == '"':
					value = consumeString()

				#no blanks are skipped between the value and the tag of a
				#condition, exactly as before
				tag = None
				match = self.__miniRegex.search(''.join(dataConsume))
				if match is not None:
					tag = match.group(0)
					del dataConsume[:match.end()]

				#also reached when the checker ends right after the arrows
				if tag is None and value is None:
					raise fail('Useless rule')

				try:
					checker.setCondition(relativeInd,value,tag)
				except Exception:
					raise fail('Invalid regexs')
				skipSpaces()

		#whatever is left could not be understood
		if dataConsume:
			raise fail('Cannot parse ' + ''.join(dataConsume))

		return checker
	from os import path
	import json
	import re

	try:
	from .Psy import *
	from .PsyElem import *
	from .psyutil import *
	except:
	from Psy import *
	from PsyElem import *
	from psyutil import *

	class PsySyntaxError(Exception):
		"""Raised when a psy grammar file cannot be parsed."""

	class PsyCheckerError(Exception):
		"""Checker related error; nothing in the visible part of this file raises it."""

	class PsyParser():
		"""Parser for the psy grammar files found in a directory.

		The directory must contain a control.txt naming one grammar file per
		line; everything after ### on a line is a comment. A grammar file is a
		list of expression groups: a header line holding the group name,
		followed by tab-indented lines that are either expressions (checkers
		joined by &) or special setters (&META, &ATTR, &TYPE).

		Parsed groups are stored in self.data in load order and self.indexof
		maps every group name to its position in self.data.
		"""

		def __init__(self,location):
			"""Load every grammar file listed in location/control.txt.

			Raises PsySyntaxError when a grammar file is malformed.
			"""
			#expression groups in load order
			self.data = []
			#group name -> index of the group inside self.data
			self.indexof = {}

			#a group, attribute or type name
			self.__header = re.compile('^[a-zA-Z0-9_]+')
			#a tag or metaname written as a minimal regex
			self.__miniRegex = re.compile('^[a-zA-Z0-9_\\*\\?\\+\\~\\(\\)\\[\\]\\.\\{\\}\\@\\#\\\\]+')

			#read the names first so control.txt is closed even if a grammar
			#file fails to load
			with open(path.join(location,'control.txt'),'r') as control:
				psyfiles = [line.split('###')[0].strip() for line in control]

			for psyfile in psyfiles:
				if psyfile:
					self.__loadFile(path.join(location,psyfile))

		def extract(self,elems):
			"""Run elems through every expression group and wrap the result.

			Groups are applied in load order, each one receiving what the
			previous one returned; the final list becomes both the content and
			the 'components' attribute of a SENTENCE PsyElem.
			"""
			for exprgroup in self.data:
				elems = exprgroup.extract(elems)
			return PsyElem(elems,'SENTENCE','sentence',{'components':elems})

		def __syntaxError(self,filename,lineNumber,line,message,data=None):
			"""Build (without raising) a PsySyntaxError located at filename/lineNumber.

			line is the offending text and data, when given, the checker inside
			that line that was being parsed.
			"""
			text = 'Syntax Error at ' + filename + ', line ' + str(lineNumber) + ' at \n' + line + '\n'
			if data is not None:
				text += 'in ' + data + ' '
			return PsySyntaxError(text + message)

		def __loadFile(self,filename):
			"""Parse one grammar file and register its expression groups.

			Raises PsySyntaxError on a bad header (wrong characters, leading __,
			name already defined), on content found before the first header and
			on anything __loadExprGroup rejects.
			"""
			print('loading ' + filename)

			#name of the group being read, None until the first header shows up
			curExprGroupName = None
			#(line, lineNumber) pairs accumulated for that group
			curExprGroupData = []

			with open(filename,'r') as f:
				for curLineNumber, line in enumerate(f,1):
					#the real line, without comment and trailing blanks
					curLine = line.split('###')[0].rstrip()
					if not curLine:
						continue

					#indented lines belong to the group being read
					if curExprGroupName is not None and curLine[0] == '\t':
						curExprGroupData.append((curLine,curLineNumber))
						continue

					#anything else has to be a header, and the name has to span
					#the whole line for the first group as well as for later ones
					match = self.__header.search(curLine)
					if match is None or match.end() != len(curLine):
						if curExprGroupName is None:
							reason = 'Invalid Identation'
						else:
							reason = 'Invalid Expression Group Name'
						raise self.__syntaxError(filename,curLineNumber,line,reason)
					if curLine[:2] == '__':
						raise self.__syntaxError(filename,curLineNumber,line,
							'Invalid name, cannot start with __')

					#the previous group ends here; it is stored first so that the
					#duplicate check below also sees it
					if curExprGroupName is not None:
						self.__loadExprGroup(curExprGroupName,curExprGroupData,filename)
					if curLine in self.indexof:
						raise self.__syntaxError(filename,curLineNumber,line,
							'Expression group was already defined')

					curExprGroupName = curLine
					curExprGroupData = []

			#store the last group; a trailing header without any line is
			#dropped, exactly as it always was
			if curExprGroupData:
				self.__loadExprGroup(curExprGroupName,curExprGroupData,filename)

		def __loadExprGroup(self,name,data,filename):
			"""Build the expression group name out of its lines and register it.

			data is a list of (line, lineNumber) pairs whose lines still carry
			their leading tab. Raises PsySyntaxError when a line is malformed or
			when the group has lines but no &TYPE setter.
			"""
			exprGroup = PsyExprGroup(name)
			for rawLine, lineNumber in data:
				#drop the leading \t
				line = rawLine[1:]

				if line[0] != '&':
					exprGroup.addExpr(self.__buildExpr(line,line,lineNumber,filename))
				elif line[1:6] == 'META ':
					self.__loadMeta(name,line,lineNumber,filename)
				elif line[1:6] == 'ATTR ':
					self.__loadAttr(exprGroup,line,lineNumber,filename)
				elif line[1:6] == 'TYPE ':
					#the type name has to be everything after the keyword
					match = self.__header.search(line[6:])
					if match is None or match.end() != len(line) - 6:
						raise self.__syntaxError(filename,lineNumber,line,'Invalid Type Name')
					exprGroup.setType(line[6:match.end()+6])
				#NOTE(review): any other & line is silently ignored, as it
				#always was - confirm whether it should be an error

			if exprGroup.type is None and data:
				raise self.__syntaxError(filename,data[0][1],str(data[0][0]),
					'Special setter &TYPE not found for ' + name)

			self.data.append(exprGroup)
			self.indexof[name] = len(self.data) - 1

		def __buildExpr(self,text,line,lineNumber,filename):
			"""Turn text, a list of checkers joined by &, into a PsyExpr.

			line is the full source line, used for error messages only.
			"""
			expr = PsyExpr()
			for checker in json_split(text,'&'):
				checker_stripped = checker.strip()
				if not checker_stripped:
					raise self.__syntaxError(filename,lineNumber,line,'Empty Checker')
				expr.addChecker(self.__getChecker(checker_stripped,line,lineNumber,filename))
			return expr

		def __loadMeta(self,name,line,lineNumber,filename):
			"""Handle an &META line: add its expression to the group's meta group.

			The meta group is called '__META__' + name + '_' + metaname and is
			created (and appended to self.data) the first time it is used.
			"""
			match = self.__miniRegex.search(line[6:])
			if match is None:
				raise self.__syntaxError(filename,lineNumber,line,'Invalid Metaname')

			metaname = '__META__' + name + '_' + line[6:match.end()+6]
			rest = line[match.end()+6:].strip()

			if metaname not in self.indexof:
				self.data.append(PsyExprGroup(metaname))
				self.indexof[metaname] = len(self.data) - 1

			expr = self.__buildExpr(rest,line,lineNumber,filename)
			self.data[self.indexof[metaname]].addExpr(expr)

		def __loadAttr(self,exprGroup,line,lineNumber,filename):
			"""Handle an &ATTR line: name, factor keyword and the factor argument.

			The factor is one of ALL, FIRST, LAST, IF (followed by checkers
			joined by |) or VALUE (followed by a JSON value).
			"""
			match = self.__header.search(line[6:])
			if match is None:
				raise self.__syntaxError(filename,lineNumber,line,'Invalid Attribute Name')

			attrname = line[6:match.end()+6]
			rest = line[match.end()+6:].strip()

			factor = None
			for keyword in ('ALL ','FIRST ','LAST ','VALUE ','IF '):
				if rest.startswith(keyword):
					factor = keyword.strip().lower()
					rest = rest[len(keyword):].strip()
					break
			if factor is None:
				raise self.__syntaxError(filename,lineNumber,line,'Invalid Attribute Factor')

			if factor == 'value':
				try:
					value = json.loads(rest)
				except ValueError:
					raise self.__syntaxError(filename,lineNumber,line,'Invalid JSON value')
				exprGroup.addFixedAttr(attrname,value)
				return

			checkersList = []
			#the separator is a plain pipe; '\|' would hand a backslash to json_split
			for checker in json_split(rest,'|'):
				checker_stripped = checker.strip()
				#an empty alternative used to crash __getChecker with an IndexError
				if not checker_stripped:
					raise self.__syntaxError(filename,lineNumber,line,'Empty Checker')
				checkersList.append(self.__getChecker(checker_stripped,line,lineNumber,filename,False,False))
			exprGroup.addAttr(attrname,checkersList,factor)

		def __getChecker(self,data,line,lineNumber,filename,allowModifier=True,allowConditions=True):
			"""Parse one checker description into a PsyChecker.

			data is laid out as
				[modifier] ["value"] [tag] [conditions]
			where modifier is one of ? * + ~ (only when allowModifier), value is
			a JSON string, tag is a minimal regex and every condition (only when
			allowConditions) is a run of < and > followed by a "value" and/or a
			tag. Each > adds one and each < subtracts one from the relative
			index handed to setCondition.

			line, lineNumber and filename are used for error messages only.
			Raises PsySyntaxError when data cannot be parsed completely.
			"""
			checker = PsyChecker()
			dataConsume = list(data)

			def fail(message):
				return self.__syntaxError(filename,lineNumber,line,message,data)

			def skipSpaces():
				while dataConsume and dataConsume[0] == ' ':
					dataConsume.pop(0)

			def consumeString():
				#as used here json_str_consume gives the index where the string
				#opened at dataConsume[0] ends, or None if it never does
				endind = json_str_consume(dataConsume)
				if endind is None:
					raise fail('Unfinished string')
				try:
					value = json.loads(''.join(dataConsume[:endind+1]))
				except ValueError:
					raise fail('Invalid json string')
				del dataConsume[:endind+1]
				return value

			if not dataConsume:
				raise fail('Empty Checker')

			if allowModifier:
				modifiers = {'?':'setMayExist','*':'setZeroOrMore','+':'setOneOrMore','~':'setDoNot'}
				if dataConsume[0] in modifiers:
					getattr(checker,modifiers[dataConsume.pop(0)])()

				if not dataConsume:
					raise fail('Cannot find anything to apply the modifier to')

				#NOTE(review): ? is not rejected here although it is a modifier
				#too; left untouched because ? is also a valid tag character
				if dataConsume[0] in '+~*':
					raise fail('More than one modifier')

			skipSpaces()

			#the optional value
			if dataConsume and dataConsume[0] == '"':
				value = consumeString()
				try:
					checker.setValue(value)
				except Exception:
					raise fail('Invalid regex string')
				skipSpaces()

			#the optional tag
			match = self.__miniRegex.search(''.join(dataConsume))
			if match is not None:
				try:
					checker.setTag(match.group(0))
				except Exception:
					raise fail('Invalid minimal regex tag')
				del dataConsume[:match.end()]
				skipSpaces()

			if allowConditions:
				while dataConsume and dataConsume[0] in '<>':
					relativeInd = 0
					while dataConsume and dataConsume[0] in '<>':
						if dataConsume.pop(0) == '>':
							relativeInd += 1
						else:
							relativeInd -= 1
					skipSpaces()

					value = None
					if dataConsume and dataConsume[0] == '"':
						value = consumeString()

					#no blanks are skipped between the value and the tag of a
					#condition, exactly as before
					tag = None
					match = self.__miniRegex.search(''.join(dataConsume))
					if match is not None:
						tag = match.group(0)
						del dataConsume[:match.end()]

					#also reached when the checker ends right after the arrows
					if tag is None and value is None:
						raise fail('Useless rule')

					try:
						checker.setCondition(relativeInd,value,tag)
					except Exception:
						raise fail('Invalid regexs')
					skipSpaces()

			#whatever is left could not be understood
			if dataConsume:
				raise fail('Cannot parse ' + ''.join(dataConsume))

			return checker