D-K-E/cAtfParser.py

## cAtfParser.py
# Packages -------------------

__author__ = "Doğu Kaan Eraslan, <kaaneraslan@gmail.com>"

import re
import itertools

# ---------------------------------

"""
Objects and their relations to each other.

Text
Line
Word
Sign
Al Occurance
Part

Text CONTAINS Line, Word, Sign, AL Occurance, Part

Part CONTAINS Line, Word, Sign, AL Occurance.

Line CONTAINS Word, Sign, CAN CONTAIN AL Occurance.

AL Occurance CONTAINS Word, Sign CAN CONTAIN Line, Part ?

Word CONTAINS Sign

Text Attributes:
Id,
Language,
objectType,
parts
lines
words,
signs,
al_occurances

Part Attributes:
Name/Id,
lines,
words,
signs,
al_occurances

Line attributes:
Comment_Line
Structure_line
Text_line
words,
signs,

AL_Occurance attributes:
Language,
Words,
Signs,
lines,
parts, ?

Word attributes:
Signs

Sign attributes:
Damaged
Unknown reading


API use cases:
find sign X: determines whether X is inside the text and gives the first
location at the user-specified level (first word, first line, first part, etc.).

findall sign X: determines whether X is inside the text and gives all the locations at the user-specified level (word level, line level, part level, etc.).

Find or findall with verbose check: gives the full feature dictionary with the wanted occurrence.

Find X with attribute Y: determines whether there is an X with attribute Y, then gives the first location at the user-specified level.

Findall X with attribute Y: same as find X with attribute Y, but for all the occurrences.

Find if there is anything with the attribute Y.
Findall all those with the attribute Y.

Give Word Count
Give Sign Count
Give Part Count

"""

# random test text -----------------

# Reads the whole sample C-ATF file into the module-level name `test_file`.
# NOTE: this runs when the module is imported; the path is relative, so
# open() raises FileNotFoundError if the file is not found from the
# current working directory.
with open("Archival view of P462811.txt","r",encoding="utf-8") as cAtfFile:
    test_file = cAtfFile.read()

# ---------------------------------

# Block level Functions -----------


# Object part Functions ---------------

def get_object_parts(atf_section):
    """
    Split an ATF section on every newline that is followed by "@".

    params: atf_section, str.
    return: list of str. The first element is the text before the first
    split point; every following element is one part with its leading
    "@" put back.
    """
    # The separator (newline + "@") is consumed by split, so the "@" has to
    # be restored on every piece except the leading one.
    head, *tail = atf_section.split("\n@")
    return [head] + ["@" + piece for piece in tail]


def char_convert(text):
    """
    Convert CDLI C-ATF characters
    to unicode

    params: text, str.
    return: str with the ASCII conventions replaced by their unicode
    counterparts.

    Every ASCII digit in the input is turned into a subscript digit,
    whatever its position in the text.
    """
    # Order matters: the two-character sequences must be handled before the
    # bare apostrophe, otherwise "s'" would lose its apostrophe to the aleph
    # replacement first.
    replacements = (
        ("sz", "\u0161"),  # sz -> š
        ("SZ", "\u0160"),  # SZ -> Š
        ("s,", "\u1e63"),  # s, -> ṣ
        ("S,", "\u1e62"),  # S, -> Ṣ
        ("t,", "\u1e6d"),  # t, -> ṭ
        ("T,", "\u1e6c"),  # T, -> Ṭ
        ("s'", "\u015b"),  # s' -> ś
        ("S'", "\u015a"),  # S' -> Ś
        ("'", "\u02be"),   # ' -> ʾ
        ("0", "\u2080"),   # Subscript numbers
        ("1", "\u2081"),
        ("2", "\u2082"),
        ("3", "\u2083"),
        ("4", "\u2084"),
        ("5", "\u2085"),
        ("6", "\u2086"),
        ("7", "\u2087"),
        ("8", "\u2088"),
        ("9", "\u2089"),
        # Subscript x is U+2093; U+208A, used previously, is the subscript
        # plus sign. Unicode has no upper case subscript X, so both cases
        # map to the same character.
        ("x²", "\u2093"),
        ("X²", "\u2093"),
    )
    for old, new in replacements:
        text = text.replace(old, new)
    return text

def get_words(atf_line):
    """
    Collect the space-delimited chunks of a line.

    params: atf_line, str.
    return: line_words, list of str.

    Each returned chunk keeps the space on both of its sides. Matches do
    not overlap, so a chunk that shares its leading space with the
    trailing space of the previous match is not returned.
    """
    space_bounded = re.compile(" .*? ")
    return space_bounded.findall(atf_line)


def get_signs(atf_word):
    """
    Break a word into its hyphen separated signs.

    params: atf_word, str.
    return: word_signs, list of str.

    A hyphen is inserted after every "}" first, so that whatever ends
    with a closing brace becomes an element of its own.
    """
    # str.replace is a no-op when there is no "}", so no membership test
    # is needed beforehand.
    return atf_word.replace("}", "}-").split("-")


# Line Level Tests ------------------------


class cAtfLineTester(object):
    """
    a class for testing lines of c-atf texts

    Every test_* method inspects self.cAtf_line and returns a boolean.
    The regular expressions are raw strings so that sequences such as
    backslash-d reach the re module untouched instead of going through
    (deprecated) invalid string escapes.
    """

    def __init__(self, atf_line):
        """
        params: atf_line, str. The line to be tested.
        """
        self.cAtf_line = atf_line

    def test_id_line(self):
        """
        return: boolean

        Tests if the line starts with the id marker, i.e. "&P"
        followed by at least one digit.
        """
        return re.search(r"(^&P\d+)", self.cAtf_line) is not None

    def test_language_line(self):
        """
        return: boolean

        Tests if the line contains "atf: lang" anywhere, which is how
        the language of the text is given.
        """
        return re.search(r"atf: lang", self.cAtf_line) is not None

    def test_line_content(self):
        """
        return: boolean

        Tests if the line starts with "#", i.e. if it is a commentary
        about the line content.
        """
        return re.search(r"^#.*", self.cAtf_line) is not None

    def test_object_type_object_part(self):
        """
        return: boolean

        Tests if the line starts with "@", i.e. if it indicates the
        object type or object part.
        """
        return re.search(r"^@.*", self.cAtf_line) is not None

    def test_text_structure(self):
        """
        return: boolean

        Tests if the line starts with "$", i.e. if it is a commentary
        on the text structure.
        """
        return re.search(r"^\$.*", self.cAtf_line) is not None

    def test_text_line(self):
        """
        return: boolean

        Tests if the line starts with a digit, i.e. if it belongs to
        the transliteration of a text.
        """
        return re.search(r"^\d.*", self.cAtf_line) is not None

    def test_lineHas_anotherLanguage(self):
        """
        return: boolean

        Tests if the line contains a stretch enclosed between two
        underscores.
        """
        return re.search(r"_.*?_", self.cAtf_line) is not None


class cAtfALTester(object):
    """
    Predicates about "another language" (AL) occurrences inside a line.
    """

    def __init__(self):
        # All three attributes start out as empty strings; only
        # cAtf_line is read by the tests of this class.
        self.cAtf_line = ""
        self.al_oc = ""
        self.al_word = ""

    def testALHasPreSign(self, al_oc):
        """
        params: al_oc, str.
        return: boolean

        True when "-_" shows up either in the stored line or in the
        given occurrence, i.e. when a sign is hyphenated in front of
        the opening underscore.
        """
        marker = "-_"
        return marker in self.cAtf_line or marker in al_oc

    def test_ALHasFolSign(self, al_oc):
        """
        params: al_oc, str.
        return: boolean

        True when "_-" shows up either in the stored line or in the
        given occurrence, i.e. when a sign is hyphenated after the
        closing underscore.
        """
        marker = "_-"
        return marker in self.cAtf_line or marker in al_oc

    def test_ALSwitch(self, cAtf_alWord):
        """
        params: cAtf_alWord, str.
        return: boolean

        True when the occurrence contains "%", the language switch
        marker.
        """
        return "%" in cAtf_alWord


def test_wordHasAnotherLanguage(atf_word):
    """
    params: atf_word, str.
    return: boolean
    """
    #
    find_logogram = re.search("_.*?_", atf_word)
    #
    if find_logogram is None:
        return False
    else:
        return True

"""
Take the language switcher
from the logogram region,
then encode the signs
inside it in the language
you obtained.
"""


"""
If the logogram has more than one space,
split it at the space and check whether there are
more than two signs (-).

If the logogram has a space,
see if there is a phonetic complement of the logogram after the _

See if the logogram has a sign following
or preceding it.

see

uppercase = unknown reading.

"""

# Word level Tests

# Sign Level Tests -------------------------------

class cAtfWordTester(object):
    """
    Class for testing the signs in a word

    Every test_* method inspects self.cAtf_word and returns a boolean.
    Regular expressions are written as raw strings so that their
    backslashes reach the re module untouched instead of going through
    (deprecated) invalid string escapes.
    """

    def __init__(self, catf_word):
        """
        params: catf_word, str. The word to be tested.
        """
        self.cAtf_word = catf_word

    @staticmethod
    def test_String(string1, string2):
        """
        Returns true if string2
        contains string1
        """
        return string1 in string2

    def test_damaged_sign(self):
        """
        return: boolean

        Returns true if the word has a "#" directly after at least
        one word character.
        """
        return re.search(r"\w+#", self.cAtf_word) is not None

    def test_determinative_sign(self):
        """
        return: boolean

        Returns true if the word has a "{...}" group whose first
        character is a word character.
        """
        return re.search(r"\{\w.*?\}", self.cAtf_word) is not None

    def test_isNumber(self):
        """
        return: boolean

        Returns true if the word contains one of the three number
        forms: digits followed by a parenthesised group, "n" followed
        by a parenthesised group, or "n+" digits followed by a
        parenthesised group.
        """
        number_forms = (
            r"\d+\(\w+.*?\)",
            r"n\(\w+.*?\)",
            r"n\+\d+\(\w+.*?\)",
        )
        return any(
            re.search(form, self.cAtf_word) is not None
            for form in number_forms
        )

    # Punctuation Tests -----------

    def test_isColon(self):
        """
        returns true if the word
        is :
        """
        return self.cAtf_word == ":"

    def test_isDColon(self):
        """
        returns true if the word
        is ::
        """
        return self.cAtf_word == "::"

    def test_isColonRQ(self):
        """
        returns true if the word
        is :'
        """
        return self.cAtf_word == ":'"

    def test_isColonDQ(self):
        """
        returns true if the word
        is a colon followed by a double quote
        """
        return self.cAtf_word == ':"'

    def test_isDoubleColon(self):
        """
        returns true if the word
        is ::
        (same check as test_isDColon)
        """
        return self.cAtf_word == "::"

    def test_isColonPoint(self):
        """
        returns true if the word
        is :.
        """
        return self.cAtf_word == ":."

    def test_isWordDivider(self):
        """
        returns true if the word
        is /
        """
        return self.cAtf_word == "/"

    def test_isWordDivider_Specified(self):
        """
        returns true if the word has
        /(
        """
        return self.test_String("/(", self.cAtf_word)

    # Individual Sign Tests ------------

    def test_has_complement(self):
        """
        Returns true if the sign has
        +
        """
        return "+" in self.cAtf_word

    def test_has_unknownReading(self):
        """
        Returns true if the sign
        is uppercase
        """
        return self.cAtf_word.isupper()

    def test_has_composite(self):
        """
        Returns true if the sign
        has |
        """
        return self.test_String("|", self.cAtf_word)

    def test_has_specification(self):
        """
        Returns true if the sign
        has (
        """
        return self.test_String("(", self.cAtf_word)

    def test_has_query(self):
        """
        Returns true if the sign
        has ?
        """
        return self.test_String("?", self.cAtf_word)

    def test_has_collation(self):
        """
        returns true if the sign
        has *
        """
        return self.test_String("*", self.cAtf_word)

    def test_has_correction(self):
        """
        returns true if the sign
        has !
        """
        return self.test_String("!", self.cAtf_word)

    def test_hasCurved(self):
        """
        returns true if the sign
        has @c
        """
        return self.test_String("@c", self.cAtf_word)

    def test_hasFlat(self):
        """
        returns true if the sign
        has @f
        """
        return self.test_String("@f", self.cAtf_word)

    def test_hasGunu(self):
        """
        returns true if the sign has
        @g
        """
        return self.test_String("@g", self.cAtf_word)

    def test_hasSheshig(self):
        """
        returns true if the sign has
        @s
        """
        return self.test_String("@s", self.cAtf_word)

    def test_hasTenu(self):
        """
        returns true if the sign has
        @t
        """
        return self.test_String("@t", self.cAtf_word)

    def test_hasNutillu(self):
        """
        returns true if the sign has
        @n
        """
        return self.test_String("@n", self.cAtf_word)

    def test_hasZidatenu(self):
        """
        returns true if the sign has
        @z
        """
        return self.test_String("@z", self.cAtf_word)

    def test_hasKabatenu(self):
        """
        returns true if the sign
        has @k
        """
        return self.test_String("@k", self.cAtf_word)

    def test_hasVertReflected(self):
        """
        returns true if the sign
        has @r
        """
        return self.test_String("@r", self.cAtf_word)

    def test_hasHorReflected(self):
        """
        returns true if the sign
        has @h
        """
        return self.test_String("@h", self.cAtf_word)

    def test_hasVariant(self):
        """
        returns true if the sign
        has @v
        """
        return self.test_String("@v", self.cAtf_word)

    def test_hasRotated(self):
        """
        returns true if the
        sign has @ followed by at least one digit
        """
        return re.search(r"@\d+", self.cAtf_word) is not None

    # Compound Sign Tests ------------------

    def test_hasBeside(self):
        """
        returns true if the
        sign has .
        """
        return self.test_String(".", self.cAtf_word)

    def test_hasJoining(self):
        """
        returns true if
        the sign has +
        """
        return self.test_String("+", self.cAtf_word)

    def test_hasAbove(self):
        """
        returns true if the sign
        has &
        """
        return self.test_String("&", self.cAtf_word)

    def test_hasCrossing(self):
        """
        returns true if the sign
        has %
        """
        return self.test_String("%", self.cAtf_word)

    def test_hasAllograph(self):
        """
        returns true if the sign
        has ~
        """
        return self.test_String("~", self.cAtf_word)

    def test_hasSpecialAllograph(self):
        """
        returns true if the sign
        has ~v
        """
        return self.test_String("~v", self.cAtf_word)

    def test_hasFormVariant(self):
        """
        returns true if the sign
        has a backslash
        """
        return self.test_String("\\", self.cAtf_word)

    def test_hasContaining(self):
        """
        returns true if the sign
        has x
        """
        return self.test_String("x", self.cAtf_word)

    def test_hasContaining_Group(self):
        """
        returns true if the sign
        has x(
        """
        return self.test_String("x(", self.cAtf_word)

# TODO Take the signs for specified punctuations
# TODO Take the sign from numbers


# ----------------------------------------------------

class cAtfSignTester(object):
    """
    Class for testing signs in order to build a sign dict afterwards

    Every non-static test_* method inspects self.catf_sign and returns
    a boolean.
    """

    def __init__(self, cAtf_Sign):
        """
        params: cAtf_Sign, str. The sign to be tested.
        """
        self.catf_sign = cAtf_Sign

    @staticmethod
    def test_String(string1, string2):
        """
        Returns true if string2
        contains string1
        """
        return string1 in string2

    def test_isDamaged(self):
        """
        Returns true if the self.catf_sign
        has #
        """
        return self.test_String("#", self.catf_sign)

    def test_isComplement(self):
        """
        Returns true if the self.catf_sign has
        + and also passes test_isComposite (has |)
        """
        # NOTE(review): the composite requirement is kept as found; confirm
        # that "+" inside a |...| group is really what is meant here.
        return self.test_String("+", self.catf_sign) and self.test_isComposite()

    def test_isUnknownReading(self):
        """
        Returns true if the self.catf_sign
        is uppercase
        """
        return self.catf_sign.isupper()

    def test_isComposite(self):
        """
        Returns true if the self.catf_sign
        has |
        """
        return self.test_String("|", self.catf_sign)

    def test_isSpecification(self):
        """
        Returns true if the self.catf_sign
        has (
        """
        return self.test_String("(", self.catf_sign)

    def test_is_query(self):
        """
        Returns true if the self.catf_sign
        has ?
        """
        return self.test_String("?", self.catf_sign)

    def test_is_collation(self):
        """
        returns true if the self.catf_sign
        has *
        """
        return self.test_String("*", self.catf_sign)

    def test_is_correction(self):
        """
        returns true if the self.catf_sign
        has !
        """
        return self.test_String("!", self.catf_sign)

    # Modifier Tests ------------------------

    def test_isCurved(self):
        """
        returns true if the self.catf_sign
        has @c
        """
        return self.test_String("@c", self.catf_sign)

    def test_isFlat(self):
        """
        returns true if the self.catf_sign
        has @f
        """
        return self.test_String("@f", self.catf_sign)

    def test_isGunu(self):
        """
        returns true if the self.catf_sign has
        @g
        """
        return self.test_String("@g", self.catf_sign)

    def test_isSheshig(self):
        """
        returns true if the self.catf_sign has
        @s
        """
        return self.test_String("@s", self.catf_sign)

    def test_isTenu(self):
        """
        returns true if the self.catf_sign has
        @t
        """
        return self.test_String("@t", self.catf_sign)

    def test_isNutillu(self):
        """
        returns true if the self.catf_sign has
        @n
        """
        return self.test_String("@n", self.catf_sign)

    def test_isZidatenu(self):
        """
        returns true if the self.catf_sign has
        @z
        """
        return self.test_String("@z", self.catf_sign)

    def test_isKabatenu(self):
        """
        returns true if the self.catf_sign
        has @k
        """
        return self.test_String("@k", self.catf_sign)

    def test_isVertReflected(self):
        """
        returns true if the self.catf_sign
        has @r
        """
        return self.test_String("@r", self.catf_sign)

    def test_isHorReflected(self):
        """
        returns true if the self.catf_sign
        has @h
        """
        return self.test_String("@h", self.catf_sign)

    def test_isVariant(self):
        """
        returns true if the self.catf_sign
        has @v
        """
        return self.test_String("@v", self.catf_sign)

    def test_isRotated(self):
        """
        returns true if the
        self.catf_sign has @ followed by at least one digit
        """
        return re.search(r"@\d+", self.catf_sign) is not None

    def test_isModifier(self):
        """
        returns true
        if the self.catf_sign passes at least one
        of the tests related to modifiers
        """
        # The individual tests take no argument besides self; passing the
        # sign to them again, as was done before, raised a TypeError on
        # every call.
        modifier_tests = (
            self.test_isRotated,
            self.test_isVariant,
            self.test_isHorReflected,
            self.test_isCurved,
            self.test_isFlat,
            self.test_isGunu,
            self.test_isSheshig,
            self.test_isTenu,
            self.test_isNutillu,
            self.test_isZidatenu,
            self.test_isKabatenu,
            self.test_isVertReflected,
        )
        return any(check() for check in modifier_tests)

    # Compound Sign Tests ------------------

    @staticmethod
    def test_isBinaryScope(operator):
        """
        Tests if the operator has
        binary scope, i.e. if it is & or %;
        the x and the @ will be
        handled individually
        """
        return operator in ("&", "%")

    def test_hasBeside(self):
        """
        returns true if the
        self.catf_sign has .
        """
        return self.test_String(".", self.catf_sign)

    def test_hasJoining(self):
        """
        returns true if
        the self.catf_sign has +
        """
        return self.test_String("+", self.catf_sign)

    def test_hasContaining(self):
        """
        returns true if the self.catf_sign
        has x
        """
        return self.test_String("x", self.catf_sign)

    def test_hasContaining_Group(self):
        """
        returns true if the self.catf_sign
        has x(
        """
        return self.test_String("x(", self.catf_sign)

    def test_hasAbove(self):
        """
        returns true if the self.catf_sign
        has &
        """
        return self.test_String("&", self.catf_sign)

    def test_hasCrossing(self):
        """
        returns true if the self.catf_sign
        has %
        """
        return self.test_String("%", self.catf_sign)

    def test_hasOpposing(self):
        """
        returns true if the sign has @ and what
        is left after dropping every @, digit and
        space is in uppercase
        """
        if not self.test_String("@", self.catf_sign):
            return False
        without_at = self.catf_sign.replace("@", " ")
        without_digits = re.sub(r"\d+", "", without_at)
        return without_digits.replace(" ", "").isupper()

    def test_hasAllograph(self):
        """
        returns true if the self.catf_sign
        has ~
        """
        return self.test_String("~", self.catf_sign)

    def test_hasSpecialAllograph(self):
        """
        returns true if the self.catf_sign
        has ~v
        """
        return self.test_String("~v", self.catf_sign)

    def test_hasFormVariant(self):
        """
        returns true if the self.catf_sign
        has a backslash
        """
        return self.test_String("\\", self.catf_sign)

    def test_hasRepeated(self):
        """
        returns true if the sign has x and
        the part before the first x consists
        of digits only
        """
        if not self.test_String("x", self.catf_sign):
            return False
        return self.catf_sign.split("x")[0].isdigit()


# -------------------------------------------

class cAtfLineGetter(cAtfLineTester):
    """
    a class for getting text lines
    according to tests

    Each get_* method runs the matching test inherited from
    cAtfLineTester, stores what it extracted on the instance and returns
    it. When the line does not qualify, the attribute keeps the value it
    had before the call (get_line_number is the exception and stores
    None).
    """

    def __init__(self, atf_line):
        """
        params: atf_line, str. The line to extract from.
        """
        super().__init__(atf_line)
        self.cAtf_line = atf_line
        self.text_id = ""
        self.text_id_alternatives = []
        self.text_lang = ""
        self.content_comment_line = ""
        self.objectSurface_title = ""
        self.structure_comment = ""
        self.text_line = ""
        self.lineNumber = 0
        self.lineWordCount = 0
        self.lineWords = []
        self.lineText = ""

    def _line_number_match(self):
        """
        Match the leading "<digits>. " prefix of a text line.

        return: the match object (group 1 holds the digits), or None
        when the line is not a text line or starts with a digit without
        having that exact prefix.
        """
        if not self.test_text_line():
            return None
        return re.search(r"^(\d+)\.\s", self.cAtf_line)

    def _split_line_words(self):
        """
        Split the line text on single spaces and drop the empty pieces.

        A new list is built rather than removing items from the list
        being iterated, which used to leave empty strings behind when
        several spaces followed each other.
        """
        return [word for word in self.get_line_text().split(" ") if word]

    def get_id_line(self):
        """
        checks the line for
        conforming the id no syntax,
        then gets it (without the leading &).
        """
        if self.test_id_line():
            # The id is captured directly, so a line that ends right
            # after the id, with no whitespace, no longer breaks on a
            # missing match.
            id_match = re.search(r"^&(P\d+)", self.cAtf_line)
            if id_match is not None:
                self.text_id = id_match.group(1)
        return self.text_id

    def get_id_alternatives(self):
        """
        Checks the line for id syntax.
        Gets the id alternatives
        separated with the "=".
        """
        if self.test_id_line():
            # Everything before the first "=" is the id itself.
            alternatives = self.cAtf_line.split("=")[1:]
            self.text_id_alternatives = [
                alternative.strip() for alternative in alternatives
            ]
        return self.text_id_alternatives

    def get_language_line(self):
        """
        Checks the line for
        language protocol syntax
        Gets the indicated language
        """
        if self.test_language_line():
            lang_match = re.search(r"atf: lang.*", self.cAtf_line)
            if lang_match is not None:
                declaration = lang_match.group(0)
                self.text_lang = declaration[len("atf: lang "):].strip()
        return self.text_lang

    def get_content_comment(self):
        """
        Checks the line for
        content comment syntax
        ie #.
        Gets the content comment line
        """
        if self.test_line_content():
            comment_match = re.search(r"^#.*", self.cAtf_line)
            if comment_match is not None:
                self.content_comment_line = comment_match.group(0)
        return self.content_comment_line

    def get_object_part_title(self):
        """
        Checks if the line starts with @.
        Gets the line if it does.
        """
        if self.test_object_type_object_part():
            title_match = re.search(r"^@.*", self.cAtf_line)
            if title_match is not None:
                self.objectSurface_title = title_match.group(0)
        return self.objectSurface_title

    def get_structure_comment(self):
        """
        Checks if the line starts with $
        Gets the line if it does.
        """
        if self.test_text_structure():
            structure_match = re.search(r"^\$.*", self.cAtf_line)
            if structure_match is not None:
                self.structure_comment = structure_match.group(0)
        return self.structure_comment

    def get_text_line(self):
        """
        Checks if the line starts with digits, a dot and a whitespace.
        Gets the line if it does.
        """
        if self.test_text_line():
            # test_text_line only asks for a leading digit, so the
            # stricter pattern below may still fail to match.
            line_match = re.search(r"^\d+\.\s.*", self.cAtf_line)
            if line_match is not None:
                self.text_line = line_match.group(0)
        return self.text_line

    def get_line_text(self):
        """
        Gets the line text
        excluding the line number.
        """
        number_match = self._line_number_match()
        if number_match is not None:
            # Getting rid of the line number
            self.lineText = self.cAtf_line[number_match.end():]
        return self.lineText

    def get_line_number(self):
        """
        return: self.lineNumber, int or None.
        Checks if the line is text line
        gets the line number if it is,
        None otherwise.
        """
        number_match = self._line_number_match()
        if number_match is not None:
            self.lineNumber = int(number_match.group(1))
        else:
            self.lineNumber = None
        return self.lineNumber

    def get_line_word_count(self):
        """
        gets the number of words in text line
        assuming that they are
        seperated by whitespace
        """
        self.lineWordCount = len(self._split_line_words())
        return self.lineWordCount

    def get_line_words(self):
        """
        return: lineWords, []
        Gets the whitespace delimited
        words in line
        """
        self.lineWords = self._split_line_words()
        return self.lineWords


class cAtfLineDictBuilder(cAtfLineGetter):
    """
    class for building the line_dict,
    dictionary.
    """
    #
    def __init__(self, atf_line):
        """
        params: atf_line, the raw line handed on to the parent class.
        Stores the line and starts with both flags off and an
        empty line dictionary.
        """
        super().__init__(atf_line)
        # Result container filled by lineDictBuild.
        self.lineDict = dict()
        # Flags updated by isLineStruc / isLineCom.
        self.isLineStructure, self.isLineComment = False, False
        self.cAtf_line = atf_line
        #
    #
    def isLineStruc(self):
        """
        Test if the line is a
        structure comment
        """
        #
        if self.test_text_structure() == True:
            self.isLineStructure = True
            #
        else:
            self.isLineStructure = False
        #
        return self.isLineStructure
    #
    def isLineCom(self):
        """
        test if the line is a
        comment about the content
        """
        #
        if self.test_line_content() == True:
            self.isLineComment = True
        else:
            self.isLineComment = False
        #
        return self.isLineComment
    #
    #
    def lineDictBuild(self):
        """
        builds the line dict
        based on preeceding
        methods
        """
        #
        self.lineDict["isLineStructure"] = self.isLineStruc()
        self.lineDict["isLineContent"] = self.isLineCom()
        self.lineDict["lineNumber"] = self.get_line_number()
        self.lineDict["lineWordCount"] = self.get_line_word_count()
        self.lineDict["lineText"] = self.get_line_text()
        self.lineDict["lineWords"] = list(set(self.get_line_words()))
        # Removed duplicates, for efficiency.
        self.lineDict["lineWordPos"] = list(enumerate(self.get_line_words()))
        if len(self.lineDict["lineWords"]) == 0 and self.lineDict["lineNumber"] is None:
            return None
        else:
            pass
        #
        return self.lineDict
    #


class cAtfALHandler(cAtfALTester):
    """
    Handle Another Language occurances.
    """
    #
    def __init__(self, cAtf_part):
        """
        params: cAtf_part, the part text that splitPartLines splits.
        Starts every working list empty and both languages unset.
        """
        super().__init__()
        # Input and its line-level views.
        self.cAtf_part = cAtf_part
        self.cAtf_part_lines, self.lineDict_list = [], []
        # Another-language (AL) references and their pairings.
        self.alRef_list, self.alGroup_list = [], []
        # Groups split by whether they stay on one line or not.
        self.mulAlOc_group_list, self.singAlOc_group_list = [], []
        self.mulAlOc_line_list, self.mulAlOc_lineDict_list = [], []
        # Finished occurrences.
        self.mulAlOcS, self.singAlOcS = [], []
        self.alOc_list, self.AlOcS = [], []
        # Languages, set through set_ALOC_lang / set_textLang.
        self.alLanguage = ""
        self.textLang = ""
        #
    #
    #
    def set_ALOC_lang(self, lang):
        """
        Sets the value of self.alLanguage
        """
        #
        self.alLanguage = lang
        #
        return self.alLanguage
    #
    def set_textLang(self, lang):
        """
        Sets the value of self.textLang
        """
        #
        self.textLang = lang
        #
        return self.textLang
    #
    def splitPartLines(self):
        """
        params: self.cAtf_part, str.
        return: self.cAtf_part_lines, []
        splits the part into lines
        """
        #
        self.cAtf_part_lines = self.cAtf_part.splitlines()
        #
        return self.cAtf_part_lines
    #
    @staticmethod
    def lineDictBuild(cAtf_line):
        """
        params: cAtf_line, a single line of the part.
        return: whatever cAtfLineDictBuilder.lineDictBuild
        returns for that line.
        """
        return cAtfLineDictBuilder(cAtf_line).lineDictBuild()
    #
    #
    def get_lineDict_list(self):
        """
        params: self.cAtf_part_lines, []
        return: self.lineDict_list, []

        gets the lines in dict form
        """
        #
        for cAtf_line in self.cAtf_part_lines:
            lineDict = self.lineDictBuild(cAtf_line)
            if lineDict is not None:
                self.lineDict_list.append(lineDict)
        #
        return self.lineDict_list
    #
    @staticmethod
    def test_twoTimesUnScore(lineWord):
        """
        Tests if a word has the underscore
        two times or not.
        """
        #
        unscoCount = lineWord.count("_")
        if unscoCount == 2:
            return True
        elif unscoCount == 1:
            return False
        elif unscoCount < 1:
            return None
        else:
            pass
        #
        return None
    #
    def get_ALRefs_lineLevel(self):
        """
        Searches whether words of a line
        contain a another language switch
        If the word contains the underscore 2 times
        it is added 2 times for facilitating grouping
        after.
        """
        #
        lineDict_list_sorted = sorted(self.lineDict_list, key=lambda lineDict:lineDict["lineNumber"])
        for lineDict in lineDict_list_sorted:
            lw_list = list(lineDict["lineWordPos"])
            line_word_list_sorted = sorted(lw_list, key=lambda wpTuple:wpTuple[0])
            for WordP, lineWord in line_word_list_sorted:
                if self.test_twoTimesUnScore(lineWord) is True:
                    self.alRef_list.append((WordP, lineWord, lineDict["lineNumber"]))
                    self.alRef_list.append((WordP, lineWord, lineDict["lineNumber"]))
                elif self.test_twoTimesUnScore(lineWord) is False:
                    # (1, WORD, lineNO)
                    self.alRef_list.append((WordP, lineWord, lineDict["lineNumber"]))
                    #
                #
    @staticmethod
    def grouper(iterable, n, fillvalue=None):
        "Collect data into fixed-length chunks or blocks"
        # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx"
        args = [iter(iterable)] * n
        #
        return itertools.zip_longest(*args, fillvalue=fillvalue)
        #
    def group_ALRefs(self):
        """
        Groups the AL references for
        marking the AL occurances
        """
        #
        al_ref_groups = self.grouper(self.alRef_list, 2)
        # There should be no need for a fill value, but ...
        # I am hesitating...
        self.alGroup_list = list(al_ref_groups)
        #
        return self.alGroup_list
    #
    @staticmethod
    def test_multilineALGroup(ALGroup):
        """
        params: ALGroup, ((()),(()))
        return: boolean
        Tests if the AL references
        stocked in the al group
        points to a AL occurance
        that spreads into multiple
        lines
        """
        #((11, '_re-e2-um', 1), (11, 're-e2-um_', 2))
        start_point = ALGroup[0]
        end_point = ALGroup[1]
        # (1, WORD, lineNO), (1, WORD, lineNO)
        #
        if start_point[2] != end_point[2]:
            return True
        else:
            return False
        #
        #
    @staticmethod
    def test_singALGroup(ALGroup):
        """
        params: ALGroup, ((()),(()))
        return: boolean
        Tests if the AL references
        stocked in the al group
        points to a AL occurance
        that is confined to 1 line
        """
        #
        # ((11, '_re-e2-um_', 1), (11, '_re-e2-um_', 1))
        start_point = ALGroup[0]
        end_point = ALGroup[1]
        # (1, WORD, lineNO), (1, WORD, lineNO)
        #
        if start_point[2] == end_point[2]:
            return True
        else:
            return False
    #
    def populate_mulALOC_refs(self):
        """
        Populates the multiline
        AL occurance reference list.
        """
        #
        self.mulAlOc_group_list = []
        for alGroup in self.alGroup_list:
            if self.test_multilineALGroup(alGroup) is True:
                self.mulAlOc_group_list.append(alGroup)
                #
            else:
                pass
        #
        return self.mulAlOc_group_list
    #
    #
    def populate_singALOC_refs(self):
        """
        Populates the single line
        AL occurance reference list.
        """
        #
        self.singAlOc_group_list = []
        for alGroup in self.alGroup_list:
            if self.test_singALGroup(alGroup) is True:
                self.singAlOc_group_list.append(alGroup)
                #
            #
        return self.singAlOc_group_list
    #
    #
    @staticmethod
    def get_mulAlOc_lines(alGroup, lineDictList):
        """
        Gets the related lines from the lineDictList, by
        using the alGroup elements as point of reference.
        """
        #
        start_point = alGroup[0]
        end_point = alGroup[1]
        #
        mulAlOc_line_range = range(start_point[2],end_point[2]+1)
        # (1, WORD, lineNO), (1, WORD, lineNO)
        # +1 compensates the function's exclusion of the final element
        mulAlOc_group_line_dict_list = []
        #
        for lineDict in lineDictList:
            if lineDict["lineNumber"] in mulAlOc_line_range:
                mulAlOc_group_line_dict_list.append(lineDict)
                #
        #
        return mulAlOc_group_line_dict_list
    #
    def get_mulAlOc_lineDict_list(self):
        """
        Gets the related lineDicts for
        AL occurances that spread to multiple lines
        """
        #
        self.mulAlOc_lineDict_list = []
        #
        for mulAlOc in self.mulAlOc_group_list:
            line_list = self.get_mulAlOc_lines(mulAlOc,self.lineDict_list)
            self.mulAlOc_lineDict_list.append(line_list)
            #
        #
        return self.mulAlOc_lineDict_list
    #
    @staticmethod
    def get_FW_mulAlOc(mulAlOc_group):
        """
        Gets the First Word and its position of the
        AL Occurance that spreads to multiple
        lines.
        """
        #
        first_item_dict = {}
        mulAlOc_group_sort = sorted(mulAlOc_group, key=lambda lineDict:lineDict["lineNumber"])
        #
        mulAlOc_first_lineDict = mulAlOc_group_sort[0]
        fLineDict_words = mulAlOc_first_lineDict["lineWordPos"]
        #
        for wordPos, flineWord in fLineDict_words:
            if "_" in flineWord:
                first_item_dict[flineWord] = wordPos
        #
        first_item_sort = sorted(tuple(first_item_dict.items()), key=lambda wordWP:wordWP[1])
        first_item = (first_item_sort[-1],mulAlOc_first_lineDict["lineNumber"])
        #
        return first_item
    #
    @staticmethod
    def get_LW_mulAlOc(mulAlOc_group):
        """
        Gets the Last Word and its position of
        the AL Occurance that spreads to multiple
        lines
        """
        #
        last_item_dict = {}
        mulAlOc_group_sort =  sorted(mulAlOc_group, key=lambda lineDict:lineDict["lineNumber"])
        mulAlOc_last_lineDict = mulAlOc_group_sort[-1]
        #
        laLineDict_words = mulAlOc_last_lineDict["lineWords"]
        #
        for lalineWord in laLineDict_words:
            if "_" in lalineWord:
                last_item_dict[lalineWord] = laLineDict_words.index(lalineWord)
        #
        last_item_sort = sorted(tuple(last_item_dict.items()), key=lambda wordWP:wordWP[1])
        last_item = (last_item_sort[0],mulAlOc_last_lineDict["lineNumber"])
        #
        return last_item
    #
    def get_ALOC_lang(self,alOc):
        """
        Gets the AL occurance language
        if it has one specified with
        %,
        if not, we get the specified AL language
        in the constructor
        """
        alWord = alOc[0]
        #
        if self.test_ALSwitch(alWord) is True:
            alword_find = re.search("%\w+",alWord)
            alword_get = alword_find.group(0)
        else:
            alword_get = self.alLanguage
        #
        return alword_get
    #
    def mk_mulAlOc(self, first_item, last_item, mulAlOc_group):
        """
        params:
        first_item, ()
        last_item, ()
        mulAlOc_group, [{},{}, ... ]

        Creates multiline AL Occurance from the parameters.
        alWord_word, str. Another Language word in AL_occurance
        alWord_LineNumber, int. The line number for the al_word
        alWord_AlOc_Position, dict. Relative position of the alWord inside the AL_occurance.
        alWord_AlOc, str. Al_occurance in which the al_word is observed
        alWord_AlOc_LineNumber, list. Line number(s) in which the al_oc is observed
        alWord_LinePosition, dict. Relative position of the alWord inside the Line.

        """
        #
        alWord_dict_list = []
        #
        alOc_words = []
        #
        for lineDict in mulAlOc_group:
            lineNo = lineDict["lineNumber"]
            lineWordPos = lineDict["lineWordPos"]
            lineWCount = lineDict["lineWordCount"]
            #
            for wordPos, lineWord in lineWordPos:
                #
                if lineNo == first_item[1] and wordPos >= first_item[0][1]:
                    alOc_words.append((lineWord,wordPos,lineNo,lineWCount))
                elif first_item[1] < lineNo < last_item[1]:
                    alOc_words.append((lineWord,wordPos,lineNo, lineWCount))
                elif lineNo == last_item[1] and wordPos <= last_item[0][1]:
                    alOc_words.append((lineWord,wordPos,lineNo, lineWCount))
                else:
                    pass
            #
        #
        alOc_words_sorted = sorted(alOc_words, key=lambda al:(al[2],al[1]))
        alOc_word_list = [al[0] for al in alOc_words_sorted]
        alOc_line_list = [al[2] for al in alOc_words_sorted]
        alOc_text = " ".join(alOc_word_list)
        alOc_wordPos = enumerate(alOc_words_sorted)
        #
        for wordP, alOc_tuple in alOc_wordPos:
            alWord_dict = {}
            alWord_dict["alWord_word"] = alOc_tuple[0]
            alWord_dict["alWord_LineNumber"] = alOc_tuple[2]
            alWord_dict["alWord_AlOc"] = alOc_text
            alWord_dict["alWord_language"] = self.get_ALOC_lang(alOc_word_list)
            alWord_dict["alWord_textLanguage"] = self.textLang
            alWord_dict["alWord_alOc_LineNumber"] = alOc_line_list
            alOc_pos_dict = {}
            alOc_pos_dict["totalWords_AlOc"] = len(alOc_word_list)
            alOc_pos_dict["alWord_Position"] = wordP
            alWord_dict["alWord_AlOc_Position"] = alOc_pos_dict
            alOc_line_dict = {}
            alOc_line_dict["totalWords_Line"] = alOc_tuple[3]
            alOc_line_dict["alWord_Position"] = alOc_tuple[1]
            alWord_dict["alWord_LinePosition"] = alOc_line_dict
            alWord_dict_list.append(alWord_dict)
        #
        return alWord_dict_list
    #
    #
    def get_mulAlOcS(self):
        """
        Gets the AL Occurances that spread into multiple lines
        as lists of another language word dictionary
        """
        #
        self.mulAlOcS = []
        #
        for mulAlOc_group in self.get_mulAlOc_lineDict_list():
            first_point = self.get_FW_mulAlOc(mulAlOc_group)
            last_point = self.get_LW_mulAlOc(mulAlOc_group)
            mulAlOc = self.mk_mulAlOc(first_point, last_point, mulAlOc_group)
            self.mulAlOcS.append(mulAlOc)
            #
        #
        return self.mulAlOcS
    #
    @staticmethod
    def get_AlRefs_WordLevel(lineWP):
        """
        params: lineWP, ()
        Gets the starting point and
        end point of the AL Occurance observed
        in a single line

        """
        # lineWP == (WordPOS, WORD, LineNumber )
        #
        alRef_WP_list = []
        #
        if "_" in lineWP[1]:
            alRef_WP_list.append((lineWP[0], lineWP[1]))
            # (WORDPOS, WORD)
        #
        return alRef_WP_list
    #
    def group_ALRef_sing_Wordlevel(self, alRef_WP_list):
        """
        groups the AL occurance references
        observed in a single line
        """
        #
        alRef_WP_groups = self.grouper(alRef_WP_list,2)
        #
        return alRef_WP_groups
    #
    def mk_singAlOc(self, lineDict_list,alRef_WP_group):
        """
        params: lineDict, {}
        alRef_WP_group, ()
        Creates the AL occurance from the lineDict,
        by using the values in the alRef_WP_groups
        """
        #
        alWord_dict_list = []
        #
        alRef_WP_group_sort = sorted(alRef_WP_group, key=lambda alRef:alRef[0])
        alRef_WP_range = range(alRef_WP_group_sort[0][0], alRef_WP_group_sort[1][0]+1)
        #
        lineDict = list(filter(lambda Ldicts: Ldicts.get("lineNumber") == alRef_WP_group[0][2], lineDict_list))[0]
        # Gets the lineDict from the lineDict list for the relative
        # al occurance
        #
        alOc_words = []
        #
        lineWordPos = lineDict["lineWordPos"]
        #
        for WP, word in lineWordPos:
            if WP in alRef_WP_range:
                alOc_words.append((WP, word))
            #
        #
        alOc_words_sorted = sorted(alOc_words, key=lambda alWords:alWords[0])
        alOc_word_list = [al[1] for al in alOc_words_sorted]
        alOc_text = " ".join(alOc_word_list)
        alOc_wordPos = enumerate(alOc_words_sorted)
        #
        for WP, alWordTuple in alOc_wordPos:
            alWord_dict = {}
            alWord_dict["alWord_word"] = alWordTuple[1]
            alWord_dict["alWord_textLanguage"] = self.textLang
            alWord_dict["alWord_language"] = self.get_ALOC_lang(alOc_word_list)
            alWord_dict["alWord_LineNumber"] = lineDict["lineNumber"]
            alWord_dict["alWord_AlOc"] = alOc_text
            alWord_dict["alWord_alOc_LineNumber"] = lineDict["lineNumber"]
            alOc_pos_dict = {}
            alOc_pos_dict["totalWords_AlOc"] = len(alOc_word_list)
            alOc_pos_dict["alWord_Position"] = WP
            alWord_dict["alWord_AlOc_Position"] = alOc_pos_dict
            alOc_line_dict = {}
            alOc_line_dict["totalWords_Line"] = lineDict["lineWordCount"]
            alOc_line_dict["alWord_Position"] = alWordTuple[0]
            alWord_dict["alWord_LinePosition"] = alOc_line_dict
            alWord_dict_list.append(alWord_dict)
        #
        return alWord_dict_list
    #
    def get_singALOcS(self):
        """
        Gets AL Occurances confined to a single
        line as list of AL word dictionary.
        """
        #
        self.singAlOcS = []
        #
        for singAlOc_group in self.singAlOc_group_list:
            # singAlOc_group == ((10, '_kur_', 62), (10, '_kur_', 62))
            # (WORDPOS, WORD)
            singAlOc = self.mk_singAlOc(self.lineDict_list,singAlOc_group)
            self.singAlOcS.append(singAlOc)
        #
        return self.singAlOcS
    #
    def get_ALOcS(self):
        """
        General Method for regrouping
        The methods above.
        """
        #
        self.splitPartLines()
        self.get_lineDict_list()
        self.get_ALRefs_lineLevel()
        self.group_ALRefs()
        self.populate_mulALOC_refs()
        self.populate_singALOC_refs()
        self.get_mulAlOc_lineDict_list()
        self.get_mulAlOcS()
        self.get_singALOcS()
        #
        self.alOc_list = self.mulAlOcS + self.singAlOcS
        flatten_alOc_list = list(itertools.chain.from_iterable(self.alOc_list))
        sort_aloc_list = sorted(flatten_alOc_list, key=lambda alword_dict:(alword_dict["alWord_LineNumber"],alword_dict["alWord_LinePosition"]["alWord_Position"]))
        self.AlOcS = []
        for key, group in itertools.groupby(sort_aloc_list, key=lambda alWord_dict:alWord_dict["alWord_AlOc"]):
            self.AlOcS.append(list(group))
            #
        #
        return self.AlOcS


class cAtfWordDictBuilder(cAtfWordTester):
    """
    Class for building Word dictionaries
    of a normal text line
    """
    #
    def __init__(self,cAtf_Word):
        """
        params: cAtf_Word, the raw word handed on to the parent class.
        Starts every working list empty and every language unset.
        """
        super().__init__(cAtf_Word)
        self.word = cAtf_Word
        # Word after the sign seperators have been set.
        self.clean_word = ""
        # Signs and their positions.
        self.signList, self.signList_pos = [], []
        self.wordPos_list, self.lineDict_list = [], []
        # Determinative work lists.
        self.det_signList, self.detMarkList = [], []
        self.detRef_general_list, self.detRef_Group_list = [], []
        self.detDict_list = []
        # Languages, set through the set_* methods.
        self.textLang = ""
        self.wordLang = ""
        self.detLang = ""
        # Result container filled by wordDictBuild.
        self.wordDict = dict()
    #
    #
    def set_textLang(self, lang):
        """
        Text language attribute
        """
        #
        self.textLang = lang
        #
        return self.textLang
    #
    def set_wordLang(self, value):
        """
        Word Language property
        """
        #
        self.wordLang = value
        #
        return self.wordLang
    #
    def set_detLang(self,value):
        """
        Set Determinative Language
        """
        #
        self.detLang = value
        #
        return self.detLang
    #
    @staticmethod
    def set_sign_seperator_curvR(cAtf_Word):
        """
        Sets the sign seperator -
        to the entities with
        parantheses
        """
        #
        if "}" in cAtf_Word and "}-" in cAtf_Word and "}#" in cAtf_Word:
            rep_string = cAtf_Word.replace("}#","#}")
            rep_word = rep_string.replace("}-","}")
            curv_par_sep = rep_word.split("}")
            curv_par = "}-".join(curv_par_sep)
        elif "}" in cAtf_Word and "}-" not in cAtf_Word and "}#" in cAtf_Word:
            rep_word = cAtf_Word.replace("}#","#}")
            curv_par = rep_word.replace("}","}-")
        elif "}#" in cAtf_Word:
            curv_par = cAtf_Word.replace("}#","#}")
        else:
            curv_par = cAtf_Word

        #
        return curv_par
    #
    @staticmethod
    def set_sign_seperator_curvL(cAtf_Word):
        """
        Sets the sign seperator -
        to the entities with
        parantheses
        """
        #
        if "{" in cAtf_Word and "-{" in cAtf_Word:
            rep_word = cAtf_Word.replace("-{","{")
            curv_par_sep = rep_word.split("{")
            curv_par = "-{".join(curv_par_sep)
        elif "{" in cAtf_Word and "-{" not in cAtf_Word:
            curv_par = cAtf_Word.replace("{","-{")
        else:
            curv_par = cAtf_Word

        return curv_par
    #
    @staticmethod
    def set_sign_seperator_corBL(cAtf_Word):
        """
        Sets the sign seperator -
        to the entities with
        parantheses
        """
        #
        if "[" in cAtf_Word and "-[" in cAtf_Word:
            rep_word = cAtf_Word.replace("-[","[")
            corn_par_sep = rep_word.split("[")
            corn_par = "-[".join(corn_par_sep)
        elif "[" in cAtf_Word and "-[" not in cAtf_Word:
            corn_par = cAtf_Word.replace("[","-[")
        else:
            corn_par = cAtf_Word
        #
        return corn_par
    #
    @staticmethod
    def set_sign_seperator_corBR(cAtf_Word):
        """
        Sets the sign seperator -
        to the entities with
        parantheses
        """
        #
        if "]" in cAtf_Word and "]-" in cAtf_Word:
            rep_word = cAtf_Word.replace("]-","]")
            corn_par_sep = rep_word.split("]")
            corn_par = "]-".join(corn_par_sep)
        elif "]" in cAtf_Word and "]-" not in cAtf_Word:
            corn_par = cAtf_Word.replace("]","]-")
        else:
            corn_par = cAtf_Word
        #
        return corn_par
    #
    @staticmethod
    def cleanWord(cWord):
        """
        Cleans the excessive
        sign seperators that might
        have been generated by the
        set_sign_seperators method
        """
        #
        first_el = cWord[0]
        last_el = cWord[-1]
        #
        if "-" == first_el:
            cWord = cWord[1:]
        elif "-" == last_el:
            cWord = cWord[:-1]
        else:
            pass
        #
        return cWord
    #
    #
    def set_sign_seperators(self):
        """
        Uses the previous sign
        seperator methods to add
        sign seperator - to right
        places
        """
        #
        cvl_word = self.set_sign_seperator_curvL(self.cAtf_word)
        cvr_word = self.set_sign_seperator_curvR(cvl_word)
        crl_word = self.set_sign_seperator_corBL(cvr_word)
        crr_word = self.set_sign_seperator_corBR(crl_word)
        self.clean_word = self.cleanWord(crr_word)
        #
        return self.clean_word
    #
    @staticmethod
    def seperate_signs(clean_word):
        """
        Seperates the signs and assigns
        them an index number.
        """
        #
        sign_list_brut = clean_word.split("-")
        sign_list = [sign.strip() for sign in sign_list_brut if sign.strip()]
        sign_list = sign_list
        #
        return sign_list
    #
    def get_detRefs(self):
        """
        Gets the starting point and the end point
        of determinatives
        """
        #
        signList_unsort = self.seperate_signs(self.clean_word)
        self.signList = signList_unsort
        signList_pos = list(enumerate(signList_unsort))
        self.signList_pos = sorted(signList_pos, key=lambda signPos:signPos[0])
        # (0,'lu'),(1, 'mesz'), etc.
        #
        self.detRef_general_list = []
        #
        for signPos, sign in self.signList_pos:
            if "{" in sign and "}" in sign:
                self.detRef_general_list.append((signPos,sign))
                self.detRef_general_list.append((signPos,sign))
            elif "{" in sign or "}" in sign:
                self.detRef_general_list.append((signPos,sign))
                #
            else:
                pass
            #
        #
        return self.detRef_general_list
    #
    @staticmethod
    def grouper(iterable, n, fillvalue=None):
        "Collect data into fixed-length chunks or blocks"
        # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx"
        args = [iter(iterable)] * n
        #
        return itertools.zip_longest(*args, fillvalue=fillvalue)
        #
    def group_detRefs(self):
        """
        Groups the AL references for
        marking the AL occurances
        """
        #
        det_ref_groups = self.grouper(self.detRef_general_list, 2)
        #
        self.detRef_Group_list = list(det_ref_groups)
        # (signPos,sign), (signPos,sign)
        #
        return self.detRef_Group_list
    #
    @staticmethod
    def detRanger(detRef_Group):
        """
        Gives the range of sign positions
        included in the determinative
        """
        #
        first_sign = detRef_Group[0]
        last_sign = detRef_Group[1]
        #
        detRange = range(first_sign[0], last_sign[0]+1)
        #
        return detRange
    #
    def get_detSigns(self):
        """
        gets the signs of the determinatives
        """
        #
        for detRef_group in self.detRef_Group_list:
            detSign_list = []
            detRange = self.detRanger(detRef_group)
            for SP, sign in self.signList_pos:
            #(SignPos, Sign),(SignPos, Sign), etc.
                if SP in detRange:
                    detSign_list.append((SP, sign))
            self.det_signList.append(tuple(detSign_list))
        #
        #
        #self.det_signList.append(tuple(detSign_list))
        #
        return self.det_signList
    #
    def uniqDetSigns(self):
        """
        Filter duplicates from det_signList
        """
        #
        detSy = set()
        det_list = []
        #
        for detl in self.det_signList:
            if detl not in detSy:
                detSy.add(detl)
                det_list.append(detl)
        #
        self.det_signList = det_list
        #
        return self.det_signList
    #
    def mrk_dets(self):# detSignlist element of self.det_signList
        """
        params: detSignlist, [(signPos, sign),(), ...]
        Marks the determinatives as
        prepos, postpos, inpos
        """
        #
        mark_set = set()
        #
        signList = sorted(self.signList_pos, key=lambda x:x[0])
        #
        for detSignlist in self.det_signList:
            detList = sorted(detSignlist, key=lambda x:x[0])
            # detSignlist == [(signPos, sign),(), ...]
            # sort according to sign position
            # sort according to sign position
            if detList[0][0] > signList[0][0] and detList[-1][0] < signList[-1][0]:
                detList.append("inpos")
                mark_set.add(tuple(detList))
            elif detList[0][0] == 0:
                detList.append("prepos")
                mark_set.add(tuple(detList))
            elif detList[-1][0] == signList[-1][0]:
                detList.append("postpos")
                mark_set.add(tuple(detList))
            #
        self.detMarkList = list(mark_set)
        #
        return self.detMarkList
    #
    @staticmethod
    def mk_detDict(detMark, sign_list):
        """
        params: detMark, ((),(),(), ...,"")
        Constructs the determinatives dictionary.
        """
        #
        # detMark == [(signPos, sign),(signPos, sign),MARK]
        det_signList = [detm for detm in detMark if isinstance(detm, tuple)]
        detList_sort = sorted(det_signList, key=lambda x:x[0])
        det_mark_str = detMark[-1]
        totalSigns = len(sign_list)
        detSigns = [det[1] for det in detList_sort]
        detText = "-".join(detSigns)
        detSignPos = list(enumerate(detList_sort))
        detPos_list = [det[0] for det in detList_sort]
        detPos = (detPos_list[0],detPos_list[-1])
        detLength = len(detSigns)
        #
        detEntity_list = []
        #
        for detSign in detSignPos:
            # detSign == (0,(3,an)),(1,(4,mesz)), etc
            detSign_dict = {}
            detSign_dict["detSign_det"] = detText
            detSign_dict["detSign_det_WordPos"] = detPos
            detSign_dict["detSign_detMark"] = det_mark_str
            detSign_dict["detSign_detSign"] = detSign[1][1]
            detSign_word_pos = {}
            detSign_word_pos["totalSigns_word"] = totalSigns
            detSign_word_pos["detSign_position"] = detSign[1][0]
            detSign_dict["detSign_WordPosition"] = detSign_word_pos
            detSign_sign_pos = {}
            detSign_sign_pos["totalSigns_determinative"] = detLength
            detSign_sign_pos["detSign_position"] = detSign[0]
            detSign_dict["detSign_DetPosition"] = detSign_sign_pos
            detEntity_list.append(detSign_dict)
        #
        detEntity_tuple = tuple(detEntity_list)
        #
        return detEntity_tuple
    #
    def get_detDictS(self):
        """
        Populates the determinative list
        in the form of list of list of dicts.
        Dicts represent a sign of a determinative
        list of dicts represent the determinative
        list of list of dicts represent the
        determinatives of the word.
        """
        #
        self.get_detRefs()
        self.group_detRefs()
        signlist = self.signList_pos
        #
        self.get_detSigns()
        self.uniqDetSigns()
            #detSignlist == [[(signPos, sign),(), ...], [(signPos, sign),(), ...] ]
            # detsign == [(signPos, sign),(), ...]
        detMarkList = self.mrk_dets()
            #
        for detMark in detMarkList:
            # detMark == [(signPos, sign),(signPos, sign),MARK]
            detDicts = self.mk_detDict(detMark, signlist)
            self.detDict_list.append(detDicts)
        #
        return self.detDict_list
    #
    def wordDictBuild(self):
        """
        Builds the wordDict
        """
        #
        self.set_sign_seperators()
        self.get_detRefs()
        self.detDict_list = []
        self.get_detDictS()
        #
        self.wordDict = {}
        self.wordDict["word_wordSignCount"] = len(self.signList_pos)
        self.wordDict["word_word"] = self.cAtf_word
        self.wordDict["word_determinatives"] = self.detDict_list
        self.wordDict["word_wordSignsPos"] = self.signList_pos
        self.wordDict["word_Signs"] = list(set(self.signList))
        # Removed duplicates for efficiency
        self.wordDict["word_hasDamage"] = self.test_damaged_sign()
        self.wordDict["word_wordLang"] = self.wordLang
        self.wordDict["word_isNumber"] = self.test_isNumber()
        self.wordDict["word_hasComplement"] = self.test_has_complement()
        self.wordDict["word_hasUnknownReading"] = self.test_has_unknownReading()
        self.wordDict["word_hasComposite"] = self.test_has_composite()
        self.wordDict["word_hasSpecification"] = self.test_has_specification()
        self.wordDict["word_hasQuery"] = self.test_has_query()
        self.wordDict["word_hasCollation"] = self.test_has_collation()
        self.wordDict["word_hasCorrection"] = self.test_has_correction()
        self.wordDict["word_isColon"] = self.test_isColon()
        self.wordDict["word_isDColon"] = self.test_isDColon()
        self.wordDict["word_isColonRQ"] = self.test_isColonRQ()
        self.wordDict["word_isColonDQ"] = self.test_isColonDQ()
        self.wordDict["word_isWordDivider"] = self.test_isWordDivider()
        self.wordDict["word_isSpecifiedWordDivider"] = self.test_isWordDivider_Specified()
        self.wordDict["word_hasComplement"] = self.test_has_complement()
        self.wordDict["word_hasUnknownReading"] = self.test_has_unknownReading()
        self.wordDict["word_hasCurved"] = self.test_hasCurved()
        self.wordDict["word_hasFlat"] = self.test_hasFlat()
        self.wordDict["word_hasGunu"] = self.test_hasGunu()
        self.wordDict["word_hasSheshig"] = self.test_hasSheshig()
        self.wordDict["word_hasTenu"] = self.test_hasTenu()
        self.wordDict["word_hasNutillu"] = self.test_hasNutillu()
        self.wordDict["word_hasZidatenu"] = self.test_hasZidatenu()
        self.wordDict["word_hasKabatenu"] = self.test_hasKabatenu()
        self.wordDict["word_hasVertReflected"] = self.test_hasVertReflected()
        self.wordDict["word_hasHorReflected"] = self.test_hasHorReflected()
        self.wordDict["word_hasVariant"] = self.test_hasVariant()
        self.wordDict["word_hasRotated"] = self.test_hasRotated()
        self.wordDict["word_hasBeside"] = self.test_hasBeside()
        self.wordDict["word_hasJoining"] = self.test_hasJoining()
        self.wordDict["word_hasAbove"] = self.test_hasAbove()
        self.wordDict["word_hasCrossing"] = self.test_hasCrossing()
        self.wordDict["word_hasAllograph"] = self.test_hasAllograph()
        self.wordDict["word_hasSpecialAllograph"] = self.test_hasSpecialAllograph()
        self.wordDict["word_hasFormVariant"] = self.test_hasFormVariant()
        self.wordDict["word_hasContaining"] = self.test_hasContaining()
        self.wordDict["word_hasContainingGroup"] = self.test_hasContaining_Group()
        #
        return self.wordDict

# ----------------------------------


class cAtfSignDictBuilder(cAtfSignTester):
    """
    Class regrouping methods for building a signDict
    """
    #
    # Operator types for Compound Signs ----------------------
    operator_dict = {
        "beside":".",
        "joining":"+",
        "containing":"x", # This is also used for indicating repetitions.
        # Thus needs to be handled DONE # Binary scope
        "above":"&", # Binary scope
        "crossing":"%", # Binary scope
        "opposing":"@", # This needs to be handled, it is also used in
        # modifiers and part titles. TODO modifiers DONE
        # binary scope
    }
    modifier_dict = {
        "curved":"@c",
        "flat":"@f",
        "gunu":"@g", # 4 extra wedges
        "sheshig":"@s", # added sze sign
        "tenu":"@t", # slanting
        "nutillu":"@n", # unfinished
        "zidatenu":"@z", # slanting right
        "kabatenu":"@k", # slanting left
        "verticallyReflected":"@r",
        "horizontallyReflected":"@h",
        "variant":"@v"
        # Rotations need to be handled seperately DONE
    }
    #
    def __init__(self, catf_sign):
        """
        Keeps the sign and prepares the empty containers
        that the other methods of the class fill in.

        params: catf_sign, str. Also handed unchanged to the
        initialiser of the parent class.
        """
        #
        super().__init__(catf_sign)
        #
        # The sign exactly as the caller gave it
        self.catf_sign = catf_sign
        # Feature dictionary of the sign that is being built
        self.signDict = dict()
        # Content of the composite sign, set by get_compositeSign
        self.compositeSign = str()
        # Containers, all start out empty
        self.prnthsPosition_list = list()
        self.sign_dict_list = list()
        self.signRelation_dict_list = list()
        #
    #
    #
    """
    TODO
    Composed signs should have
    nesting level indicators
    for signs like |ANx(AN.AN)| etc.
    And the relations should be specified
    in the feature dict.

    TODO Specifications are treated as
    words when they are delimited by spaces and as
    signs when they are delimited by -
    A decision still has to be made about numbers.
    They may also be made up of composite signs.

    """
    #
    def get_compositeSign(self):
        """
        Gets the composite sign.

        When the sign tests as composite, the first |...| span of
        self.catf_sign is searched, the result of the damage test is
        written to self.signDict["sign_isDamaged"] and the content of
        the span, without the bars, is stored in self.compositeSign.

        return: self.compositeSign, str. Keeps its previous value when
        the sign is not composite or when no |...| span is found.
        """
        #
        if self.test_isComposite() is not True:
            return self.compositeSign
        #
        # Raw string: "\|" inside a plain literal is an invalid escape
        # sequence and newer interpreters warn about it.
        composite_sign_search = re.search(r"\|.*?\|", self.catf_sign)
        self.signDict["sign_isDamaged"] = self.test_isDamaged()
        # This test is done here because
        # C-ATF treats compound signs as atoms
        # If one would like to extend this extractor to
        # O-ATF then this has to be moved elsewhere.
        if composite_sign_search is not None:
            # Guarded so that a sign without a closed |...| pair
            # does not fail on None.group
            composite_sign = composite_sign_search.group(0)
            self.compositeSign = composite_sign[1:-1]
            # 1 - -1 for getting rid of | on both sides
        #
        return self.compositeSign
    #
    @staticmethod
    def get_nestElements(nestedString):
        """
        Generates the paranthese content
        with its associated level
        if the composite sign is nested.

        Code adapted from SO:
        author: Gareth Rees
        date Published: 2010-11-26-12-32
        date Retrieved: 2017-04-23-19-54
        url: http://stackoverflow.com/questions/4284991/parsing-nested-parentheses-in-python-grab-content-by-level
        """
        #
        paren_stack = []
        for i, char in enumerate(nestedString):
            # Ex. CompositeSign == |AN.(ANxAN)&((AN.AN)%AN)|
            if char == "(":
                paren_stack.append(i)
                # Adds the position of (
            elif char == ")" and paren_stack:
                # Comes the next )
                start = paren_stack.pop()
                # Gives the last added ( position
                # The logic is that the last added ( would correspond to
                # the first ) and by using pop we ensure
                # that the second ) doesn't mismatch with the ( of
                # the previous right paranthese.
                yield (len(paren_stack),list(range(start, i+1)), nestedString[start+1:i])
                # the last expression inside the [] excludes the i and
                # adds one to the position of the ( so that we have the
                # content.
                # **WARNING** Range values includes parantheses
        #
    def get_OpPositions(self, compoundSign):
        """
        gets the operator positions from the
        compound sign.
        """
        #
        opPosition_list = []
        #
        for charPos, char in enumerate(compoundSign):
            if char in self.operator_dict.values():
                opPosition_list.append((charPos, char))
            #
        #
        return opPosition_list
    #
    @staticmethod
    def get_nestLevelDict(nestList):
        """
        Maps the output of the generator
        expression to a dictionary
        for facilitating later use.
        """
        #
        nestLevel_dict_list = []
        #
        for nestL in nestList:
            nestLDict = {}
            nestLDict["nest_level"] = nestL[0]
            nestLDict["nest_range"] = nestL[1]
            nestLDict["nest_content"] = nestL[2]
            nestLevel_dict_list.append(nestLDict)
        #
        return nestLevel_dict_list
    #
    @staticmethod
    def get_nestDict(nestList):
        """
        Creates a dictionary based on nest levels.
        """
        #
        nestDict = {}
        #
        sort_nestList = sorted(nestList, key=lambda x:x[0]) #
        #
        for nestEl in sort_nestList:
            nestDict.setdefault(nestEl[0], []).append(nestEl[1:])
        #
        return nestDict
    #
    @staticmethod
    def nestDict_LevelRangeCreator(nestDict):
        """
        Regroups the range list of nest elements
        for each level and appends it to the end
        of the value associated with the nest level
        """
        #
        nestDict_Ranges = {}
        for key, nestEl in nestDict.items():
            nestLevel_range_list = []
            for nestTuple in nestEl:
                nestLevel_range_list.extend(nestTuple[0])
                # nestTuple[0] should correspond to list of char positions
            #
            nestDict_Ranges[key] = nestEl
            nestDict_Ranges[key].append(nestLevel_range_list)
        #
        return nestDict_Ranges
    #
    # "|(AN.((IR2%IR3).((AN&AN)+(IR3xAN))).((AN.IR3)xNITA))|" Test sign
    #
    @staticmethod
    def get_OpDict_list(nestDict_Ranges, opPosition_list):
        """
        Gets the operator levels plus one position before and after the
        operator position. Maps all of this to a dictionary.
        Appends the dictionary to a list
        """
        #
        opDictList = []
        #
        for opPosition in opPosition_list:
            for level, nestEl in nestDict_Ranges.items():
                nestRangeList = nestEl[-1]
                if opPosition[0] in nestRangeList:
                    posPlace = nestRangeList.index(opPosition[0])
                    posDict = {}
                    posDict["operatorPosition_nestlevel"] = level
                    posDict["operatorPosition_after"] = nestRangeList[posPlace+1:posPlace+4]
                    # This for checking modifier types afterwards
                    # Especially the rotation.
                    posDict["operatorPosition_before"] = nestRangeList[posPlace-1]
                    # Might come in handy for checking 'repeated' operator
                    posDict["operatorPosition_position"] = opPosition[0]
                    posDict["operatorPosition_operator"] = opPosition[1]
                    opDictList.append(posDict)
                #
        #
        return opDictList
        #
    @staticmethod
    def get_OpLevelPosition(opDictList):
        """
        Eliminates the duplicate occurances
        for the operators. Only the
        highest level in which the
        operator occured is retained.
        Function groups the operators
        according to their positions
        then makes a list with the highest levels
        within the group.
        """
        #
        opdictsSorted = sorted(opDictList, key=lambda opDict:opDict["operatorPosition_position"]) # Sort list according to operator positions
        opDictsGrouped = [list(group) for key, group in itertools.groupby(opdictsSorted, key=lambda x:x["operatorPosition_position"])]
        # Group elements according to operator positions
        opDictGroupsSort = [sorted(groupList, key=lambda opDict:opDict["operatorPosition_nestlevel"]) for groupList in opDictsGrouped]
        # Sort group list according to the nest level
        operatorPos_level_list = [sorted_group[-1] for sorted_group in opDictGroupsSort]
        #
        return operatorPos_level_list
    #
    def get_SignRelationBS(self,
                           operatorPos_level_list,
                           nestLevel_dict_list,
                           compositeSign):
        """
        Gets the sign or sign groups that
        are associated with each other through
        a binary scoped operator

        params:
        operatorPos_level_list, [{}]. Operator dictionaries, read through
        the keys "operatorPosition_nestlevel", "operatorPosition_position"
        and "operatorPosition_operator".
        nestLevel_dict_list, [{}]. Nest dictionaries, read through the
        keys "nest_level", "nest_range" and "nest_content".
        compositeSign, str. Copied into every relation dictionary.

        return: signRelation_dict_list, [{}]. Only the relations found by
        this call. They are appended to self.signRelation_dict_list as
        well, but that shared list is not what is returned anymore:
        returning it made the callers who add up the results of several
        get_SignRelation* calls count every relation more than once.
        """
        #
        signRelation_dict_list = []
        operator_type_names = {"%": "crossing", "&": "above"}
        #
        def side_kind(chars):
            # Group when both parantheses occur, Sign when neither
            # does, None for anything in between.
            hasOpen = "(" in chars
            hasClose = ")" in chars
            if hasOpen and hasClose:
                return "Group"
            if not hasOpen and not hasClose:
                return "Sign"
            return None
        #
        for operatorPos_level in operatorPos_level_list:
            operatorNestLevel = operatorPos_level["operatorPosition_nestlevel"]
            operatorPos = operatorPos_level["operatorPosition_position"]
            operator = operatorPos_level["operatorPosition_operator"]
            # x and @ will be handled individually
            # we test only for % and &
            # The answer does not depend on the nest dictionaries,
            # so it is asked once per operator.
            if self.test_isBinaryScope(operator) is not True:
                continue
            for nestLevel_dict in nestLevel_dict_list:
                nestRange = nestLevel_dict["nest_range"]
                nestLevel = nestLevel_dict["nest_level"]
                nestContent = nestLevel_dict["nest_content"]
                if operatorNestLevel != nestLevel or operatorPos not in nestRange:
                    continue
                opPosinRange = nestRange.index(operatorPos)
                opPrecedents = nestRange[1:opPosinRange]
                # 1 for excluding the (
                opFollowers = nestRange[opPosinRange+1:-1]
                # -1 for excluding )
                opPrecLength = len(opPrecedents)
                opPrecChars = nestContent[:opPrecLength]
                opFolChars = nestContent[opPrecLength+1:]
                # +1 for excluding the operator
                signRelation_dict = {}
                signRelation_dict["SR_operator"] = operator
                signRelation_dict["SR_operator_antec"] = opPrecChars
                signRelation_dict["SR_operator_subsq"] = opFolChars
                signRelation_dict["SR_nest_level"] = nestLevel
                signRelation_dict["SR_nest_content"] = nestContent
                signRelation_dict["SR_compositeSign"] = compositeSign
                signRelation_dict["SR_nest_range"] = nestRange
                antecKind = side_kind(opPrecChars)
                subsqKind = side_kind(opFolChars)
                if antecKind is not None and subsqKind is not None:
                    signRelation_dict["SR_relation_type"] = {
                        "operator_antecedent": antecKind,
                        "operator_subsequent": subsqKind,
                    }
                signRelation_dict["SR_operator_position"] = operatorPos
                if operator in operator_type_names:
                    signRelation_dict["SR_operator_type"] = operator_type_names[operator]
                signRelation_dict["SR_operator_antec_range"] = opPrecedents
                signRelation_dict["SR_operator_subseq_range"] = opFollowers
                signRelation_dict_list.append(signRelation_dict)
        #
        # The instance keeps collecting every relation it has seen
        self.signRelation_dict_list.extend(signRelation_dict_list)
        #
        return signRelation_dict_list
    #
    def get_SignRelationSpeCases(self,operatorPos_level_list, nestLevel_dict_list, compositeSign):
        """
        Gets the sign or sign groups that
        are associated with each other through
        x and @ operators
        """
        #
        signRelation_dict_list = []
        #
        for operatorPos_level in operatorPos_level_list:
            operatorNestLevel = operatorPos_level["operatorPosition_nestlevel"]
            operatorPos = operatorPos_level["operatorPosition_position"]
            operator = operatorPos_level["operatorPosition_operator"]
            for nestLevel_dict in nestLevel_dict_list:
                nestRange = nestLevel_dict["nest_range"]
                nestLevel = nestLevel_dict["nest_level"]
                nestContent = nestLevel_dict["nest_content"]
                if operatorPos in nestRange and operatorNestLevel == nestLevel:
                    opPosinRange = nestRange.index(operatorPos)
                    opPrecedents = nestRange[1:opPosinRange]
                    # 1 for excluding the (
                    opFollowers = nestRange[opPosinRange+1:-1]
                    # -1 for excluding )
                    opPrecLength = len(opPrecedents)
                    opPrecChars = nestContent[:opPrecLength]
                    opFolChars = nestContent[opPrecLength+1:]
                    # +1 for excluding the operator
                    signRelation_dict = {}
                    signRelation_dict["SR_operator"] = operator
                    signRelation_dict["SR_operator_antec"] = opPrecChars
                    signRelation_dict["SR_operator_subsq"] = opFolChars
                    signRelation_dict["SR_nest_level"] = nestLevel
                    signRelation_dict["SR_nest_content"] = nestContent
                    signRelation_dict["SR_compositeSign"] = compositeSign
                    signRelation_dict["SR_nest_range"] = nestRange
                    if "(" in opPrecChars and ")" in opPrecChars and ")" in opFolChars and "(" in opFolChars:
                        signRelation_dict["SR_relation_type"] = {"operator_antecedent":"Group", "operator_subsequent":"Group"}
                    elif "(" in opPrecChars and ")" in opPrecChars and ")" not in opFolChars and not "(" in opFolChars:
                        signRelation_dict["SR_relation_type"] = {"operator_antecedent":"Group", "operator_subsequent":"Sign"}
                    elif "(" not in opPrecChars and ")" not in opPrecChars and ")" in opFolChars and "(" in opFolChars:
                        signRelation_dict["SR_relation_type"] = {"operator_antecedent":"Sign", "operator_subsequent":"Group"}
                    elif "(" not in opPrecChars and ")" not in opPrecChars and ")" not in opFolChars and "(" not in opFolChars:
                        signRelation_dict["SR_relation_type"] = {"operator_antecedent":"Sign", "operator_subsequent":"Sign"}
                    signRelation_dict["SR_operator_position"] = operatorPos
                    signRelation_dict["SR_operator_antec_range"] = opPrecedents
                    signRelation_dict["SR_operator_subseq_range"] = opFollowers
                    if operator == ".":
                        signRelation_dict["SR_operator_type"] = "beside"
                    elif operator == "+":
                        signRelation_dict["SR_operator_type"] = "joining"
                    elif operator == "x" and opPrecChars.isdigit():
                        signRelation_dict["SR_operator_type"] = "repeated"
                    elif operator == "x" and not opPrecChars.isdigit():

                        signRelation_dict["SR_operator_type"] = "containing"
                    elif operator == "@":
                        if re.search("^\d+", opFolChars) is not None:
                        # This means that the @ sign is
                        # a modifier here so we restart looping
                            continue
                        else:
                            opFolCharsOper = nestContent[opPrecLength:opPrecLength+3]
                            # Includes the operator @
                            if "@c" in opFolCharsOper or "@f" in opFolCharsOper or "@g" in opFolCharsOper or "@s" in opFolCharsOper or "@s" in opFolCharsOper or "@t" in opFolCharsOper or "@n" in opFolCharsOper or "@z" in opFolCharsOper or "@k" in opFolCharsOper or "@r" in opFolCharsOper or "@h" in opFolCharsOper or "@v" in opFolCharsOper:
                                # This means that @ sign is
                                # a modifier so we restart looping
                                continue
                            else:
                                signRelation_dict["SR_operator_type"] = "opposing"
                    #
                    self.signRelation_dict_list.append(signRelation_dict)
                #
        #
        return self.signRelation_dict_list
    #
    @staticmethod
    def get_unNestedCompSigns(compositeSign, opPosition_list):
        """
        gets the signs of composite sign
        that is not nested.
        """
        #
        signRelation_dict_list = []
        #
        for opPos in opPosition_list:
            opP = opPos[0]
            opChar = opPos[1]
            opAnte = compositeSign[:opP]
            opSubseq = compositeSign[opP:]
            signRelation_dict = {}
            signRelation_dict["SR_operator"] = opChar
            signRelation_dict["SR_operator_antec"] = opAnte
            signRelation_dict["SR_operator_subsq"] = opSubseq[1:]
            # 1 for excluding the operator in mapping
            signRelation_dict["SR_compositeSign"] = compositeSign
            signRelation_dict["SR_nest_level"] = 0
            signRelation_dict["SR_nest_content"] = compositeSign
            signRelation_dict["SR_nest_range"] = list(range(0,len(compositeSign)))

            signRelation_dict["SR_operator_position"] = opP
            signRelation_dict["SR_relation_type"] = {"operator_antecedent":"Sign", "operator_subsequent":"Sign"}
            if opChar == "%":
                signRelation_dict["SR_operator_type"] = "crossing"
            elif opChar == "&":
                signRelation_dict["SR_operator_type"] = "above"
            elif opChar == ".":
                signRelation_dict["SR_operator_type"] = "beside"
            elif opChar == "+":
                signRelation_dict["SR_operator_type"] = "joining"
            elif opChar == "x" and opAnte.isdigit():
                signRelation_dict["SR_operator_type"] = "repeated"
            elif opChar == "x" and not opAnte.isdigit():
                signRelation_dict["SR_operator_type"] = "containing"
            elif operator == "@":
                if re.search("^\d+",opSubseq[1:]) is not None:
                    # Starts from 1 because opSubseq[0]== operator
                # This means that the @ sign is
                # a modifier here so we restart looping
                    continue
                else:
                    opFolCharsOper = opSubseq[0:2]
                    # Includes the operator @
                    if "@c" in opFolCharsOper or "@f" in opFolCharsOper or "@g" in opFolCharsOper or "@s" in opFolCharsOper or "@s" in opFolCharsOper or "@t" in opFolCharsOper or "@n" in opFolCharsOper or "@z" in opFolCharsOper or "@k" in opFolCharsOper or "@r" in opFolCharsOper or "@h" in opFolCharsOper or "@v" in opFolCharsOper:
                        # This means that @ sign is
                        # a modifier so we restart looping
                        continue
                    else:
                        signRelation_dict["SR_operator_type"] = "opposing"
            signRelation_dict["SR_operator_antec_range"] = list(range(0, opP))
            signRelation_dict["SR_operator_subseq_range"] = list(range(opP, len(compositeSign)))
            #
            self.signRelation_dict_list.append(signRelation_dict)
        #
        return self.signRelation_dict_list
    #
    @staticmethod
    def get_signsSR(signRelDict):
        """
        Gets signs from the sign dict.
        """
        #
        compoundSign_signList = []
        if signRelDict["SR_relation_type"]["operator_antecedent"] == "Sign" and signRelDict["SR_relation_type"]["operator_subsequent"] == "Sign":
            compoundSign_signList.append(signRelDict["SR_operator_antec"])
            compoundSign_signList.append(signRelDict["SR_operator_subsq"])
            compoundSign_signList.append(signRelDict)
            #
        elif signRelDict["SR_relation_type"]["operator_antecedent"] == "Group" and signRelDict["SR_relation_type"]["operator_subsequent"] == "Sign":
            compoundSign_signList.append(signRelDict["SR_operator_subsq"])
            compoundSign_signList.append(signRelDict)
            #
        elif signRelDict["SR_relation_type"]["operator_antecedent"] == "Sign" and signRelDict["SR_relation_type"]["operator_subsequent"] == "Group":
            compoundSign_signList.append(signRelDict)
        #
        return compoundSign_signList
    #
    def get_signComplement(self):
        """
        Gets the signs from a sign
        that has a complement

        return: complement_sign_list, []. Holds the part of
        self.catf_sign between its first and second + when the sign
        tests as a complement. Empty when it does not, or when the
        sign has no + at all.
        """
        #
        # test_isComplement is called without an argument, as it is
        # everywhere else in the class: the tester already knows the sign.
        if self.test_isComplement() is not True:
            return []
        #
        compSplit = self.catf_sign.split("+")
        # The slice gives an empty list instead of an IndexError
        # when there is nothing after a +
        complement_sign_list = compSplit[1:2]
        #
        return complement_sign_list
    #
    @staticmethod
    def char_convert(sign):
        """
        Convert CDLI C-ATF characters
        to unicode
        """
        #
        text_sz = sign.replace("sz","\u0161") # sz -> š
        text_SZ = text_sz.replace("SZ", "\u0160") # SZ -> Š
        text_sPo = text_SZ.replace("s,", "\u1e63") # s, -> ṣ
        text_SPo = text_sPo.replace("S,", "\u1e62") # S, -> Ṣ
        text_tch = text_SPo.replace("t,", "\u1e6d") # t, -> ṭ
        text_TCH = text_tch.replace("T,", "\u1e6c") # T, -> Ṭ
        text_s = text_TCH.replace("s'", "\u015b") # s' -> ś
        text_S = text_s.replace("S'","\u015a") # S' -> Ś
        text_ayn = text_S.replace("'", "\u02be") # ' -> ʾ
        text_sub0 = text_ayn.replace("0","\u2080")# Subscript numbers
        text_sub1 = text_sub0.replace("1","\u2081")
        text_sub2 = text_sub1.replace("2","\u2082")
        text_sub3 = text_sub2.replace("3","\u2083")
        text_sub4 = text_sub3.replace("4","\u2084")
        text_sub5 = text_sub4.replace("5","\u2085")
        text_sub6 = text_sub5.replace("6","\u2086")
        text_sub7 = text_sub6.replace("7","\u2087")
        text_sub8 = text_sub7.replace("8","\u2088")
        text_sub9 = text_sub8.replace("9","\u2089")
        text_subx = text_sub9.replace("x²","\u208a") # subscript x
        text_subX = text_subx.replace("X²","\u208a")
        text_h = text_subX.replace("h,", "\u1e2b") # h, -> ḫ
        text_H = text_h.replace("H,", "\u1e2a") # H, -> Ḫ
        text_j = text_H.replace("j","\u014b") # j -> ŋ
        text_J = text_j.replace("J","\u014a") # J -> Ŋ
        #
        return text_J
    #
    @staticmethod
    def signDictBuild(sign):
        """
        Builds the feature dictionary of a single sign.

        params: sign, str.
        return: signDict, {}. Holds the sign itself under
        "sign_sign" and, for every feature, the answer of the
        corresponding test of a cAtfSignTester made for the sign.
        """
        #
        tester_class = cAtfSignTester(sign)
        # Feature key -> name of the tester method answering it,
        # in the order the features are written to the dictionary
        feature_tests = (
            ("sign_isComplement", "test_isComplement"),
            ("sign_isQuery", "test_is_query"),
            ("sign_isCorrection", "test_is_correction"),
            ("sign_isCollation", "test_is_collation"),
            ("sign_isCurved", "test_isCurved"),
            ("sign_isFlat", "test_isFlat"),
            ("sign_isGunu", "test_isGunu"),
            ("sign_isSheshig", "test_isSheshig"),
            ("sign_isTenu", "test_isTenu"),
            ("sign_isNutillu", "test_isNutillu"),
            ("sign_isZidatenu", "test_isZidatenu"),
            ("sign_isKabatenu", "test_isKabatenu"),
            ("sign_isVertReflected", "test_isVertReflected"),
            ("sign_hasAllograph", "test_hasAllograph"),
            ("sign_hasSpecialAllograph", "test_hasSpecialAllograph"),
            ("sign_isHorReflected", "test_isHorReflected"),
            ("sign_isVariant", "test_isVariant"),
            ("sign_isRotated", "test_isRotated"),
        )
        #
        signDict = {"sign_sign": sign}
        for feature_key, test_name in feature_tests:
            signDict[feature_key] = getattr(tester_class, test_name)()
        #
        # Not set here, left to the caller:
        # "sign_isPartOfComposite"
        # "sign_nestLevel", 0 if the sign is not composite
        # "sign_isUnknownReading", tested if the sign is not composite
        # "sign_relatedSigns", the list of the signs that make up
        # the composite is going to be added there
        return signDict
    #
    def buildSignDict(self):
        """
        Wraps the methods defined throughout the class.

        Four cases are handled:
        - nested composite sign, composite and specification tests True
        - composite sign that is not nested
        - sign that is not composite but is a complement
        - plain sign

        return: sign_dict_list, [{}]. A feature dictionary for every
        sign found. Empty when none of the cases applies.

        For the composite signs the sign relations found are left in
        self.signRelation_dict_list, and the damage test result that
        get_compositeSign stores for the whole composite is copied to
        the dictionary of each of its signs.
        """
        #
        sign_dict_list = []
        #
        def add_composite_parts(SR_dictList, compositeDamaged):
            # Builds a feature dictionary for every plain sign
            # of every relation dictionary.
            for SR_dict in SR_dictList:
                compoundSignList = self.get_signsSR(SR_dict)
                # The list ends with the relation dictionary itself and
                # is empty for the group - group associations, in both
                # cases the slice leaves only the signs.
                for signElement in compoundSignList[:-1]:
                    self.signDict = self.signDictBuild(signElement)
                    self.signDict["sign_isPartOfComposite"] = True
                    self.signDict["sign_isUnknownReading"] = False
                    self.signDict["sign_relatedSigns"] = SR_dict
                    self.signDict["sign_nestLevel"] = SR_dict["SR_nest_level"]
                    self.signDict["sign_compositeSign"] = SR_dict["SR_compositeSign"]
                    if compositeDamaged is not None:
                        self.signDict["sign_isDamaged"] = compositeDamaged
                    sign_dict_list.append(self.signDict)
        #
        isComposite = self.test_isComposite()
        #
        if isComposite is True:
            isSpecification = self.test_isSpecification()
            if isSpecification is True:
                # Basically it is a nested composite sign
                compositeSign = self.get_compositeSign()
                # get_compositeSign writes the damage test to
                # self.signDict, which is replaced for every sign below.
                compositeDamaged = self.signDict.get("sign_isDamaged")
                opPositonList = self.get_OpPositions(compositeSign)
                nestList = list(self.get_nestElements(compositeSign))
                nestLevelDictList = self.get_nestLevelDict(nestList)
                nest_dict = self.get_nestDict(nestList)
                nest_dict_levelRange = self.nestDict_LevelRangeCreator(nest_dict)
                opDict_list = self.get_OpDict_list(
                    nest_dict_levelRange,
                    opPositonList
                )
                opLvlPosition = self.get_OpLevelPosition(opDict_list)
                # The relation methods collect in
                # self.signRelation_dict_list, it is emptied before
                # each call and the result is copied, so that the two
                # results can be added up without repeating a relation.
                self.signRelation_dict_list = []
                SR_dictList_BS = list(self.get_SignRelationBS(
                    opLvlPosition,
                    nestLevelDictList,
                    compositeSign
                ))
                self.signRelation_dict_list = []
                SR_dictList_SCases = list(self.get_SignRelationSpeCases(
                    opLvlPosition,
                    nestLevelDictList,
                    compositeSign
                ))
                self.signRelation_dict_list = SR_dictList_BS + SR_dictList_SCases
                SR_dictList = SR_dictList_SCases + SR_dictList_BS
                add_composite_parts(SR_dictList, compositeDamaged)
                # Compound Nested DONE
            elif isSpecification is False:
                # Compound Not Nested
                compositeSign = self.get_compositeSign()
                compositeDamaged = self.signDict.get("sign_isDamaged")
                opPositonList = self.get_OpPositions(compositeSign)
                unNestedList = self.get_unNestedCompSigns(
                    compositeSign, opPositonList
                )
                add_composite_parts(unNestedList, compositeDamaged)
                # Compound not Nested DONE
        #
        elif isComposite is False:
            isComplement = self.test_isComplement()
            if isComplement is True:
                # Not a Compound Sign but is a complement
                # The sign is an attribute of the instance, neither
                # method below takes it as an argument.
                for complementSign in self.get_signComplement():
                    self.signDict = self.signDictBuild(complementSign)
                    self.signDict["sign_isPartOfComposite"] = False
                    self.signDict["sign_isUnknownReading"] = self.test_isUnknownReading()
                    self.signDict["sign_relatedSigns"] = {} # TODO get Related Sign for Complement Signs
                    self.signDict["sign_nestLevel"] = 0
                    self.signDict["sign_compositeSign"] = ""
                    sign_dict_list.append(self.signDict)
                # Complement sign DONE
            elif isComplement is False:
                # Plain sign
                self.signDict = self.signDictBuild(self.catf_sign)
                self.signDict["sign_isPartOfComposite"] = False
                self.signDict["sign_isUnknownReading"] = self.test_isUnknownReading()
                self.signDict["sign_isDamaged"] = self.test_isDamaged()
                self.signDict["sign_relatedSigns"] = {} # TODO get Related Sign
                self.signDict["sign_nestLevel"] = 0
                self.signDict["sign_compositeSign"] = ""
                sign_dict_list.append(self.signDict)
        #
        return sign_dict_list
    #
    # Algorithm DONE
    # Tests! DONE


class cAtfTextBuilder(object):
    """
    Builds the raw ("brut") text as a feature
    dictionary, by calling the methods
    from the classes above.

    Typical use is ``cAtfTextBuilder(text).buildTextDict()``.
    The other methods are the individual steps of that pipeline:
    each one stores its result on the instance so that the
    following steps can read it, and also returns it.
    """
    #
    # Matches from the first "&P<digits>" marker up to the end of the
    # input (DOTALL lets "." run across newlines). Raw string, so "\d"
    # is a regex escape and not an (invalid) string escape.
    _ATF_SECTION_RE = re.compile(r"&P\d+.*", re.DOTALL)
    #
    def __init__(self, text):
        """
        params: text, str. The full text the atf section is searched in.
        """
        #
        self.text_brut = text
        self.atf_section = ""           # set by get_atf_section
        self.object_parts_list = []     # set by get_object_parts
        self.objectIdPart = []          # set by get_ObjetIdPart
        self.objectTypePart = ""        # set by get_objectTypePart
        self.catf_text_dict = {}        # the feature dictionary being built
        self.objectPartLines_list = []  # set by splitLinesOParts
        self.objectTextParts = []       # set by get_textParts
        self.textPart_dict_list = []    # set by buildTextDict
    #
    # Section Methods
    #
    def get_atf_section(self):
        """
        params: none, reads self.text_brut.
        return: atf_section, str.
        raises: ValueError, if the text has no "&P<digits>" marker.

        Takes a text given as the text output
        of the cdli and keeps everything from the
        first "&P<digits>" marker to the end of the
        text for later use.
        """
        #
        find_atf_section = self._ATF_SECTION_RE.search(self.text_brut)
        #
        if find_atf_section is None:
            # Without this check the failure would be an AttributeError
            # on None, which says nothing about the actual problem.
            raise ValueError(
                "No atf section found: the text contains no '&P<digits>' id marker"
            )
        #
        self.atf_section = find_atf_section.group(0)
        #
        return self.atf_section
    #
    def get_object_parts(self):
        """
        params: none, reads self.atf_section.
        return: object_part_list, [] or None

        Splits the atf section on the lines starting with "@".
        The first element is whatever precedes the first "@" line,
        each following element starts with its own "@".

        If the atf section contains no "\\n" at all, a hint about the
        newline convention is printed, self.object_parts_list is left
        untouched and None is returned.
        """
        #
        if "\n" not in self.atf_section:
            # Best-effort diagnostics: report and bail out without raising.
            print("Newline character doesn't match to expected unix input type")
            print("\n\n check if you have indeed specified \\n as \n the newline character while opening the text.")
            return None
        #
        # split() consumes the "@" together with the newline, so it is
        # put back in front of every part except the leading one.
        object_part_id_part, *object_part_parts = self.atf_section.split("\n@")
        self.object_parts_list = [object_part_id_part] + [
            "@" + part for part in object_part_parts
        ]
        #
        return self.object_parts_list
    #
    def splitLinesOParts(self):
        """
        Splits each object part
        into its lines

        return: objectPartLines_list, [[str, ...], ...]
        """
        #
        self.objectPartLines_list = [
            objectPart.splitlines() for objectPart in self.object_parts_list
        ]
        #
        return self.objectPartLines_list
    #
    def get_ObjetIdPart(self):
        """
        Gets the part in which
        the id of the text occurs

        # In objectPartLines_list:
        # [0] is the id part, [1] is the type part
        # [2:] are the text parts

        raises: IndexError, if objectPartLines_list is empty.
        """
        #
        self.objectIdPart = self.objectPartLines_list[0]
        #
        return self.objectIdPart
    #
    def get_text_id(self):
        """
        Gets the text id, the id alternatives and the
        text language from the lines of the object id part
        and stores them in the text dictionary.

        For every line only the first getter that gives a
        non empty result is used, in the order:
        id line, id alternatives, language line.

        return: catf_text_dict, {}
        """
        #
        for line in self.objectIdPart:
            c_atf_line = cAtfLineGetter(line)
            # Every getter is called once and its result reused.
            # NOTE(review): assumes the cAtfLineGetter getters are pure,
            # they are defined outside of this class - confirm.
            id_line = c_atf_line.get_id_line()
            if len(id_line) != 0:
                self.catf_text_dict["text_id"] = id_line
                continue
            id_alternatives = c_atf_line.get_id_alternatives()
            if len(id_alternatives) != 0:
                self.catf_text_dict["text_id_alternatives"] = id_alternatives
                continue
            language_line = c_atf_line.get_language_line()
            if len(language_line) != 0:
                self.catf_text_dict["text_language"] = language_line
        #
        return self.catf_text_dict
    #
    def get_objectTypePart(self):
        """
        Gets the object type, that is the first
        line of the second object part, stripped.

        # In objectPartLines_list:
        # [0] is the id part, [1] is the type part
        # [2:] are the text parts

        raises: IndexError, if there are fewer than two object parts.
        """
        #
        self.objectTypePart = self.objectPartLines_list[1][0].strip()
        # [1] corresponds to the list which contains only the type string
        # Hence [0].strip()
        #
        return self.objectTypePart
    #
    def get_textParts(self):
        """
        Gets the list of text parts
        from the object part list

        This should correspond to [2:]
        """
        #
        self.objectTextParts = self.objectPartLines_list[2:]
        #
        return self.objectTextParts
    #
    def set_text_PartInfo(self):
        """
        Sets what we have so far
        to the text dictionary:
        the object type and the number of text parts.

        return: catf_text_dict, {}
        """
        #
        self.catf_text_dict["text_objectType"] = self.objectTypePart
        self.catf_text_dict["text_textPartCount"] = len(self.objectTextParts)
        # objectTextParts is [2:] of the object part lines
        # because [0] is the id part and [1] is the type part
        #
        return self.catf_text_dict
    #
    @staticmethod
    def textPartString(textPart):
        """
        params: textPart, []
        return: textPart_str, ''

        Regroups the lines
        belonging to the part in
        string form for handling
        Another Language Occurances
        """
        #
        # Since [0] is the part title indicated with @
        # the rest should be text lines, comments, etc.
        return "\n".join(textPart[1:])
    #
    @staticmethod
    def get_ALs(textPart_str):
        """
        Passes the textPart_str to AL
        handler for getting Another Language
        occurances

        return: whatever cAtfALHandler.get_ALOcS() gives back.
        """
        #
        return cAtfALHandler(textPart_str).get_ALOcS()
    #
    @staticmethod
    def lineDicts(textPartLine):
        """
        Converts the text part line
        to a line dict by using cAtfLineDictBuilder
        """
        #
        return cAtfLineDictBuilder(textPartLine).lineDictBuild()
    #
    @staticmethod
    def worDictBuilder(lineWord):
        """
        Converts the words inside
        a line dict to a
        wordDict by using cAtfWordDictBuilder
        """
        #
        return cAtfWordDictBuilder(lineWord).wordDictBuild()
    #
    @staticmethod
    def signDictBuilder(WordSign):
        """
        Converts the signs inside
        a word dict to
        a signDict by using
        cAtfSignDictBuilder
        """
        #
        return cAtfSignDictBuilder(WordSign).buildSignDict()
    #
    def get_SignDicts(self, wordDict):
        """
        Builds sign dicts for the signs
        in a word dict.

        wordDict["word_Signs"] is replaced in place by the
        results of signDictBuilder, the same dict is returned.
        """
        wordDict["word_Signs"] = [
            self.signDictBuilder(sign) for sign in wordDict["word_Signs"]
        ]
        #
        return wordDict
    #
    def get_WordDicts(self, lineDict):
        """
        Builds word dicts for words
        in a line dict

        lineDict["lineWords"] is replaced in place by the
        results of worDictBuilder, the same dict is returned.
        """
        #
        lineDict["lineWords"] = [
            self.worDictBuilder(word) for word in lineDict["lineWords"]
        ]
        #
        return lineDict
    #
    def set_partDict(self, textPart):
        """
        Creates the part dictionary
        from the textpart which is an
        element of the objectpart list

        params: textPart, [str, ...]. [0] is the part title,
        the rest are the lines of the part.
        return: part_dict, {} with the keys part_partTitle,
        part_partString, part_AL_occurances and part_parLines.
        """
        #
        part_dict = {}
        #
        part_dict["part_partTitle"] = textPart[0].strip()
        part_string = self.textPartString(textPart)
        part_dict["part_partString"] = part_string
        # pass text language to al occurances TODO
        part_dict["part_AL_occurances"] = self.get_ALs(part_string)
        # and the Adventure of Iteration starts ...
        # line -> line dict -> its words as word dicts
        # -> the signs of every word as sign dicts
        partLine_dict_list = []
        for line in textPart[1:]:
            # text language can be passed to lines here TODO
            line_dict = self.get_WordDicts(self.lineDicts(line))
            line_dict["lineWords"] = [
                self.get_SignDicts(word_dict)
                for word_dict in line_dict["lineWords"]
            ]
            partLine_dict_list.append(line_dict)
        #
        # TODO partlines stays the same
        #
        part_dict["part_parLines"] = partLine_dict_list
        #
        return part_dict
    #
    def buildTextDict(self):
        """
        Wraps the methods above for
        building the text dictionary

        return: catf_text_dict, {}
        """
        #
        self.get_atf_section()
        self.get_object_parts()
        # Text is split into parts
        self.splitLinesOParts()
        # Each object part is split into lines
        self.get_ObjetIdPart()
        # The part in which one observes the object id
        # is separated
        self.get_text_id()
        # From the object id part
        # the text id is taken
        self.get_objectTypePart()
        # From the object part list
        # object type part is taken
        self.get_textParts()
        # from the object parts that
        # have been divided into lines
        # textparts are taken
        self.set_text_PartInfo()
        # The type information
        # and partCount is added to
        # text dictionary
        self.textPart_dict_list = [
            self.set_partDict(textpart) for textpart in self.objectTextParts
        ]
        # part dict is created for each text part.
        self.catf_text_dict["text_textParts"] = self.textPart_dict_list
        #
        return self.catf_text_dict


    # TODO Continue writing the part-level work.
    # Do not forget that the AL-related work belongs at this level.
    # When you get down to the line level, do not forget the word
    # and sign methods - God willing.


"""

c_atf_text_dict = {
'textId':str,
'textWordCount':int
'textPartCount':int
'textLineCount':int
'textSignCount':int
'textParts':[{part_dict},{part_dict}, ... ]
}

part_dict = {
'partTitle':str,
'partWordCount':int,
'partTextLineCount':int,
'partSignCount': int,
'partLineStructures':[str, str,] # Lines starting with $
'partLineContents':[str, str,] # Lines starting with #
'partTextLines':[str,str,]
'partLines':[{line_dict}, {line_dict}, ...]
}

line_dict = {
'isLineStructure':boolean
'isLineContent':boolean
'lineNumber':int
'lineWordCount':int
'lineText':str.
'lineWords':[{word_dict}, {word_dict}, ... ]
}

word_dict = {
'word':str,
'hasDeterminative':boolean
'determinatives':[(prepos, LÚ),(postpos, MESZ), ...]
'hasDamaged':boolean
'relativePositionInLine': int.
'relativePositionInObjectPart':int
'absolutePositionInText':int
'isAnotherLanguage':boolean
'wordSignCount':int
'wordSigns':[{sign_dict},{sign_dict}, ... ]
}

sign_dict = {
'sign':str.
'isDamaged':boolean
'isDeterminative':boolean
'relativePositionInWord':int
'relativePositionInObjectPart':int
'relativePositionInLine':int
'absolutePositionInText':int
'isComplement':boolean
'isUnknownReading':boolean
'isDifferentLanguage':boolean
'language':str.
}

"""

# Manual test script --------------------------------------
#
# Everything below runs at import time against the sample file
# read at the top of the module (test_file). The statements are kept
# in their original order since later ones read the names bound by
# earlier ones.

# Module level functions: atf section -> object parts -> lines
# -> words of one line -> signs of one word
test_get_file = get_atf_section(test_file)

test_object_parts = get_object_parts(test_get_file)

test_lines = [x.splitlines() for x in test_object_parts]

# fourth line of the third object part
testLine = test_lines[2][3]

test_get_words = get_words(testLine)

# fourth word of that line
testWord = test_get_words[3].strip()

test_get_signs = get_signs(testWord)


# Take ids ----------------------------------------------

# DONE: Ids can be retrieved

c_atf_text = {}

c_atf_text_liste = []

# test_lines[0] holds the lines of the first object part
for test_line in test_lines[0]:
    c_atf_line = cAtfLineGetter(test_line)
    if len(c_atf_line.get_id_line()) != 0:
        c_atf_text["text_id"] = c_atf_line.get_id_line()
    elif len(c_atf_line.get_id_alternatives()) != 0:
        c_atf_text["text_id_alternatives"] = c_atf_line.get_id_alternatives()
    elif len(c_atf_line.get_language_line()) != 0:
        c_atf_text["text_language"] = c_atf_line.get_language_line()


# ------------------------------------------------------


# TODO: Improvement, get related lines for the comments about content

# DONE: Take part names, comment lines, and text lines in a part dictionary.

object_parts = test_lines[1:]

part_dict_list = []

# enumerate gives every part its own position. list.index() would give
# the position of the first *equal* part, so two parts with identical
# lines would have shared one part_no.
for part_no, part in enumerate(object_parts):
    part_dict = {}
    part_dict["partLineContents"] = []
    part_dict["partLineStructures"] = []
    part_dict["partLines"] = []
    part_dict["part_no"] = part_no
    part_dict["part_title"] = part[0].strip()
    for line in part:
        # every getter result is appended for every line,
        # whatever the getter gives back for a non matching line
        part_class = cAtfLineGetter(line)
        part_dict["partLineContents"].append(part_class.get_content_comment())
        part_dict["partLineStructures"].append(part_class.get_structure_comment())
        part_dict["partLines"].append(part_class.get_text_line())
        #
    part_dict_list.append(part_dict)

# ---------------------------------------------

# AL (Another Language) handler tests, on the third object part
# NOTE(review): the handler calls below may depend on state set by the
# earlier ones - confirm before reordering them.

test_al_class = cAtfALHandler(test_object_parts[2])


test_part_lines = test_al_class.splitPartLines() # DONE
test_get_line_dict_list = test_al_class.get_lineDict_list() # DONE

test_alrefs_line = test_al_class.get_ALRefs_lineLevel() # DONE


test_group_alrefs = list(test_al_class.group_ALRefs()) # DONE

# 734 - 1215

test_populate_mul = test_al_class.populate_mulALOC_refs() # DONE

test_populate_sing = test_al_class.populate_singALOC_refs() # DONE

test_multiline = test_al_class.get_mulAlOc_lineDict_list() # DONE


test_mulAl = test_al_class.get_mulAlOcS() # DONE

test_singAL = test_al_class.get_singALOcS() # DONE


test_al_ocs = test_al_class.get_ALOcS() # DONE

# Word class Tests -----------------------------

test_word = "{gesz-gesz-gesz}{gesz-an-il}bu-ut,-ni{gesz-mesz-gesz}e2-gal-za3#-di-nu#-tuku-a{gesz}#"

test_Wclass = cAtfWordDictBuilder(test_word)

seps = test_Wclass.set_sign_seperators() # DONE

detRfs = test_Wclass.get_detRefs() # DONE
# 1262 1628

g_derfs = test_Wclass.group_detRefs() # DONE

dSign = test_Wclass.get_detDictS() # DONE

wordDict = test_Wclass.wordDictBuild() # DONE

# Sign Class

# Test Signs

# Nested Compound Sign |(AN.((IR2%IR3).((AN@t~a&AN)+(IR3~txAN))).((AN.IR3)xNITA@r))|#


test_sign = "|(AN.((IR2%IR3).((AN@t~a&AN)+(IR3~txAN))).((AN.IR3)xNITA@r))|#"

# 2446 - 3076

testSignClass = cAtfSignDictBuilder(test_sign)

getComp = testSignClass.get_compositeSign() # DONE

getNestedSt = testSignClass.get_nestElements(getComp) # DONE

getOpPos = testSignClass.get_OpPositions(getComp) # DONE

# materialised once, the list is passed to two methods below
a = list(getNestedSt)

getNestlevelDList = testSignClass.get_nestLevelDict(a) # DONE


getNestdict = testSignClass.get_nestDict(a) # DONE

diRang = testSignClass.nestDict_LevelRangeCreator(getNestdict) # DONE

opdictList = testSignClass.get_OpDict_list(diRang, getOpPos) # DONE

opLevPos = testSignClass.get_OpLevelPosition(opdictList) # DONE

SR_BS = testSignClass.get_SignRelationBS(
    opLevPos,
    getNestlevelDList,
    getComp
) # DONE

SR_SpeCas = testSignClass.get_SignRelationSpeCases(
    opLevPos,
    getNestlevelDList,
    getComp
) # DONE

SR_list = SR_BS + SR_SpeCas

signsSR = [testSignClass.get_signsSR(signDict) for signDict in SR_list]

# DONE
# NOTE(review): the tail of buildSignDict reads a bare name `sign` in its
# complement branch; this module level binding may be what it resolves
# to - confirm before removing it.
sign = test_sign

signDict = testSignClass.buildSignDict() # DONE

# Compound Nested Signs are DONE

# Unnested Compound Sign |AN.AN.AN+AN.AN+AN+AN|#


test_sign = "|AN.IR3.BAR+TAM2.MESZ+AN+AN|#"

# 2446 - 3076

testSignClass = cAtfSignDictBuilder(test_sign)

getComp = testSignClass.get_compositeSign() # DONE

getOpPos = testSignClass.get_OpPositions(getComp) # DONE


getUnNested = testSignClass.get_unNestedCompSigns(getComp, getOpPos) # DONE

signsSR = [testSignClass.get_signsSR(signDict) for signDict in getUnNested]
# DONE

signDict = testSignClass.buildSignDict() # DONE

# Determinative Part {an


test_sign = "{frk}#"

testSignClass = cAtfSignDictBuilder(test_sign)

mk = testSignClass.signDictBuild(test_sign)

signDict = testSignClass.buildSignDict()


# Damaged Simple Sign nu# # DONE


test_sign = "nu#"

testSignClass = cAtfSignDictBuilder(test_sign)

mk = testSignClass.signDictBuild(test_sign)

signDict = testSignClass.buildSignDict()


# Modified Damaged Sign AN@t# # DONE


test_sign = "AN@t#"

testSignClass = cAtfSignDictBuilder(test_sign)

mk = testSignClass.signDictBuild(test_sign)

signDict = testSignClass.buildSignDict()


# Rotated sign AN@123 # DONE

test_sign = "AN@123#"

testSignClass = cAtfSignDictBuilder(test_sign)

mk = testSignClass.signDictBuild(test_sign)

signDict = testSignClass.buildSignDict()


# 3061 - 3373

# Whole pipeline: build the text dictionary of the sample file
# and dump its str() form to ParserOutput.txt

test_textClass = cAtfTextBuilder(test_file)

test_text = test_textClass.buildTextDict()


with open("ParserOutput.txt","w", encoding="utf-8", newline="\n") as f:
    f.write(str(test_text))