wangtzINT/setup.py

## setup.py
from distutils.core import setup
import py2exe

# py2exe build script: packages translation.py as a windowed (GUI) program.
setup(
    windows=["translation.py"],
)

## translation.py
import re
import xlrd
import codecs
import xlwt
from Tkinter import *
import tkFileDialog
import os

def getLineNumber(pos, content):
    """Return the 1-based line number of the character at index ``pos``.

    Every newline found in ``content`` up to and including index ``pos``
    moves the result one line further down.
    """
    textUpToPos = content[:pos + 1]
    return textUpToPos.count("\n") + 1

def valide(content):
    """Check one article for missing mandatory markers and unclosed pairs.

    Args:
        content: text of a single article (the part between ``#<`` and ``#>``).

    Returns:
        ``None`` when nothing is wrong, otherwise the error messages joined
        with newlines.
    """
    def sugar(x):
        # Wrap a marker name in angle brackets: "text0" -> "<text0>".
        return "<" + x + ">"

    # list of error messages
    output = []

    # According to the requirements, all of these fields are mandatory.
    mandatoryFields = ["source", "addressA", "addressB", "editor", "time",
                       "questionNum", "memo", "inputTime",
                       "text0", "guide0"]
    for field in mandatoryFields:
        if sugar(field) not in content:
            output.append("{0} field is mandatory but missed.".format(sugar(field)))

    # Opening marker -> closing marker.  Question pairs are numbered from 1
    # and collected until the first number that does not occur (capped at 9999).
    # TODO: a closing marker whose opening marker was removed is not detected.
    pairs = [["text0", "text1"], ["guide0", "guide1"]]
    i = 1
    while i < 10000 and sugar("{0}question0".format(i)) in content:
        pairs.append(["{0}question0".format(i), "{0}question1".format(i)])
        i += 1

    # An opening marker without a later closing marker is an error.
    for opener, closer in pairs:
        openerPos = content.find(sugar(opener))
        # str.find reports "not found" as -1; index 0 is a real position,
        # so an opener at the very start of the article must be checked too.
        if openerPos == -1:
            continue
        # Search strictly after the opener so the two markers cannot overlap.
        if content.find(sugar(closer), openerPos + 1) == -1:
            output.append("line {0}, operator {1} is not closed."
                          .format(getLineNumber(openerPos, content), sugar(opener)))

    if output:
        return "\n".join(output)
    return None

def _findDelimiterError(content):
    """Return a message for the first unbalanced ``#<`` / ``#>`` delimiter.

    The text is scanned pair by pair from the start.  ``None`` is returned
    when every ``#<`` found is followed by a ``#>`` and no ``#>`` shows up
    before the opener it should belong to.
    """
    opener, closer = "#<", "#>"
    lastPos = 0
    while True:
        openPos = content.find(opener, lastPos)
        # Look for the closer from the same position as the opener so that a
        # stray "#>" sitting right after the previous pair is not skipped.
        closePos = content.find(closer, lastPos)
        if openPos == -1:
            if closePos == -1:
                # no delimiters left => everything is balanced
                return None
            return "line {0}, #< is missing.".format(getLineNumber(closePos, content))
        if closePos == -1:
            return "line {0}, #< is not closed.".format(getLineNumber(openPos, content))
        if closePos < openPos:
            return "line {0}, #< is missing.".format(getLineNumber(closePos, content))
        lastPos = closePos + len(closer)


def processFile(filepath, filename):
    """Convert the articles of a marked-up text file into an ``.xls`` workbook.

    Every article (text between ``#<`` and ``#>``) becomes one row of
    ``Sheet 1``; row 0 holds the column titles.  The workbook is saved in the
    ``output`` folder under ``filename`` with its last four characters
    (``.txt``) replaced by ``.xls``.

    Args:
        filepath: path of the text file; it is decoded with the "mbcs" codec.
        filename: base name of the file, used to name the workbook.

    Returns:
        ``(0, message)`` when the article delimiters are unbalanced or no
        article exists (nothing is saved); otherwise ``(articleCount, True)``
        when no problem was found, or ``(articleCount, report)`` where
        ``report`` is the newline-joined list of problems (the workbook is
        saved in both of these cases).
    """
    book = xlwt.Workbook()
    sheet1 = book.add_sheet('Sheet 1')

    # font
    font0 = xlwt.Font()
    font0.name = 'Times New Roman'
    style0 = xlwt.XFStyle()
    style0.font = font0

    # NOTE: according to the original author's note, converting Word files
    # through Word automation was tried and failed, so plain text is expected.

    # Python needs to know the encoding to read files;
    # "mbcs" corresponds to ANSI on Windows.  The handle is closed as soon as
    # the text is read, whichever way the function returns.
    with codecs.open(filepath, "r", "mbcs") as f:
        fileContent = f.read()

    delimiterError = _findDelimiterError(fileContent)
    if delimiterError is not None:
        return (0, delimiterError)

    output = []
    # One-element list so the nested helpers can flip the flag
    # (Python 2 closures cannot rebind an outer local).
    headerPending = [True]

    def ensureHeader():
        # Emit the "** In article N" title once, before the article's first error.
        if headerPending[0]:
            output.append("** In article {0}: ".format(articleIdx))
            headerPending[0] = False

    def reportError(message):
        # Record one problem of the current article.
        ensureHeader()
        output.append(message)

    def markerLine(label):
        # Line (inside the current article) of the first "<label>" marker.
        return getLineNumber(articleContent.find("<{0}>".format(label)),
                             articleContent)

    def getFirstLineContent(s, content):
        # Reduce the text of single-line marker <s> to its first line and
        # report whatever had to be dropped.
        if "\n" not in content:
            return content
        firstLine = content[:content.index("\n")]
        truncated = content.rstrip() != firstLine.rstrip()
        firstLineBlank = firstLine.strip() == ""
        if truncated or firstLineBlank:
            ensureHeader()
            if truncated:
                output.append(u"line {0}, only the first line of text in <{1}> has been captured".
                              format(markerLine(s), s))
            # a completely blank section got its error message during the section scan
            if firstLineBlank and content.strip() != "":
                output.append(u"line {0}, <{1}> is empty".
                              format(markerLine(s), s))
        return firstLine

    def blockCellText(title):
        # Cell text for one field of a question block; " " (instead of an
        # empty cell) prevents the neighbouring text from overlapping in xls.
        if title not in existingSections:
            return " "
        text = existingSections[title]
        # Only the question body ("...0") and the note may span several lines.
        if title[-1] != "0" and title[-4:] != "note":
            text = getFirstLineContent(title, text)
        return text or " "

    # re.S: dot matches newlines; re.VERBOSE: comments inside the pattern
    articlePattern = re.compile(r"""
        \#<     # starter
        (.*?)   # all content, non-greedy
        \#>     # terminator
    """, re.S | re.VERBOSE)
    sectionPattern = re.compile(r"""
            <(?P<title>
                [\ \t]*\w*[\ \t]*     # match title
            )>
            (?P<content>
                .*?             # match content, non-greedy
            )
            (?=                 # possitive lookahead (not counted for next iter
                (?P<end>
                    <[\ \t]*\w*[\ \t]*>|      # next title
            #       \#>|        # or article teminitor (removed, cause it's not in the section)
                    $           # or end of section
                )
            )
        """, re.S | re.VERBOSE)
    preRequiredSections = ["source", "addressA", "addressB",
                           "editor", "time", "inputTime",
                           "questionNum", "memo",
                           "text0", "guide0"]
    # Columns whose question titles are already in row 0; xlwt refuses to
    # overwrite a cell, so every title group is written only once.
    writtenQuestionHeaders = set()

    # article information starts from row 1
    articleIdx = 0
    for article in articlePattern.finditer(fileContent):
        articleIdx += 1
        headerPending[0] = True
        articleContent = article.group(1)

        validation = valide(articleContent)
        if validation is not None:
            reportError(validation.strip())

        # ---- collect the sections of this article ----
        existingSections = {}
        for rawSection in sectionPattern.finditer(articleContent):
            titleLabel = rawSection.group("title")
            # labels longer than 100 characters are not treated as markers
            if len(titleLabel) > 100:
                continue
            titleLine = getLineNumber(rawSection.start("title"), articleContent)
            if titleLabel != titleLabel.strip():
                reportError(u"line {0}, <{1}> is not valid, do you mean <{2}>?".
                            format(titleLine, titleLabel, titleLabel.strip()))
                # not a valid section, skip the remaining checks
                continue

            # an opening marker ("...0") must be followed by its own closer
            endLabel = rawSection.group("end")[1:-1]
            if titleLabel[-1:] == "0" and titleLabel[:-1] != endLabel[:-1]:
                reportError(u"line {0} - {1}, unexpected nested labels (plz check <{2}> and <{3}>)".
                            format(titleLine,
                                   getLineNumber(rawSection.start("end"),
                                                 articleContent),
                                   titleLabel,
                                   endLabel))

            # closing markers ("...1") carry no text; everything else must
            sectionText = rawSection.group("content")
            if titleLabel[-1:] != "1" and sectionText.strip() == "":
                reportError(u"line {0}, <{1}> is empty".
                            format(titleLine, titleLabel))

            # the first occurrence wins, later duplicates are reported
            if titleLabel in existingSections:
                reportError(u"line {0}, <{1}> is not unique".
                            format(titleLine, titleLabel))
            else:
                existingSections[titleLabel] = sectionText

        # ---- fixed columns ----
        col = 0
        questionNum = None
        for s in preRequiredSections:
            if articleIdx == 1:
                # in the title row "text0" is shown as "text", etc.
                sheet1.write(0, col, s[:-1] if s[-1] == "0" else s, style0)
            # a missing section is written as " " to prevent text
            # overlapping in xls
            present = s in existingSections
            cellText = existingSections[s] if present else " "

            # a single-line marker keeps only its first line
            if s[-1] != "0":
                cellText = getFirstLineContent(s, cellText)

            if s == "questionNum" and present:
                if cellText.strip().isdigit():
                    questionNum = int(cellText.strip())
                else:
                    reportError(u"line {0}, <{1}> {2} is not a digital number".
                                format(markerLine(s), s, cellText))
                    questionNum = cellText

            sheet1.write(articleIdx, col, cellText, style0)
            col += 1
            # used sections are removed; what is left over at the end is unknown
            if present:
                del existingSections[s]

        # ---- question blocks: Nquestion0, NpreAnswer, Nanswer, Nnote ----
        # Without a usable questionNum the scan simply runs until the first
        # missing block (the 10000 cap is not expected to be reached).
        if isinstance(questionNum, int):
            upperBound = questionNum + 1
        else:
            upperBound = 10000
        i = 1
        while i < upperBound:
            questionTitle = "{0}question0".format(i)
            questionTitleEnd = "{0}question1".format(i)
            preAnswer = "{0}preAnswer".format(i)
            answerTitle = "{0}answer".format(i)
            noteTitle = "{0}note".format(i)

            # if any field exists, the whole block exists
            blockTitles = (questionTitle, preAnswer, answerTitle, noteTitle)
            if not any(title in existingSections for title in blockTitles):
                if i - 1 != questionNum:
                    reportError(u"{0} questions have been processed, which mismatch with declaration questionNum = {1}.".
                                format(i - 1, questionNum))
                break

            if col not in writtenQuestionHeaders:
                sheet1.write(0, col, questionTitle[:-1], style0)
                sheet1.write(0, col + 1, preAnswer, style0)
                sheet1.write(0, col + 2, answerTitle, style0)
                sheet1.write(0, col + 3, noteTitle, style0)
                writtenQuestionHeaders.add(col)

            # drop a leading "N." numbering from the question text
            rawQuestionContent = blockCellText(questionTitle)
            numbering = re.search(r"^\s*\d+\.\s*", rawQuestionContent)
            if numbering:
                realQuestion = rawQuestionContent[numbering.end():]
            else:
                realQuestion = rawQuestionContent

            sheet1.write(articleIdx, col, realQuestion, style0)
            sheet1.write(articleIdx, col + 1, blockCellText(preAnswer), style0)
            sheet1.write(articleIdx, col + 2, blockCellText(answerTitle), style0)
            sheet1.write(articleIdx, col + 3, blockCellText(noteTitle), style0)
            col += 4

            # remove the used sections; every one that is absent is reported
            missing = []
            try:
                del existingSections[questionTitle]
                del existingSections[questionTitleEnd]
            except KeyError:
                missing.append(questionTitle)
            for title in (preAnswer, answerTitle, noteTitle):
                try:
                    del existingSections[title]
                except KeyError:
                    missing.append(title)
            for title in missing:
                reportError(u"<{0}> field is mandatory but missed.".
                            format(title))
            i += 1

        # ---- whatever is still left is an unknown marker ----
        # NOTE(review): the original author observed that an unknown marker
        # such as "duddud0" is not reported; cause not established.
        for key in existingSections:
            if key == "text1" or key == "guide1":
                # closing markers which have not been removed from the dict
                continue
            reportError(u"line {0}, unknown marker <{1}> found".
                        format(markerLine(key), key))

    if articleIdx == 0:
        return (articleIdx, "No article found!")

    # filename[:-4] skips the ending ".txt";
    # Chinese file names require a unicode file name
    book.save(u".\\output\\{0}.xls".format(filename[:-4]))

    if not output:
        return (articleIdx, True)
    return (articleIdx, "\n".join(output))


def processAFile(txt):
    """Build the callback that lets the user pick and convert one text file.

    Args:
        txt: Tkinter ``Text`` widget that receives the progress messages.

    Returns:
        A no-argument function.  It asks for a ``*.txt`` file, runs
        ``processFile`` on it, prints the outcome into ``txt`` and leaves
        either ``<name> - log.txt`` (success) or ``<name> - error.txt``
        (problems found) in the ``./output`` folder, deleting the other one.
    """
    def main():
        # ask for the txt file location
        filepath = tkFileDialog.askopenfilename(initialdir='.',
                                                filetypes=[("Text file", "*.txt")])
        if not filepath:
            # the dialog was cancelled: there is nothing to process
            return
        filename = os.path.basename(filepath)

        # .decode("mbcs") is not needed: askopenfilename already returns a
        # name that displays Chinese correctly
        txt.insert(END, '=' * 5 + " " + filename + " " + '=' * 5 + '\n')
        txt.insert(END, "Start processing" + '\n')
        txt.yview(END)

        # Both the workbook and the log/error files go to ./output, so make
        # sure the folder exists before anything is written.
        if not os.path.isdir("./output"):
            os.makedirs("./output")

        countArticles, output = processFile(filepath, filename)
        summary = '{0} article(s) have been processed'.format(countArticles) + '\n'
        txt.insert(END, summary)

        # filename[:-4] skips the ending ".txt"
        errorFileName = "./output/" + filename[:-4] + " - error.txt"
        logFileName = "./output/" + filename[:-4] + " - log.txt"

        if output is True:
            txt.insert(END, 'Successed' + '\n')
            if os.path.exists(errorFileName):
                # remove the error file of an earlier run
                os.unlink(errorFileName)
            with open(logFileName, 'w') as fobj:
                fobj.write(summary)
        else:
            if os.path.exists(logFileName):
                # remove the log file of an earlier run
                os.unlink(logFileName)
            txt.insert(END, 'Error:' + '\n')
            # indent every line of the report by one tab
            txt.insert(END, "\t" + output.replace("\n", "\n\t") + '\n')
            # create or rewrite the error file
            # NOTE(review): under Python 2 a report containing non-ASCII text
            # may fail to encode here - confirm the intended file encoding.
            with open(errorFileName, 'w') as fobj:
                fobj.write(summary)
                fobj.write(output)
        txt.yview(END)

    return main

if __name__ == "__main__":
    # Main window: a button on top, a scrollable message area below it.
    root = Tk()
    root.geometry('500x200')

    # The scrollbar is packed first so it keeps the right edge.
    scrollbar = Scrollbar(root)
    scrollbar.pack(side=RIGHT, fill=Y)

    # Row holding the single action button.
    label_line = Frame(root)
    label_line.pack(side=TOP, padx=1, pady=1)
    add_button = Button(label_line, text='process a new file')
    add_button.pack()

    # Message area that shows the progress and the error reports.
    txt = Text(root, width=100, borderwidth=2)
    txt.pack(side=LEFT, fill=BOTH)

    # Couple the text area with the scrollbar, then hook up the button.
    txt.configure(yscrollcommand=scrollbar.set)
    scrollbar.configure(command=txt.yview)
    add_button.configure(command=processAFile(txt))

    # Hand control to the Tk event loop.
    root.mainloop()
	from distutils.core import setup
	import py2exe

	# py2exe build script: packages translation.py as a windowed (GUI) program.
	setup(
	    windows=["translation.py"],
	)
	import re
	import xlrd
	import codecs
	import xlwt
	from Tkinter import *
	import tkFileDialog
	import os

	def getLineNumber(pos, content):
	    """Return the 1-based line number of the character at index ``pos``.

	    Counts the newlines of ``content`` up to and including index ``pos``.
	    """
	    return content.count("\n", 0, pos + 1) + 1

	def valide(content):
	    """Check one article for missing mandatory markers and unclosed pairs.

	    Args:
	        content: text of a single article (the part between ``#<`` and ``#>``).

	    Returns:
	        ``None`` when nothing is wrong, otherwise the error messages joined
	        with newlines.
	    """
	    def sugar(x):
	        # Wrap a marker name in angle brackets: "text0" -> "<text0>".
	        return "<" + x + ">"

	    # list of error messages
	    output = []

	    # According to the requirements, all of these fields are mandatory.
	    mandatoryFields = ["source", "addressA", "addressB", "editor", "time",
	                       "questionNum", "memo", "inputTime",
	                       "text0", "guide0"]
	    for field in mandatoryFields:
	        if sugar(field) not in content:
	            output.append("{0} field is mandatory but missed.".format(sugar(field)))

	    # Opening marker -> closing marker.  Question pairs are numbered from 1
	    # and collected until the first number that does not occur (capped at 9999).
	    # TODO: a closing marker whose opening marker was removed is not detected.
	    pairs = [["text0", "text1"], ["guide0", "guide1"]]
	    i = 1
	    while i < 10000 and sugar("{0}question0".format(i)) in content:
	        pairs.append(["{0}question0".format(i), "{0}question1".format(i)])
	        i += 1

	    # An opening marker without a later closing marker is an error.
	    for opener, closer in pairs:
	        openerPos = content.find(sugar(opener))
	        # str.find reports "not found" as -1; index 0 is a real position,
	        # so an opener at the very start of the article must be checked too.
	        if openerPos == -1:
	            continue
	        # Search strictly after the opener so the two markers cannot overlap.
	        if content.find(sugar(closer), openerPos + 1) == -1:
	            output.append("line {0}, operator {1} is not closed."
	                          .format(getLineNumber(openerPos, content), sugar(opener)))

	    if output:
	        return "\n".join(output)
	    return None

	def _findDelimiterError(content):
	    """Return a message for the first unbalanced ``#<`` / ``#>`` delimiter.

	    The text is scanned pair by pair from the start.  ``None`` is returned
	    when every ``#<`` found is followed by a ``#>`` and no ``#>`` shows up
	    before the opener it should belong to.
	    """
	    opener, closer = "#<", "#>"
	    lastPos = 0
	    while True:
	        openPos = content.find(opener, lastPos)
	        # Look for the closer from the same position as the opener so that a
	        # stray "#>" sitting right after the previous pair is not skipped.
	        closePos = content.find(closer, lastPos)
	        if openPos == -1:
	            if closePos == -1:
	                # no delimiters left => everything is balanced
	                return None
	            return "line {0}, #< is missing.".format(getLineNumber(closePos, content))
	        if closePos == -1:
	            return "line {0}, #< is not closed.".format(getLineNumber(openPos, content))
	        if closePos < openPos:
	            return "line {0}, #< is missing.".format(getLineNumber(closePos, content))
	        lastPos = closePos + len(closer)


	def processFile(filepath, filename):
	    """Convert the articles of a marked-up text file into an ``.xls`` workbook.

	    Every article (text between ``#<`` and ``#>``) becomes one row of
	    ``Sheet 1``; row 0 holds the column titles.  The workbook is saved in the
	    ``output`` folder under ``filename`` with its last four characters
	    (``.txt``) replaced by ``.xls``.

	    Args:
	        filepath: path of the text file; it is decoded with the "mbcs" codec.
	        filename: base name of the file, used to name the workbook.

	    Returns:
	        ``(0, message)`` when the article delimiters are unbalanced or no
	        article exists (nothing is saved); otherwise ``(articleCount, True)``
	        when no problem was found, or ``(articleCount, report)`` where
	        ``report`` is the newline-joined list of problems (the workbook is
	        saved in both of these cases).
	    """
	    book = xlwt.Workbook()
	    sheet1 = book.add_sheet('Sheet 1')

	    # font
	    font0 = xlwt.Font()
	    font0.name = 'Times New Roman'
	    style0 = xlwt.XFStyle()
	    style0.font = font0

	    # NOTE: according to the original author's note, converting Word files
	    # through Word automation was tried and failed, so plain text is expected.

	    # Python needs to know the encoding to read files;
	    # "mbcs" corresponds to ANSI on Windows.  The handle is closed as soon as
	    # the text is read, whichever way the function returns.
	    with codecs.open(filepath, "r", "mbcs") as f:
	        fileContent = f.read()

	    delimiterError = _findDelimiterError(fileContent)
	    if delimiterError is not None:
	        return (0, delimiterError)

	    output = []
	    # One-element list so the nested helpers can flip the flag
	    # (Python 2 closures cannot rebind an outer local).
	    headerPending = [True]

	    def ensureHeader():
	        # Emit the "** In article N" title once, before the article's first error.
	        if headerPending[0]:
	            output.append("** In article {0}: ".format(articleIdx))
	            headerPending[0] = False

	    def reportError(message):
	        # Record one problem of the current article.
	        ensureHeader()
	        output.append(message)

	    def markerLine(label):
	        # Line (inside the current article) of the first "<label>" marker.
	        return getLineNumber(articleContent.find("<{0}>".format(label)),
	                             articleContent)

	    def getFirstLineContent(s, content):
	        # Reduce the text of single-line marker <s> to its first line and
	        # report whatever had to be dropped.
	        if "\n" not in content:
	            return content
	        firstLine = content[:content.index("\n")]
	        truncated = content.rstrip() != firstLine.rstrip()
	        firstLineBlank = firstLine.strip() == ""
	        if truncated or firstLineBlank:
	            ensureHeader()
	            if truncated:
	                output.append(u"line {0}, only the first line of text in <{1}> has been captured".
	                              format(markerLine(s), s))
	            # a completely blank section got its error message during the section scan
	            if firstLineBlank and content.strip() != "":
	                output.append(u"line {0}, <{1}> is empty".
	                              format(markerLine(s), s))
	        return firstLine

	    def blockCellText(title):
	        # Cell text for one field of a question block; " " (instead of an
	        # empty cell) prevents the neighbouring text from overlapping in xls.
	        if title not in existingSections:
	            return " "
	        text = existingSections[title]
	        # Only the question body ("...0") and the note may span several lines.
	        if title[-1] != "0" and title[-4:] != "note":
	            text = getFirstLineContent(title, text)
	        return text or " "

	    # re.S: dot matches newlines; re.VERBOSE: comments inside the pattern
	    articlePattern = re.compile(r"""
	        \#<     # starter
	        (.*?)   # all content, non-greedy
	        \#>     # terminator
	    """, re.S | re.VERBOSE)
	    sectionPattern = re.compile(r"""
	            <(?P<title>
	                [\ \t]*\w*[\ \t]*     # match title
	            )>
	            (?P<content>
	                .*?             # match content, non-greedy
	            )
	            (?=                 # possitive lookahead (not counted for next iter
	                (?P<end>
	                    <[\ \t]*\w*[\ \t]*>|      # next title
	            #       \#>|        # or article teminitor (removed, cause it's not in the section)
	                    $           # or end of section
	                )
	            )
	        """, re.S | re.VERBOSE)
	    preRequiredSections = ["source", "addressA", "addressB",
	                           "editor", "time", "inputTime",
	                           "questionNum", "memo",
	                           "text0", "guide0"]
	    # Columns whose question titles are already in row 0; xlwt refuses to
	    # overwrite a cell, so every title group is written only once.
	    writtenQuestionHeaders = set()

	    # article information starts from row 1
	    articleIdx = 0
	    for article in articlePattern.finditer(fileContent):
	        articleIdx += 1
	        headerPending[0] = True
	        articleContent = article.group(1)

	        validation = valide(articleContent)
	        if validation is not None:
	            reportError(validation.strip())

	        # ---- collect the sections of this article ----
	        existingSections = {}
	        for rawSection in sectionPattern.finditer(articleContent):
	            titleLabel = rawSection.group("title")
	            # labels longer than 100 characters are not treated as markers
	            if len(titleLabel) > 100:
	                continue
	            titleLine = getLineNumber(rawSection.start("title"), articleContent)
	            if titleLabel != titleLabel.strip():
	                reportError(u"line {0}, <{1}> is not valid, do you mean <{2}>?".
	                            format(titleLine, titleLabel, titleLabel.strip()))
	                # not a valid section, skip the remaining checks
	                continue

	            # an opening marker ("...0") must be followed by its own closer
	            endLabel = rawSection.group("end")[1:-1]
	            if titleLabel[-1:] == "0" and titleLabel[:-1] != endLabel[:-1]:
	                reportError(u"line {0} - {1}, unexpected nested labels (plz check <{2}> and <{3}>)".
	                            format(titleLine,
	                                   getLineNumber(rawSection.start("end"),
	                                                 articleContent),
	                                   titleLabel,
	                                   endLabel))

	            # closing markers ("...1") carry no text; everything else must
	            sectionText = rawSection.group("content")
	            if titleLabel[-1:] != "1" and sectionText.strip() == "":
	                reportError(u"line {0}, <{1}> is empty".
	                            format(titleLine, titleLabel))

	            # the first occurrence wins, later duplicates are reported
	            if titleLabel in existingSections:
	                reportError(u"line {0}, <{1}> is not unique".
	                            format(titleLine, titleLabel))
	            else:
	                existingSections[titleLabel] = sectionText

	        # ---- fixed columns ----
	        col = 0
	        questionNum = None
	        for s in preRequiredSections:
	            if articleIdx == 1:
	                # in the title row "text0" is shown as "text", etc.
	                sheet1.write(0, col, s[:-1] if s[-1] == "0" else s, style0)
	            # a missing section is written as " " to prevent text
	            # overlapping in xls
	            present = s in existingSections
	            cellText = existingSections[s] if present else " "

	            # a single-line marker keeps only its first line
	            if s[-1] != "0":
	                cellText = getFirstLineContent(s, cellText)

	            if s == "questionNum" and present:
	                if cellText.strip().isdigit():
	                    questionNum = int(cellText.strip())
	                else:
	                    reportError(u"line {0}, <{1}> {2} is not a digital number".
	                                format(markerLine(s), s, cellText))
	                    questionNum = cellText

	            sheet1.write(articleIdx, col, cellText, style0)
	            col += 1
	            # used sections are removed; what is left over at the end is unknown
	            if present:
	                del existingSections[s]

	        # ---- question blocks: Nquestion0, NpreAnswer, Nanswer, Nnote ----
	        # Without a usable questionNum the scan simply runs until the first
	        # missing block (the 10000 cap is not expected to be reached).
	        if isinstance(questionNum, int):
	            upperBound = questionNum + 1
	        else:
	            upperBound = 10000
	        i = 1
	        while i < upperBound:
	            questionTitle = "{0}question0".format(i)
	            questionTitleEnd = "{0}question1".format(i)
	            preAnswer = "{0}preAnswer".format(i)
	            answerTitle = "{0}answer".format(i)
	            noteTitle = "{0}note".format(i)

	            # if any field exists, the whole block exists
	            blockTitles = (questionTitle, preAnswer, answerTitle, noteTitle)
	            if not any(title in existingSections for title in blockTitles):
	                if i - 1 != questionNum:
	                    reportError(u"{0} questions have been processed, which mismatch with declaration questionNum = {1}.".
	                                format(i - 1, questionNum))
	                break

	            if col not in writtenQuestionHeaders:
	                sheet1.write(0, col, questionTitle[:-1], style0)
	                sheet1.write(0, col + 1, preAnswer, style0)
	                sheet1.write(0, col + 2, answerTitle, style0)
	                sheet1.write(0, col + 3, noteTitle, style0)
	                writtenQuestionHeaders.add(col)

	            # drop a leading "N." numbering from the question text
	            rawQuestionContent = blockCellText(questionTitle)
	            numbering = re.search(r"^\s*\d+\.\s*", rawQuestionContent)
	            if numbering:
	                realQuestion = rawQuestionContent[numbering.end():]
	            else:
	                realQuestion = rawQuestionContent

	            sheet1.write(articleIdx, col, realQuestion, style0)
	            sheet1.write(articleIdx, col + 1, blockCellText(preAnswer), style0)
	            sheet1.write(articleIdx, col + 2, blockCellText(answerTitle), style0)
	            sheet1.write(articleIdx, col + 3, blockCellText(noteTitle), style0)
	            col += 4

	            # remove the used sections; every one that is absent is reported
	            missing = []
	            try:
	                del existingSections[questionTitle]
	                del existingSections[questionTitleEnd]
	            except KeyError:
	                missing.append(questionTitle)
	            for title in (preAnswer, answerTitle, noteTitle):
	                try:
	                    del existingSections[title]
	                except KeyError:
	                    missing.append(title)
	            for title in missing:
	                reportError(u"<{0}> field is mandatory but missed.".
	                            format(title))
	            i += 1

	        # ---- whatever is still left is an unknown marker ----
	        # NOTE(review): the original author observed that an unknown marker
	        # such as "duddud0" is not reported; cause not established.
	        for key in existingSections:
	            if key == "text1" or key == "guide1":
	                # closing markers which have not been removed from the dict
	                continue
	            reportError(u"line {0}, unknown marker <{1}> found".
	                        format(markerLine(key), key))

	    if articleIdx == 0:
	        return (articleIdx, "No article found!")

	    # filename[:-4] skips the ending ".txt";
	    # Chinese file names require a unicode file name
	    book.save(u".\\output\\{0}.xls".format(filename[:-4]))

	    if not output:
	        return (articleIdx, True)
	    return (articleIdx, "\n".join(output))


	def processAFile(txt):
	    """Build the callback that lets the user pick and convert one text file.

	    Args:
	        txt: Tkinter ``Text`` widget that receives the progress messages.

	    Returns:
	        A no-argument function.  It asks for a ``*.txt`` file, runs
	        ``processFile`` on it, prints the outcome into ``txt`` and leaves
	        either ``<name> - log.txt`` (success) or ``<name> - error.txt``
	        (problems found) in the ``./output`` folder, deleting the other one.
	    """
	    def main():
	        # ask for the txt file location
	        filepath = tkFileDialog.askopenfilename(initialdir='.',
	                                                filetypes=[("Text file", "*.txt")])
	        if not filepath:
	            # the dialog was cancelled: there is nothing to process
	            return
	        filename = os.path.basename(filepath)

	        # .decode("mbcs") is not needed: askopenfilename already returns a
	        # name that displays Chinese correctly
	        txt.insert(END, '=' * 5 + " " + filename + " " + '=' * 5 + '\n')
	        txt.insert(END, "Start processing" + '\n')
	        txt.yview(END)

	        # Both the workbook and the log/error files go to ./output, so make
	        # sure the folder exists before anything is written.
	        if not os.path.isdir("./output"):
	            os.makedirs("./output")

	        countArticles, output = processFile(filepath, filename)
	        summary = '{0} article(s) have been processed'.format(countArticles) + '\n'
	        txt.insert(END, summary)

	        # filename[:-4] skips the ending ".txt"
	        errorFileName = "./output/" + filename[:-4] + " - error.txt"
	        logFileName = "./output/" + filename[:-4] + " - log.txt"

	        if output is True:
	            txt.insert(END, 'Successed' + '\n')
	            if os.path.exists(errorFileName):
	                # remove the error file of an earlier run
	                os.unlink(errorFileName)
	            with open(logFileName, 'w') as fobj:
	                fobj.write(summary)
	        else:
	            if os.path.exists(logFileName):
	                # remove the log file of an earlier run
	                os.unlink(logFileName)
	            txt.insert(END, 'Error:' + '\n')
	            # indent every line of the report by one tab
	            txt.insert(END, "\t" + output.replace("\n", "\n\t") + '\n')
	            # create or rewrite the error file
	            # NOTE(review): under Python 2 a report containing non-ASCII text
	            # may fail to encode here - confirm the intended file encoding.
	            with open(errorFileName, 'w') as fobj:
	                fobj.write(summary)
	                fobj.write(output)
	        txt.yview(END)

	    return main

	if __name__ == "__main__":
	    # Main window: a button on top, a scrollable message area below it.
	    root = Tk()  # create a root window
	    root.geometry('500x200')

	    # The scrollbar is packed first so it keeps the right edge.
	    scrollbar = Scrollbar(root)
	    scrollbar.pack(side=RIGHT, fill=Y)

	    # Row holding the single action button.
	    label_line = Frame(root)
	    label_line.pack(side=TOP, padx=1, pady=1)
	    add_button = Button(label_line,
	                        text='process a new file')
	    add_button.pack()

	    # Message area that shows the progress and the error reports.
	    txt = Text(root,
	               width=100,
	               borderwidth=2,
	               )
	    txt.pack(side=LEFT, fill=BOTH)

	    # Couple the text area with the scrollbar, then hook up the button.
	    scrollbar.config(command=txt.yview)
	    txt['yscrollcommand'] = scrollbar.set
	    add_button['command'] = processAFile(txt)

	    root.mainloop()  # run the event loop