idlecool/s3ocr.py

## s3ocr.py
# -*- coding: utf-8 -*-

""" Sahana Optical Character Recognision Utility (s3ocr)

    @author: Suryajith Chillara <suryajith1987[at]gmail.com>
    @author: Shiv Deepak <idlecool[at]gmail.com>

    @copyright: 2009-2011 (c) Sahana Software Foundation
    @license: MIT

    Permission is hereby granted, free of charge, to any person
    obtaining a copy of this software and associated documentation
    files (the "Software"), to deal in the Software without
    restriction, including without limitation the rights to use,
    copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the
    Software is furnished to do so, subject to the following
    conditions:

    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    OTHER DEALINGS IN THE SOFTWARE.

"""

__all__ = ["S3OCR"]

#========================== import section ====================================

# Generic stuff
import os
import sys
import re
import uuid
import Image
import ImageOps
import ImageStat
import math
from StringIO import StringIO
from htmlentitydefs import name2codepoint

from lxml import etree

# Importing reportlab stuff
try:
    from reportlab.pdfgen.canvas import Canvas
    from reportlab.lib.pagesizes import A4
    from reportlab.graphics.barcode import code128
    # for adding more fonts
    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.ttfonts import TTFont
    import reportlab
    reportlab.rl_config.warnOnMissingFontGlyphs = 0
except(ImportError):
    print >> sys.stderr, "S3 Debug: WARNING: S3OCR: reportlab has not been installed."

from gluon.storage import Storage

from s3rest import S3Method
from s3cfg import S3Config

#==========================================================================
#================================= OCR API ================================
#==========================================================================

class S3OCR(S3Method):
    """
    Generate XForms and PDFs the s3 way
    """

    def apply_method(self,
                     r,
                     **attr):
        """
        Entry point for REST requests (S3Method's abstract method)

        Reads the OCR configuration from the controller attributes,
        prepares the field type mappings and localised strings and then
        dispatches on HTTP method and representation.

        @param r: the request
        @param attr: controller attributes; the optional "s3ocr_config"
                     dict is read for the keys "tabs", "pdftitle",
                     "exclude_components" and "field_properties"

        @return: GET/xml: the serialised s3ocr XML,
                 GET/pdf: the generated PDF form, or the output of
                 s3ocr_parser() if the request variable
                 _operation=putpdf is given;
                 every other combination is answered through
                 r.error() with status 501
        """

        xml = self.manager.xml
        self.r = r

        # ocr configuration settings for this resource
        ocr_settings = attr.get("s3ocr_config", {})

        # localised names of components
        self.rheader_tabs = ocr_settings.get("tabs", [])
        # custom pdf title (if any)
        self.pdftitle = ocr_settings.get("pdftitle", None)
        # components which have to be excluded
        self.exclude_component_list = ocr_settings.get("exclude_components",
                                                       [])
        # individual field specific properties
        #
        # example field_properties
        # field_properties = {
        #     "%s_%s__%s" % (prefix, resourcename, fieldname): { fieldtype="",
        #                                                        .
        #                                                        .
        #                                                        }
        #     }
        self.custom_field_properties = ocr_settings.get("field_properties",
                                                        {})

        # individual fieldtype specific properties
        #
        # example fieldtype properties
        # fieldtype_properties = {
        #     fieldtype : { fieldtype="",
        #                   .
        #                   .
        #                   }
        #     }
        deployment_config = S3Config(globals(), self.T)
        self.custom_fieldtype_properties = \
            deployment_config.get_s3ocr_fieldtype_properties()

        # field type convention mapping from resource to pdf forms
        self.generic_ocr_field_type = {
            "string": "string",
            "text": "textbox",
            "boolean" : "boolean",
            "double": "double",
            "date": "date",
            "datetime": "datetime",
            "integer": "integer",
            "list:integer": "multiselect",
            "list:string": "multiselect",
            "list:double": "multiselect",
            "list:text": "multiselect",
            }

        # text for localisation
        T = self.T
        self.l10n = {
            "datetime_hint": {
                "date": T("fill in order: day(2) month(2) year(4)"),
                "datetime": T("fill in order: hour(2) min(2) day(2) month(2) year(4)"),
                },
            "ocr_inst": {
                "inst1": T("1. Fill the necessary fields in BLOCK CAPITAL letters."),
                "inst2": T("2. Always use one box per letter and leave one box space to separate words."),
                "inst3": T("3. Fill in the circles completely."),
                },
            "boolean": {
                "yes": T("Yes"),
                "no": T("No"),
                },
            "select": {
                "multiselect": T("Select one or more option(s) that apply"),
                "singleselect": T("Select any one option that apply"),
                },
            }

        # debug mode is requested with ?_debug=1; in debug mode the pdf
        # is sent inline instead of as an attachment
        self.debug = r.vars.get("_debug", False) == "1"
        content_disposition = "inline" if self.debug else "attachment"

        # serve the request
        representation = r.representation
        http_method = r.http
        response = self.response
        ERROR = self.manager.ERROR

        if http_method == "GET":
            if representation == "xml":
                output = self.s3ocr_etree()
                response.view = "xml.html"
                response.headers["Content-Type"] = "application/xml"
                return xml.tostring(output, pretty_print=True)

            if representation == "pdf":
                if r.vars.get("_operation", False) == "putpdf":
                    # parser output is returned as plain text
                    output = self.s3ocr_parser()
                    response.view = None
                    response.headers["Content-Type"] = "text/plain"
                    return output

                # book a revision for a new form, render the pdf and
                # store the layout xml with the form metadata
                form_uuid = uuid.uuid1()
                form_revision = self.__book_revision(form_uuid)
                output, layout_etree = self.pdf_manager(form_uuid,
                                                        form_revision)
                self.__update_dbmeta(layout_xml=etree.tostring(layout_etree),
                                     form_uuid=form_uuid,
                                     revision=form_revision)

                response.view = None
                response.headers["Content-Type"] = "application/pdf"
                response.headers["Content-disposition"] = \
                    "%s; filename=\"%s.pdf\"" % (content_disposition,
                                                 self.tablename)
                return output

            r.error(501, ERROR.BAD_FORMAT)

        elif http_method in ("POST","PUT"):
            # uploads are not supported through this method
            if representation in ("xml", "pdf"):
                r.error(501, ERROR.NOT_IMPLEMENTED)
            else:
                r.error(501, ERROR.BAD_FORMAT)

        else:
            r.error(501, ERROR.BAD_METHOD)

    def s3ocr_etree(self):
        """
        Optimise and modify the s3xml etree to produce the s3ocr etree

        The structure of the resource (self.resource.struct) is
        rearranged into a flat <s3ocr> root element with one <resource>
        child per main/component resource. Each field is then annotated
        for the form generator:

            - the field type is mapped to its OCR field type
            - "lines"/"boxes" are set for fields without options
            - readable but not writable fields get a "default" attribute
            - custom fieldtype and custom field settings are applied
            - unknown field types fall back to a two line "string"
            - fields which are neither readable nor writable, excluded
              components and options without text are removed

        @return: the <s3ocr> root element
        """

        s3xml_etree = self.resource.struct(options=True,
                                           references=True,
                                           stylesheet=None,
                                           as_json=False,
                                           as_tree=True)
        # xml attribute names
        ITEXT = "label"
        TYPE = "type"
        HASOPTIONS = "has_options"
        LINES = "lines"
        BOXES = "boxes"

        # Converting s3xml to s3ocr_xml (nicer to traverse)
        s3xml_root = s3xml_etree.getroot()
        resource_element = s3xml_root[0]
        s3ocr_root = etree.Element("s3ocr")

        # sequence of the components and their localised names,
        # the key None holds the label used for the resource itself
        component_sequence, components_l10n_dict = \
            self.__rheader_tabs_sequence(self.r.tablename)
        resource_label = components_l10n_dict.get(None,
                                                  self.resource.tablename)

        if self.r.component:     # if it is a component
            resource_element.set(ITEXT, resource_label)
            s3ocr_root.append(resource_element)

        else:                    # if it is main resource
            # mres is main resource etree
            mres = etree.Element("resource")
            for attrname, attrvalue in resource_element.attrib.items():
                mres.set(attrname, attrvalue)

            componentetrees = []
            # iterate over a snapshot of the children: append() moves
            # the field out of resource_element while it is traversed
            for child in list(resource_element):
                if child.tag == "field":       # main resource fields
                    mres.append(child)
                elif child.tag == "resource":  # component resource
                    componentetrees.append(child)

            mres.set(ITEXT, resource_label)

            # Serialisation of Component List and l10n
            if component_sequence:
                serialised_component_etrees = []
                for eachcomponent in component_sequence:
                    component_table = "%s_%s" % (self.prefix, eachcomponent)

                    for eachtree in componentetrees:
                        if eachtree.attrib.get("name", None) == component_table:
                            # l10n strings are added and sequencing is done here
                            eachtree.set(ITEXT,
                                         components_l10n_dict.get(eachcomponent,
                                                                  component_table))
                            serialised_component_etrees.append(eachtree)
            else:
                serialised_component_etrees = componentetrees

            # create s3ocr tree
            s3ocr_root.append(mres)
            for res in serialised_component_etrees:
                s3ocr_root.append(res)

        # remove fields which are not required
        # loading user defined configurations
        FIELD_TYPE_LINES = { # mapping types with number of lines
            "string": 2,
            "textbox": 4,
            "integer": 1,
            "double": 1,
            "date": 1,
            "datetime": 1,
            }
        FIELD_TYPE_BOXES = { # mapping type with numboxes
            "integer": 9,
            "double": 16,
            }
        # ocr field types the form generator knows how to handle
        known_ocr_types = set(self.generic_ocr_field_type.values())

        # All loops below run over list() snapshots because elements are
        # removed from their parents inside the loop bodies
        for eachresource in list(s3ocr_root):
            resourcetablename = eachresource.attrib.get("name")

            if resourcetablename in self.exclude_component_list:
                # excluded components are removed
                s3ocr_root.remove(eachresource)
                continue

            for eachfield in list(eachresource):
                fieldname = eachfield.attrib.get("name")
                fieldtype = eachfield.attrib.get(TYPE)

                # loading ocr specific fieldtypes
                ocrfieldtype = self.generic_ocr_field_type.get(fieldtype)
                if ocrfieldtype is not None:
                    eachfield.set(TYPE, ocrfieldtype)
                    fieldtype = ocrfieldtype

                # set num boxes and lines
                if eachfield.attrib.get(HASOPTIONS) == "False":
                    eachfield.set(LINES,
                                  str(FIELD_TYPE_LINES.get(fieldtype, 1)))
                    if fieldtype in FIELD_TYPE_BOXES:
                        eachfield.set(BOXES,
                                      str(FIELD_TYPE_BOXES[fieldtype]))

                # if field is readable but not writable set default value
                if eachfield.attrib.get("readable", "False") == "True" and \
                        eachfield.attrib.get("writable", "False") == "False":
                    # NOTE(review): the default is always looked up in
                    # the table of the request's resource, also for
                    # fields of component resources - a component field
                    # whose name is missing there gets "None"; confirm
                    # whether the component's own table should be used
                    try:
                        fielddefault = self.r.resource.table[fieldname].default
                    except KeyError:
                        fielddefault = "None"
                    eachfield.set("default", str(fielddefault))

                # load custom fieldtype specific settings
                if fieldtype not in known_ocr_types and \
                        fieldtype in self.custom_fieldtype_properties:
                    self.__update_custom_fieldtype_settings(eachfield)
                    # refresh fieldtype after update
                    fieldtype = eachfield.attrib.get(TYPE)

                # for unknown field types
                if fieldtype not in known_ocr_types:
                    fieldtype = "string"
                    eachfield.set(TYPE, fieldtype)
                    eachfield.set(HASOPTIONS, "False")
                    eachfield.set(LINES, "2")

                # loading custom field specific settings
                self.__update_custom_field_settings(eachfield,
                                                    resourcetablename,
                                                    fieldname)

                # in ocr boolean fields should be shown as options
                if fieldtype == "boolean":
                    eachfield.set(HASOPTIONS, "True")

                # fields removed which need not be displayed
                if eachfield.attrib.get("readable", "False") == "False" and \
                        eachfield.attrib.get("writable", "False") == "False":
                    eachresource.remove(eachfield)
                    continue

                # options without any text are dropped from the select
                if eachfield.attrib.get(HASOPTIONS, "False") == "True" and \
                        eachfield.attrib.get(TYPE) != "boolean":
                    s3ocrselect = eachfield[0]
                    for eachoption in list(s3ocrselect):
                        if not eachoption.text:
                            s3ocrselect.remove(eachoption)
        return s3ocr_root

    def pdf_manager(self, form_uuid, form_revision):
        """
        Produces OCR Compatible PDF forms
        """

        s3ocr_root = self.s3ocr_etree() # get element s3xml

        s3ocr_layout_etree = etree.Element("s3ocrlayout")
        # define font size
        titlefontsize = 18
        sectionfontsize = 15
        regularfontsize = 13
        hintfontsize = 10

        # etree labels
        ITEXT = "label"
        HINT = "comment"
        TYPE = "type"
        HASOPTIONS = "has_options"
        LINES = "lines"
        BOXES = "boxes"

        #l10n
        l10n = self.l10n

        # get pdf title
        if self.pdftitle == None or self.pdftitle == "":
            try:
                pdftitle = self.manager.s3.crud_strings[\
                    self.tablename].subtitle_list.decode("utf-8")
            except:
                pdftitle = self.resource.tablename
        else:
            pdftitle = self.pdftitle

        # prepare pdf
        form = Form(form_uuid=form_uuid,
                    form_revision=form_revision,
                    form_resourcename="%s_%s" % (self.prefix,
                                                 self.resource.name))
        form.decorate()

        # set header
        form.canvas.setTitle(pdftitle) # set pdf meta title
        form.print_text([pdftitle,],
                        fontsize=titlefontsize,
                        style="center") # set pdf header title

        form.print_text(
            [
                unicode(l10n.get("ocr_inst").get("inst1").decode("utf-8")),
                unicode(l10n.get("ocr_inst").get("inst2").decode("utf-8")),
                unicode(l10n.get("ocr_inst").get("inst3").decode("utf-8"))
                ],
            fontsize=regularfontsize,
            gray=0)
        form.linespace(3)
        # printing the etree
        for eachresource in s3ocr_root:
            # create resource element of ocr layout xml
            s3ocr_layout_resource_etree =\
                etree.SubElement(s3ocr_layout_etree,
                                 "resource", name=eachresource.attrib.get("name"))

            form.draw_line()
            form.print_text([
                    eachresource.attrib.get(ITEXT,
                                            eachresource.attrib.get("name"))
                    ],
                            fontsize=sectionfontsize)
            form.draw_line(nextline=1)
            form.linespace(12) # line spacing between each field
            for eachfield in eachresource.iterchildren():
                # create field element of ocr layout xml
                s3ocr_layout_field_etree =\
                    etree.SubElement(s3ocr_layout_resource_etree,
                                     "field",
                                     name=eachfield.attrib.get("name"),
                                     type=eachfield.attrib.get("type"))

                fieldlabel = eachfield.attrib.get(ITEXT)
                spacing = " " * 5
                fieldhint = self.__trim(eachfield.attrib.get(HINT))
                if fieldhint != "" and fieldhint != None:
                    form.print_text(["%s%s( %s )" % \
                                         (fieldlabel,
                                          spacing,
                                          fieldhint)],
                                     fontsize=regularfontsize)
                else:
                    form.print_text([fieldlabel],
                                     fontsize=regularfontsize)

                if eachfield.attrib.get("readable", "False") == "True" and \
                        eachfield.attrib.get("writable", "False") == "False":
                    # if it is a readonly field
                    form.print_text(
                        [eachfield.attrib.get("default","No default Value")],
                        seek=10,
                        )
                elif eachfield.attrib.get(HASOPTIONS) == "True":
                    fieldtype = eachfield.attrib.get(TYPE)
                    # if the field has to be shown with options
                    if fieldtype == "boolean":
                        form.nextline()
                        form.resetx()
                        bool_text = l10n.get("boolean")
                        form.print_text(
                            [bool_text.get("yes").decode("utf-8")],
                            continuetext=1,
                            seek=3,
                            )
                        loc_info = form.draw_circle(
                            boxes=1,
                            continuetext=1,
                            gray=0.9,
                            seek=10,
                            fontsize=12,
                            )
                        # create checkbox element of ocr layout xml
                        s3ocr_layout_optionbox_etree =\
                            etree.SubElement(s3ocr_layout_field_etree,
                                             "optionbox",
                                             x=str(loc_info["x"]),
                                             y=str(loc_info["y"]),
                                             radius=str(loc_info["radius"]),
                                             boxes=str(loc_info["boxes"]),
                                             page=str(loc_info["page"]))
                    	s3ocr_layout_optionbox_etree.text = "yes"

                        form.print_text(
                            [bool_text.get("no").decode("utf-8")],
                            continuetext=1,
                            seek=10,
                            )
                        loc_info = form.draw_circle(
                            boxes=1,
                            continuetext=1,
                            gray=0.9,
                            seek=10,
                            fontsize=12,
                            )
                        # create checkbox element of ocr layout xml
                        s3ocr_layout_optionbox_etree =\
                            etree.SubElement(s3ocr_layout_field_etree,
                                             "optionbox",
                                             x=str(loc_info["x"]),
                                             y=str(loc_info["y"]),
                                             radius=str(loc_info["radius"]),
                                             boxes=str(loc_info["boxes"]),
                                             page=str(loc_info["page"]))
                    	s3ocr_layout_optionbox_etree.text = "no"

                    else:
                        if fieldtype == "multiselect":
                            option_hint = l10n.get("select").get("multiselect")
                        else:
                            option_hint = l10n.get("select").get("singleselect")
                        form.print_text(
                            [option_hint.decode("utf-8")],
                            fontsize=hintfontsize,
                            gray=0.4,
                            seek=3,
                            )
                        s3ocrselect = eachfield.getchildren()[0]
                        form.nextline(regularfontsize)
                        form.resetx() # move cursor to the front
                        optionseek = 10
                        # resting margin for options
                        formmargin = form.marginsides
                        form.marginsides = optionseek + formmargin
                        for eachoption in s3ocrselect.iterchildren():
                            form.print_text(
                                [eachoption.text],
                                continuetext=1,
                                fontsize = regularfontsize,
                                seek = 10,
                                )
                            loc_info = form.draw_circle(
                                boxes=1,
                                continuetext=1,
                                gray=0.9,
                                seek=10,
                                fontsize=12,
                                )
                            # create checkbox element of ocr layout xml
                            s3ocr_layout_optionbox_etree =\
                                etree.SubElement(s3ocr_layout_field_etree,
                                                 "optionbox",
                                                 x=str(loc_info["x"]),
                                                 y=str(loc_info["y"]),
                                                 radius=str(loc_info["radius"]),
                                                 boxes=str(loc_info["boxes"]),
                                                 page=str(loc_info["page"]))
                            s3ocr_layout_optionbox_etree.text =\
                                eachoption.attrib.get("value")
                        # restoring orginal margin
                        form.marginsides = formmargin

                else:
                    # if it is a text field
                    fieldtype = eachfield.attrib.get(TYPE)
                    BOXES_TYPES = ["string", "textbox", "integer",
                                   "double", "date", "datetime",]
                    if fieldtype in BOXES_TYPES:
                        if fieldtype in ["string", "textbox"]:
                            form.linespace(3)
                            num_lines = int(eachfield.attrib.get("lines",
                                                                     1))
                            for eachline in xrange(num_lines):
                                loc_info = form.draw_check_boxes(
                                    completeline=1,
                                    gray=0.9,
                                    seek=3,
                                    )
                                # create checkbox element of ocr layout xml
                                s3ocr_layout_textbox_etree =\
                                    etree.SubElement(s3ocr_layout_field_etree,
                                                     "textbox",
                                                     x=str(loc_info["x"]),
                                                     y=str(loc_info["y"]),
                                                     side=str(loc_info["side"]),
                                                     boxes=str(loc_info["boxes"]),
                                                     page=str(loc_info["page"]))
                            	s3ocr_layout_textbox_etree.text = " "
                        elif fieldtype in ["integer", "double"]:
                            num_boxes = int(eachfield.attrib.get("boxes",
                                                                 9))
                            form.linespace(3)
                            loc_info = form.draw_check_boxes(
                                boxes = num_boxes,
                                gray=0.9,
                                seek=3,
                                )
                            # create checkbox element of ocr layout xml
                            s3ocr_layout_textbox_etree =\
                                etree.SubElement(s3ocr_layout_field_etree,
                                                 "textbox",
                                                 x=str(loc_info["x"]),
                                                 y=str(loc_info["y"]),
                                                 side=str(loc_info["side"]),
                                                 boxes=str(loc_info["boxes"]),
                                                 page=str(loc_info["page"]))
			    s3ocr_layout_textbox_etree.text = " "

                        elif fieldtype in ["date", "datetime"]:
                            # print hint
                            hinttext = \
                                l10n.get("datetime_hint").get(fieldtype).decode("utf-8")
                            form.print_text(
                                [hinttext],
                                fontsize=hintfontsize,
                                gray=0.4,
                                seek=3,
                                )
                            form.linespace(8)
                            datetime_continuetext = 0
                            datetime_seek = 3
                            if fieldtype == "datetime":
                                datetime_continuetext = 1
                                datetime_seek = 6
                                #HH
                                loc_info = form.draw_check_boxes(
                                    boxes = 2,
                                    gray=0.9,
                                    seek = 3,
                                    )
                                # create checkbox element of ocr layout xml
                                s3ocr_layout_textbox_etree =\
                                    etree.SubElement(s3ocr_layout_field_etree,
                                                     "textbox",
                                                     x=str(loc_info["x"]),
                                                     y=str(loc_info["y"]),
                                                     side=str(loc_info["side"]),
                                                     boxes=str(loc_info["boxes"]),
                                                     page=str(loc_info["page"]))
                            	s3ocr_layout_textbox_etree.text = "HH"

                                #MM
                                loc_info = form.draw_check_boxes(
                                    boxes = 2,
                                    gray=0.9,
                                    continuetext=1,
                                    seek = 4,
                                    )
                                # create checkbox element of ocr layout xml
                                s3ocr_layout_textbox_etree =\
                                    etree.SubElement(s3ocr_layout_field_etree,
                                                     "textbox",
                                                     x=str(loc_info["x"]),
                                                     y=str(loc_info["y"]),
                                                     side=str(loc_info["side"]),
                                                     boxes=str(loc_info["boxes"]),
                                                     page=str(loc_info["page"]))
                            	s3ocr_layout_textbox_etree.text = "MM"

                            # DD
                            loc_info = form.draw_check_boxes(
                                boxes = 2,
                                gray=0.9,
                                continuetext = datetime_continuetext,
                                seek = datetime_seek,
                                )
                            # create checkbox element of ocr layout xml
                            s3ocr_layout_textbox_etree =\
                                etree.SubElement(s3ocr_layout_field_etree,
                                                 "textbox",
                                                 x=str(loc_info["x"]),
                                                 y=str(loc_info["y"]),
                                                 side=str(loc_info["side"]),
                                                 boxes=str(loc_info["boxes"]),
                                                 page=str(loc_info["page"]))
                            s3ocr_layout_textbox_etree.text = "DD"

                            # MO
                            loc_info = form.draw_check_boxes(
                                boxes = 2,
                                gray=0.9,
                                continuetext=1,
                                seek = 4,
                                )
                            # create checkbox element of ocr layout xml
                            s3ocr_layout_textbox_etree =\
                                etree.SubElement(s3ocr_layout_field_etree,
                                                 "textbox",
                                                 x=str(loc_info["x"]),
                                                 y=str(loc_info["y"]),
                                                 side=str(loc_info["side"]),
                                                 boxes=str(loc_info["boxes"]),
                                                 page=str(loc_info["page"]))
                            s3ocr_layout_textbox_etree.text = "MO"

                            # YYYY
                            loc_info = form.draw_check_boxes(
                                boxes = 4,
                                gray=0.9,
                                continuetext=1,
                                seek = 4,
                                )
                            # create checkbox element of ocr layout xml
                            s3ocr_layout_textbox_etree =\
                                etree.SubElement(s3ocr_layout_field_etree,
                                                 "textbox",
                                                 x=str(loc_info["x"]),
                                                 y=str(loc_info["y"]),
                                                 side=str(loc_info["side"]),
                                                 boxes=str(loc_info["boxes"]),
                                                 page=str(loc_info["page"]))
                            s3ocr_layout_textbox_etree.text = "YYYY"

                    else:
                        self.r.error(501, self.manager.PARSE_ERROR)
                        print sys.stderr("%s :invalid field type: %s" %\
                                             (eachfield.attrib.get("name"),
                                              fieldtype))
        return form.save(), s3ocr_layout_etree

    def __update_custom_fieldtype_settings(self,
                                       eachfield, #field etree
                                       ):
        """
        Update custom fieldtype specific settings into the etree
        """

        # xml attributes
        TYPE = "type"
        READABLE = "readable"
        WRITABLE = "writable"
        LABEL = "label"
        HINT = "comment"
        DEFAULT = "default"
        LINES = "lines"
        BOXES = "boxes"
        HASOPTIONS = "has_options"

        fieldtype = eachfield.attrib.get(TYPE)
        field_property = self.custom_fieldtype_properties.get(fieldtype,  {})

        cust_fieldtype = fieldtype_property.get("fieldtype", None)
        cust_readable = fieldtype_property.get("readable", None)
        cust_writable = fieldtype_property.get("writable", None)
        cust_label = fieldtype_property.get("label", None)
        cust_hint = fieldtype_property.get("hint", None)
        cust_default = fieldtype_property.get("default", None)
        cust_lines = fieldtype_property.get("lines", None)
        cust_boxes = fieldtype_property.get("boxes", None)
        cust_has_options = fieldtype_property.get("has_options", None)
        cust_options = fieldtype_property.get("options", None)

        if cust_fieldtype:
            if cust_fieldtype != None:
                eachfield.set(TYPE, cust_fieldtype)
            if cust_readable != None:
                eachfield.set(READABLE, cust_readable)
            if cust_writable != None:
                eachfield.set(WRITABLE, cust_writable)
            if cust_label != None:
                eachfield.set(LABEL, cust_label)
            if cust_hint != None:
                eachfield.set(HINT, cust_hint)
            if cust_default != None:
                eachfield.set(DEFAULT, cust_default)
            if cust_lines != None:
                eachfield.set(LINES, cust_lines)
            if cust_boxes != None:
                eachfield.set(BOXES, cust_boxes)
            if cust_has_options != None:
                eachfield.set(HASOPTIONS, cust_has_options)
            if cust_options != None:
                opt_available = eachfield.getchildren()
                if len(opt_available) == 0:
                    eachfield.append(cust_options)
                elif len(opt_available) == 1:
                    eachfield.remove(opt_available[0])
                    eachfield.append(cust_options)

    def __update_custom_field_settings(self,
                                       eachfield, #field etree
                                       resourcetablename,
                                       fieldname
                                       ):
        """
        Update custom field specific settings into the etree
        """

        # xml attributes
        TYPE = "type"
        READABLE = "readable"
        WRITABLE = "writable"
        LABEL = "label"
        HINT = "comment"
        DEFAULT = "default"
        LINES = "lines"
        BOXES = "boxes"
        HASOPTIONS = "has_options"

        unikey = "%s__%s" % (resourcetablename, fieldname)
        field_property = self.custom_field_properties.get(unikey,  {})

        cust_fieldtype = field_property.get("fieldtype", None)
        cust_readable = field_property.get("readable", None)
        cust_writable = field_property.get("writable", None)
        cust_label = field_property.get("label", None)
        cust_hint = field_property.get("hint", None)
        cust_default = field_property.get("default", None)
        cust_lines = field_property.get("lines", None)
        cust_boxes = field_property.get("boxes", None)
        cust_has_options = field_property.get("has_options", None)
        cust_options = field_property.get("options", None)

        if cust_fieldtype:
            if cust_fieldtype != None:
                eachfield.set(TYPE, cust_fieldtype)
            if cust_readable != None:
                eachfield.set(READABLE, cust_readable)
            if cust_writable != None:
                eachfield.set(WRITABLE, cust_writable)
            if cust_label != None:
                eachfield.set(LABEL, cust_label)
            if cust_hint != None:
                eachfield.set(HINT, cust_hint)
            if cust_default != None:
                eachfield.set(DEFAULT, cust_default)
            if cust_lines != None:
                eachfield.set(LINES, cust_lines)
            if cust_boxes != None:
                eachfield.set(BOXES, cust_boxes)
            if cust_has_options != None:
                eachfield.set(HASOPTIONS, cust_has_options)
            if cust_options != None:
                opt_available = eachfield.getchildren()
                if len(opt_available) == 0:
                    eachfield.append(cust_options)
                elif len(opt_available) == 1:
                    eachfield.remove(opt_available[0])
                    eachfield.append(cust_options)

    def __rheader_tabs_sequence(self, resourcename):
        """
        Sequence of components is returned as a list
        """

        component_seq = []
        component_l10n_dict = {}
        rtabs = self.rheader_tabs
        for eachel in rtabs:
            if eachel[1] != None:
                component_seq.append(eachel[1])
            component_l10n_dict[eachel[1]] = eachel[0].decode("utf-8")
        return component_seq, component_l10n_dict

    def __trim(self, text):
        """
        Helper to trim off any enclosing paranthesis
        """

        if isinstance(text, str) and \
                text[0] == "(" and \
                text[-1] == ")":
            text = text[1:-1]
        return text

    def __update_dbmeta(self, **kwargs):
        """
        Store the PDF layout information into the database/disk.
        """

        # basic fields
        form_uuid = kwargs.get("form_uuid", None)
        layout_xml = kwargs.get("layout_xml", None)
        revision = kwargs.get("revision", None)

        layout_file_stream = StringIO(layout_xml)
        layout_file_name = "%s_xml" % form_uuid

        db = self.db
        tablename = "ocr_meta"

        #is_component = False if (len(self.resource.components) == 0) else True
        resource_name = "%s_%s" % (self.prefix, self.resource.name)

        rows = db(db[tablename]["form_uuid"] == form_uuid).select()
        row = rows[0]
        row.update_record(layout_file=db[tablename]["layout_file"].store(\
                layout_file_stream,
                layout_file_name))

    def __book_revision(self, form_uuid):
        """
        Books a revision number for current operation in ocr_meta
        """

        db = self.db
        tablename = "ocr_meta"
        resource_name = "%s_%s" % (self.prefix, self.resource.name)

        #determiniing revision
        selector = db[tablename]["revision"].max()
        rows = db(db[tablename]["resource_name"]==resource_name).select(selector)
        row = rows.first()
        revision = 0 if (row[selector] == None) else (row[selector] + 1)
        db[tablename].insert(form_uuid=form_uuid,
                             resource_name=resource_name,
                             revision=revision)

        return revision

    def s3ocr_parser(self, **kwargs):
        """
        performs OCR on a given set of pages

        Every page image is converted to black and white, rotated by the
        orientation computed from its markers and given a scale factor.
        The layout xml stored in ocr_meta for the form is then walked
        and each optionbox/textbox component is cropped out of its page
        and passed to __ocrIt.  The recognised values are currently only
        printed; the layout etree itself is returned unchanged.

        NOTE(review): the block marked "debug only" still overrides all
        keyword arguments and loads the page scans from a hard coded
        path, so the arguments below have no effect until it is removed.

        @param pages: read from kwargs, overridden by the debug block
        @param form_uuid: uuid of the form the pages belong to
        @param revision: revision of the form layout
        @param resourcename: resource name the layout was generated for
        @return: the layout xml as a pretty printed string
        """

        pages = kwargs.get("pages", None)
        raw_images = {}
        images = {}
        form_uuid = kwargs.get("form_uuid", None)
        revision = kwargs.get("revision", None)
        resourcename = kwargs.get("resourcename", None)

        # <debug only>
        for i in range(0, 11):
            print("page %s" % i)
            raw_images[i+1] = Image.open(os.path.join("/home/idlecool/",
                                                "pr_person-%s.png" % i))
        pages = 1
        form_uuid = "4ab7c932-8fdb-11e0-bf41-533e0c24f0a"
        revision = 14
        resourcename = "pr_person"
        # </debug only>

        # transform image
        for each_img_index in raw_images.keys():
            print(each_img_index)
            page = images[each_img_index] = {}
            page["image"] =\
                self.__convertImage2binary(raw_images[each_img_index])
            page["markers"] = self.__getMarkers(page["image"])
            page["orientation"] = self.__getOrientation(page["markers"])
            if page["orientation"] != 0.0:
                # straighten the sheet, then locate the markers again
                page["image"] = page["image"].rotate(page["orientation"])
                page["markers"] = self.__getMarkers(page["image"])
                page["orientation"] = self.__getOrientation(page["markers"])

            page["scalefactor"] = self.__scaleFactor(page["markers"])

        # get layout file, convert it to etree
        db = self.db
        ocr_meta = db["ocr_meta"]
        # conditions have to be joined with "&": python's "and" would
        # evaluate to the last condition only
        layout_row =\
            db((ocr_meta["form_uuid"] == form_uuid) &
               (ocr_meta["resource_name"] == resourcename) &
               (ocr_meta["revision"] == revision)
               ).select(ocr_meta["layout_file"]).first()
        layout_path = os.path.join("./applications/",
                                   self.request.application,
                                   'uploads/ocr_meta/',
                                   layout_row["layout_file"])
        with open(layout_path, 'rb') as layout_file:
            layout_xml = layout_file.read()
        layout_etree = etree.fromstring(layout_xml)

        for eachresource in layout_etree:
            for eachfield in eachresource:
                components = list(eachfield)
                if not components:
                    continue

                # the first child decides how the whole field is read
                component_type = components[0].tag
                if component_type == "optionbox":
                    linenum = 0
                    for eachcomponent in components:
                        comp_x = float(eachcomponent.attrib.get("x"))
                        comp_y = float(eachcomponent.attrib.get("y"))
                        comp_radius = float(eachcomponent.attrib.get("radius"))
                        comp_page = int(eachcomponent.attrib.get("page"))
                        comp_value = str(eachcomponent.text)
                        try:
                            page = images[comp_page]
                        except KeyError:
                            self.r.error(501,
                                         self.T("insufficient number of pages provided"))
                        origin_x = page["markers"][0][0]
                        origin_y = page["markers"][0][1]
                        scale_x = page["scalefactor"]["x"]
                        scale_y = page["scalefactor"]["y"]
                        print(eachcomponent.tag)
                        # square around the centre of the option circle
                        crop_box = (
                            int(origin_x + (comp_x*scale_x) - comp_radius*scale_x),
                            int(origin_y + (comp_y*scale_y) - comp_radius*scale_y),
                            int(origin_x + (comp_x*scale_x) + comp_radius*scale_x),
                            int(origin_y + (comp_y*scale_y) + comp_radius*scale_y),
                            )
                        print(eachcomponent.tag)
                        cropped_image = page["image"].crop(crop_box)
                        result = self.__ocrIt(cropped_image,
                                              form_uuid,
                                              resourcename,
                                              linenum,
                                              content_type="optionbox")
                        if result:
                            print("TRUE: %s" % comp_value)
                        linenum += 1
                elif component_type == "textbox":
                    linenum = 1
                    for eachcomponent in components:
                        comp_x = float(eachcomponent.attrib.get("x"))
                        comp_y = float(eachcomponent.attrib.get("y"))
                        comp_boxes = int(eachcomponent.attrib.get("boxes"))
                        comp_side = float(eachcomponent.attrib.get("side"))
                        comp_page = int(eachcomponent.attrib.get("page"))
                        try:
                            page = images[comp_page]
                        except KeyError:
                            self.r.error(501,
                                         self.T("insufficient number of pages provided"))
                        origin_x = page["markers"][0][0]
                        origin_y = page["markers"][0][1]
                        scale_x = page["scalefactor"]["x"]
                        scale_y = page["scalefactor"]["y"]
                        print(eachcomponent.tag)
                        # one row of comp_boxes squares of side comp_side
                        crop_box = (
                            int(origin_x + (comp_x*scale_x)),
                            int(origin_y + (comp_y*scale_y)),
                            int(origin_x + (comp_x*scale_x) + comp_side*comp_boxes*scale_x),
                            int(origin_y + (comp_y*scale_y) + comp_side*scale_y),
                            )
                        cropped_image = page["image"].crop(crop_box)
                        output = self.__ocrIt(cropped_image,
                                              form_uuid,
                                              resourcename,
                                              linenum)
                        print(output)
                        linenum += 1

        output = etree.tostring(layout_etree, pretty_print=True)
        return output #"%s %s %s" % (markers, orientation, scalefactor)

    def __ocrIt(self,
                image,
                form_uuid,
                resourcename,
                linenum,
                content_type="textbox"):
        """
        put Tesseract into work, actual OCRing will be done here

        An optionbox is not passed to tesseract, it only counts as
        filled if its mean gray level is below 96.  A textbox is saved
        into uploads/ocr_temp of the running application, read by the
        tesseract command line tool and removed again.

        @param image: cropped image of one component
        @param form_uuid: used to build the temporary file name
        @param resourcename: used to build the temporary file name
        @param linenum: used to build the temporary file name
        @param content_type: "optionbox" or "textbox"
        @return: True/False for an optionbox, the recognised text with
                 newlines replaced by spaces for a textbox, None for
                 any other content_type
        """

        import subprocess

        if content_type == "optionbox":
            return ImageStat.Stat(image).mean[0] < 96

        if content_type != "textbox":
            return None

        # unique per call, so that parallel requests do not collide
        uniqueuuid = uuid.uuid1()
        basename = "%s_%s_%s_%s" % (uniqueuuid,
                                    form_uuid,
                                    resourcename,
                                    linenum)
        # absolute paths are used instead of os.chdir, which would move
        # the working directory of the whole process
        tempdir = os.path.abspath(os.path.join("applications",
                                               self.request.application,
                                               "uploads",
                                               "ocr_temp"))
        inputpath = os.path.join(tempdir, "%s.tif" % basename)
        outputbase = os.path.join(tempdir, "%s_text" % basename)
        outputpath = "%s.txt" % outputbase

        try:
            os.mkdir(tempdir)
        except OSError:
            # fine if it is already there, anything else is an error
            if not os.path.isdir(tempdir):
                raise

        try:
            image.save(inputpath)
            # the arguments are passed as a list, no shell is involved
            with open(os.devnull, "w") as devnull:
                try:
                    status = subprocess.call(["tesseract",
                                              inputpath,
                                              outputbase,
                                              "-psm", "7"],
                                             stdout=devnull)
                except OSError:
                    # the tesseract executable could not be started
                    status = -1
            if status != 0:
                self.r.error(501, self.T("Tesseract not installed"))
            with open(outputpath) as outputfile:
                outputtext = outputfile.read()
        finally:
            for leftover in (inputpath, outputpath):
                if os.path.exists(leftover):
                    os.remove(leftover)
            try:
                os.rmdir(tempdir)
            except OSError:
                # still used by another request
                pass

        return outputtext.replace("\n", " ")

    def __convertImage2binary(self, image, threshold = 180):
        """ Converts the image into binary based on a threshold. here it is 180"""
        image = ImageOps.grayscale(image)
        image.convert("L")

        width, height = image.size

        for x in xrange(width):
            for y in xrange(height):
                if image.getpixel((x,y)) < 180 :
                    image.putpixel((x,y), 0)
                else:
                    image.putpixel((x,y), 255)
        return image

    def __findRegions(self, im):
        """
        Return the list of regions which are found by the following algorithm.

        -----------------------------------------------------------
        Raster Scanning Algorithm for Connected Component Analysis:
        -----------------------------------------------------------

        On the first pass:
        =================
        1. Iterate through each element of the data by column, then by row (Raster Scanning)
        2. If the element is not the background
            1. Get the neighboring elements of the current element
            2. If there are no neighbors, uniquely label the current element and continue
            3. Otherwise, find the neighbor with the smallest label and assign it to the current element
            4. Store the equivalence between neighboring labels

        On the second pass:
        ===================
        1. Iterate through each element of the data by column, then by row
        2. If the element is not the background
           1. Relabel the element with the lowest equivalent label
        ( source: http://en.wikipedia.org/wiki/Connected_Component_Labeling )
        """

        width, height  = im.size
        ImageOps.grayscale(im)
        im = im.convert("L")

        regions = {}
        pixel_region = [[0 for y in xrange(height)] for x in xrange(width)]
        equivalences = {}
        n_regions = 0

        #first pass. find regions.
        for x in xrange(width):
            for y in xrange(height):
                #look for a black pixel
                if im.getpixel((x, y)) == 0 : #BLACK
                    # get the region number from north or west or create new region
                    region_n = pixel_region[x-1][y] if x > 0 else 0
                    region_w = pixel_region[x][y-1] if y > 0 else 0
                    #region_nw = pixel_region[x-1][y-1] if x > 0 and y > 0 else 0
                    #region_ne = pixel_region[x-1][y+1] if x > 0 else 0

                    max_region = max(region_n, region_w)

                    if max_region > 0:
                        #a neighbour already has a region, new region is the smallest > 0
                        new_region = min(filter(lambda i: i > 0, (region_n, region_w)))
                        #update equivalences
                        if max_region > new_region:
                            if max_region in equivalences:
                                equivalences[max_region].add(new_region)
                            else:
                                equivalences[max_region] = set((new_region, ))
                    else:
                        n_regions += 1
                        new_region = n_regions

                    pixel_region[x][y] = new_region

        #Scan image again, assigning all equivalent regions the same region value.
        for x in xrange(width):
            for y in xrange(height):
                r = pixel_region[x][y]
                if r > 0:
                    while r in equivalences:
                        r = min(equivalences[r])

                    if r in regions:
                        regions[r].add(x, y)
                    else:
                        regions[r] = self.__Region(x, y)

        return list(regions.itervalues())

    def __getOrientation(self, markers):
        """ Returns orientation of the sheet in radians """
        x1, y1 = markers[0]
        x2, y2 = markers[2]
        try:
            slope = ((x2-x1)*1.0) / ((y2-y1)*1.0)
        except(ZeroDivisionError):
            slope = 999999999999999999999999999
        return math.atan(slope)*(180.0/math.pi)*(-1)

    def __scaleFactor(self, markers):
        """ Returns the scale factors lengthwise and breadthwise """
        stdWidth = sum((596, -60))
        stdHeight = sum((842, -60))
        li = [markers[0], markers[2]]
        sf_y = self.__distance(li)/stdHeight
        li = [markers[6], markers[2]]
        sf_x = self.__distance(li)/stdWidth
        return {"x":sf_x, "y":sf_y}

    def __distance(self, li):
        """ returns the euclidean distance if the input is of the form [(x1, y1), (x2, y2)]"""
        return math.sqrt(math.fsum((math.pow(math.fsum((int(li[1][0]), -int(li[0][0]))), 2), math.pow(math.fsum((int(li[1][1]), -int(li[0][1]))), 2))))


    def __getMarkers(self, image):
        """ Gets the markers on the OCR image """
        centers = {}
        present = 0

        regions = self.__findRegions(image)

        for r in regions:
            if r.area > 320 and r.aspectratio() < 1.5 and r.aspectratio() > 0.67:
                present += 1
                centers[present] = r.centroid()

        # This is the list of all the markers on the form.
        markers = list(centers.itervalues())
        markers.sort()
        l1 = sorted(markers[0:3], key=lambda y: y[1])
        l2 = markers[3:4]
        l3 = sorted(markers[4:7], key=lambda y: y[1])
        markers = []
        markers.extend(l1)
        markers.extend(l2)
        markers.extend(l3)
        #markers.sort(key=lambda x: (x[0], x[1]))
        return markers

    class __Region():
        """ Self explainatory """
        def __init__(self, x, y):
            """ Initialize the region """
            self._pixels = [(x, y)]
            self._min_x = x
            self._max_x = x
            self._min_y = y
            self._max_y = y
            self.area = 1

        def add(self, x, y):
            """ Add a pixel to the region """
            self._pixels.append((x, y))
            self.area += 1
            self._min_x = min(self._min_x, x)
            self._max_x = max(self._max_x, x)
            self._min_y = min(self._min_y, y)
            self._max_y = max(self._max_y, y)

        def centroid(self):
            """ Returns the centroid of the bounding box """
            return ((self._min_x + self._max_x)/2 , (self._min_y + self._max_y)/2)

        def box(self):
            """ Returns the bounding box of the region """
            return [ (self._min_x, self._min_y) , (self._max_x, self._max_y)]

        def aspectratio(self):
            """ Calculating the aspect ratio of the region """
            width = self._max_x - self._min_x
            length = self._max_y - self._min_y
            return float(width)/float(length)


#==============================================================================
#==================== unicode support to reportlab ============================
#==============================================================================

# directory which holds the font files used below
fonts_directory = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               "../../static/fonts")

#------------------------------------------------------------------------------
# unifont - considered to be an allrounder
#------------------------------------------------------------------------------

try:
    pdfmetrics.registerFont(TTFont("unifont",
                                   os.path.join(fonts_directory,
                                                "unifont/unifont.ttf")))
    unifont_map = [
        (0, 65536),
        ]
except Exception:
    # the font is optional, an empty map keeps it out of fontchecksequence
    unifont_map = []
    sys.stderr.write("S3 Debug: s3ocr: unifont not found, run static/fonts/setfonts.py\n")

#------------------------------------------------------------------------------
# Arabic fonts
#------------------------------------------------------------------------------

try:
    pdfmetrics.registerFont(TTFont("AlMateen-Bold",
                                   os.path.join(fonts_directory,
                                                "arabic/ae_AlMateen-Bold.ttf")))
    from fontmap.AlMateenBold import AlMateenBold_map

    pdfmetrics.registerFont(TTFont("AlMohanad",
                                   os.path.join(fonts_directory,
                                                "arabic/ae_AlMohanad.ttf")))
    from fontmap.AlMohanad import AlMohanad_map

except Exception:
    # both arabic fonts are dropped if either of them fails to load
    AlMateenBold_map = []
    AlMohanad_map = []
    sys.stderr.write("S3 Debug: s3ocr: arabic fonts not found, run static/fonts/setfonts.py\n")

#------------------------------------------------------------------------------
# japanese fonts
#------------------------------------------------------------------------------

try:
    pdfmetrics.registerFont(TTFont("SazanamiGothic",
                                   os.path.join(fonts_directory,
                                                "japanese/sazanami-gothic.ttf")))
    from fontmap.SazanamiGothic import SazanamiGothic_map

    pdfmetrics.registerFont(TTFont("SazanamiMincho",
                                   os.path.join(fonts_directory,
                                                "japanese/sazanami-mincho.ttf")))
    from fontmap.SazanamiMincho import SazanamiMincho_map

except Exception:
    # both japanese fonts are dropped if either of them fails to load
    SazanamiGothic_map = []
    SazanamiMincho_map = []
    sys.stderr.write("S3 Debug: s3ocr: japanese fonts not found, run static/fonts/setfonts.py\n")

#--------------------------------------------------------------------------
# Standard fonts
#--------------------------------------------------------------------------

Helvetica = "Helvetica"
Helvetica_map = [
    (32, 127),
    (160, 161),
    (173, 173),
    ]

# Fonts
#Courier = "Courier"
#Helvetica_Bold = "Helvetica-Bold"
#Helvetica_Bold_Oblique = "Helvetica-BoldOblique"
#Helvetica_Oblique = "Helvetica-Oblique"

#--------------------------------------------------------------------------
# some global variables
#--------------------------------------------------------------------------

# fonts in the order in which they are tried
fontlist = [
    "Helvetica",         # english and latin english fonts
    "AlMateen-Bold",     # arabic fonts
    "AlMohanad",         # arabic fonts
    "SazanamiGothic",    # japanese fonts
    "SazanamiMincho",    # japanese fonts
    "unifont",           # unifont should be always at the last
    ]

fontmapping = {
    "Helvetica": Helvetica_map,
    "AlMateen-Bold": AlMateenBold_map,
    "AlMohanad": AlMohanad_map,
    "SazanamiGothic": SazanamiGothic_map,
    "SazanamiMincho": SazanamiMincho_map,
    "unifont": unifont_map,
}

# fontlist reduced to the fonts which have a non-empty map
fontchecksequence = []

for eachfont in fontlist:
    if fontmapping[eachfont]:
        fontchecksequence.append(eachfont)


#==========================================================================
#=============== internal Class Definitions and functions =================
#==========================================================================

#======================== pdf layout from xform ===========================

class Form(object):
    """ Form class to use reportlab to generate pdf """

    def __init__(self, pdfname="ocrform.pdf", margintop=65, marginsides=50,
                 **kw):
        """
        Create the in-memory canvas and initialise cursor and page state

        @param pdfname: used as pdfpath when no "pdfpath" keyword is given
        @param margintop: subtracted from the page height to get the
                          starting y position
        @param marginsides: starting x position / side margin
        @param kw: optional settings: pdfpath, verbose, linespacing,
                   typeface, fontsize, form_uuid, form_revision,
                   form_resourcename
        """

        option = kw.get

        # settings taken from the keyword arguments
        self.pdfpath = option("pdfpath", pdfname)
        self.verbose = option("verbose", 0)
        self.linespacing = option("linespacing", 4)
        self.font = option("typeface", "Helvetica")
        self.fontsize = option("fontsize", 13)
        self.form_uuid = option("form_uuid", "")
        self.form_revision = option("form_revision", "")
        self.form_resourcename = option("form_resourcename", "")

        # the pdf is written to a buffer, see save()
        self.IObuffer = StringIO()
        self.canvas = Canvas(self.IObuffer, pagesize=A4)
        self.width, self.height = A4

        # margins and cursor position
        self.marginsides = marginsides
        self.margintop = margintop
        self.x = self.lastx = marginsides
        self.y = self.height - margintop
        self.lasty = self.height - margintop

        # page state
        self.num = 1
        self.gray = 0
        self.pagebegin = 1

        # footer of the first page
        self.put_page_num()
        self.put_metainfo()

    def barcode(self, uuid):
        """
        Draw a Code128 barcode of str(uuid) at (lastx, lasty) and move
        lasty and the y cursor down by the barcode height
        """

        bar_height = 20
        code = code128.Code128(str(uuid), barWidth=1, barHeight=bar_height)
        code.drawOn(self.canvas, self.lastx, self.lasty)
        self.lasty -= bar_height
        self.y = self.lasty

    def decorate(self):
        """ Decorates the the form with the markers needed to align the form later """

        c = self.canvas
        c.rect(20, 20, 20, 20, fill=1)                              # bt lf
        c.rect(self.width - 40, 20, 20, 20, fill=1)                 # bt rt
        c.rect(20, self.height - 40, 20, 20, fill=1)                # tp lf
        c.rect(self.width/2 - 10, 20, 20, 20, fill=1)               # bt md
        c.rect(20, self.height/2 - 10, 20, 20, fill=1)              # md lf
        c.rect(self.width - 40, self.height - 40, 20, 20, fill=1)   # tp rt
        c.rect(self.width - 40, self.height/2 - 10, 20, 20, fill=1) # md rt
        self.origin = {"x": 29, "y": 29} # location of top left marker

    def print_text(self,
                   lines,
                   fontsize=13,
                   gray=0,
                   seek=0,
                   continuetext=0,
                   style="default"):
        """
        Print a list of lines at the cursor, one character at a time

        @param lines: list of lines to be printed
        @param fontsize: font size, becomes the current font size
        @param gray: gray level, becomes the current gray level
        @param seek: horizontal shift applied to the x cursor before
                     printing
        @param continuetext: if true, carry on from the current cursor
                             position instead of starting a fresh line
        @param style: "center" or "right" to align each line (only
                      without continuetext), any other value leaves
                      the x cursor as it is
        """

        self.fontsize = fontsize
        self.gray = gray

        fresh_line = not continuetext
        if fresh_line and not self.pagebegin:
            self.resetx()
            self.nextline()
        self.pagebegin = 0

        if seek:
            self.resetx(seek=seek)

        total = len(lines)
        for index, raw in enumerate(lines, 1):
            text = self.__html_unescape(unicode(raw))

            if fresh_line:
                # alignment, estimating half a fontsize per character
                halfchar = self.fontsize / 2
                if style == "center":
                    self.x = (self.width - (len(text) * halfchar))/2
                elif style == "right":
                    self.x = ((self.width - self.marginsides) -
                              ((len(text)+3) * halfchar))
            elif (self.width - self.marginsides - self.x) < 100:
                # wrapping multiline options: too little room is left
                # on the current line
                self.resetx()
                self.nextline()

            if (self.y - self.fontsize) < 50:
                self.set_new_page()

            for char in text:
                written = self.writechar(char)
                self.x = written.getX()
                self.y = written.getY()
                # text wrapping -> TODO: word wrapping
                if self.x > (self.width - self.marginsides - self.fontsize):
                    self.writechar("-")
                    self.nextline()
                    self.resetx(self.fontsize)

            # no line feed after the last line
            if fresh_line and index != total:
                self.nextline()
                self.resetx()

    def writechar(self, char=" "):
        """
        Draw one character at the cursor using the font chosen by
        selectfont(), the current font size and the current gray level

        @param char: the character to draw
        @return: the text object the character was written to; callers
                 read the position after the character from its
                 getX() and getY()
        """

        typeface = self.selectfont(char)
        textobject = self.canvas.beginText(self.x, self.y)
        textobject.setFont(typeface, self.fontsize)
        textobject.setFillGray(self.gray)
        textobject.textOut(char)
        self.canvas.drawText(textobject)
        return textobject

    def nextline(self, fontsize=0):
        """
        Moves the y cursor down one line
        """

        if fontsize != 0:
            self.fontsize = fontsize

        if self.pagebegin == 0:
            self.y = self.y - (self.fontsize + self.linespacing)
            if self.y < self.margintop:
                self.set_new_page()

        self.pagebegin = 0

    def resetx(self, offset=0, seek=None):
        """
        Moves the x cursor with offset
        """

        if seek == None:
            self.x = self.marginsides + offset
        else:
            self.x += seek
        lastvalidx = self.width - (self.marginsides + (self.fontsize / 2))
        writablex = self.width - (2 * self.marginsides)
        if self.x > lastvalidx:
            currentx = self.x - self.marginsides
            remx = currentx % writablex
            self.x = remx + self.marginsides
            numlines = int(currentx / writablex)
            for line in xrange(numlines):
                self.nextline()

    def __html_unescape(self, text):
        """
        Helper function, unscape any html special characters
        """

        return re.sub("&(%s);" % "|".join(name2codepoint),
                      lambda m: unichr(name2codepoint[m.group(1)]),
                      text)

    def linespace(self, spacing=2):
        """
        Moves the y cursor down by given units
        """
        if self.pagebegin == 0:
            self.y -= spacing
        self.pagebegin = 0

    def selectfont(self, char):
        """ Select font according to the input character """

        charcode = ord(char)
        for font in fontchecksequence:
            for fontrange in fontmapping[font]:
                if charcode in xrange(fontrange[0], fontrange[1]):
                    return font
        return "Helvetica"  # fallback, if no thirdparty font is installed

    def draw_check_boxes(self,
                         boxes=1,
                         completeline=0,
                         lines=0,
                         seek=0,
                         continuetext=0,
                         fontsize=15,
                         gray=0,
                         style="",
                         ):
        """ Function to draw check boxes default no of boxes = 1 """

        if not continuetext and not self.pagebegin:
            self.resetx()
            self.nextline()
        self.pagebegin = 0
        self.fontsize = fontsize
        c = self.canvas
        c.setLineWidth(0.90)
        c.setStrokeGray(gray)
        if style == "center":
            self.x = self.width / 2
        elif style == "right":
            self.x = self.width - self.marginsides - self.fontsize
        if seek > (self.width - (self.marginsides + self.fontsize)):
            seek = 0
        if (self.y - self.fontsize) < 40:
            self.set_new_page()
        #if continuetext == 1:
        #    self.y = self.y + self.fontsize
        #    self.x = self.lastx
        #else:
        #    self.x = self.marginsides
        if seek != 0:
            self.x = self.x + seek
        if fontsize == 0:
            fontsize = self.fontsize
        else:
            self.fontsize = fontsize
        if completeline == 1:
            boxes = int(self.width / self.fontsize)
        box_startpx = {
            "x": self.x - self.origin["x"],
            "y": (842-self.y-self.fontsize) - self.origin["y"],
            "side": self.fontsize - 1,
            "boxes": boxes,
            "page": self.num
            }
        for i in range(boxes):
            c.rect(self.x, self.y, self.fontsize, self.fontsize)
            self.x = self.x + self.fontsize
            if self.x > (self.width - (self.marginsides + self.fontsize)):
                break
        self.lastx = self.x
        #self.x = self.marginsides
        #self.y = self.y - self.fontsize
        #if isdate:
        #    t = c.beginText(self.x, self.y)
        #    t.setFont(Helvetica, 13)
        #    t.setFillGray(0)
        #    t.textOut("   D  D  M  M  Y  Y  Y  Y")
        #    c.drawText(t)
        #    self.y = self.y - fontsize
        #    self.lastx = t.getX()
        #    self.lasty = self.y
        #if isdatetime:
        #    t = c.beginText(self.x, self.y)
        #    t.setFont(Helvetica, 12.5)
        #    t.setFillGray(0.4)
        #    t.textOut("   D  D  M  M  Y  Y  Y  Y -H  H :M  M")
        #    c.drawText(t)
        #    self.y = self.y - fontsize
        #    self.lastx = t.getX()
        #    self.lasty = self.y
        self.lastx = self.x
        return box_startpx

    def draw_circle(self,
                    boxes=1,
                    completeline=0,
                    lines=0,
                    seek=0,
                    continuetext=0,
                    fontsize=0,
                    gray=0,
                    style=""):
        """
        Draw circles on the form

        Only boxes, seek and gray are used, the other parameters are
        accepted but ignored. The circles have a diameter of the
        current font size.

        @param boxes: number of circles to draw
        @param seek: horizontal shift applied to the x cursor before
                     the first and after every circle
        @param gray: stroke gray level of the circles
        @return: dict describing the row: "x" and "y" of the centre of
                 the first circle relative to self.origin (y is derived
                 from 842 minus the cursor position), "radius", "boxes"
                 and "page"
        """

        pen = self.canvas
        pen.setLineWidth(0.90)
        pen.setStrokeGray(gray)
        self.resetx(seek=seek)

        origin = self.origin
        radius = self.fontsize/2
        circle_center = {
            "x": (self.x + radius) - origin["x"],
            "y": (842 - self.y - radius) - origin["y"],
            "radius": radius,
            "boxes" : boxes,
            "page": self.num
            }

        for _ in xrange(boxes):
            pen.circle(self.x + radius, self.y + radius, radius, fill = 0)
            # step over the circle, then add the gap to the next one
            self.resetx(seek=self.fontsize)
            self.resetx(seek=seek)
        return circle_center

    def draw_line(self, gray=0, nextline=0):
        """ Function to draw a straight line """

        self.fontsize = 4
        if nextline:
            self.nextline()
        else:
            self.linespace(8)
        self.resetx()
        c = self.canvas
        c.setStrokeGray(gray)
        c.setLineWidth(1)
        #self.y = self.y + self.linespacing + (self.fontsize/2)
        c.line(self.x, self.y, self.width - self.x, self.y)
        self.y = self.y + (self.linespacing)

    def set_new_page(self):
        """
            Close the current page and set up the next one.

            All changes are forgotten when a showPage() has been executed,
            so the markers, the page number and the meta info are drawn
            again and the cursor is put back to the top of the page.
        """

        self.num += 1
        self.canvas.showPage()
        self.decorate()

        # cursor back to the top left of the writable area
        self.x = self.lastx = self.marginsides
        self.y = self.height - self.margintop

        # page footer
        self.put_page_num()
        self.put_metainfo()

        self.pagebegin = 1

    def put_metainfo(self):
        """
        Print the form uuid, revision and resource name at y = 25,
        leaving the cursor and the font size as they were
        """

        # preserve state
        saved = (self.x, self.y, self.fontsize)

        # do the job: (start x, text) for each part of the footer
        self.fontsize = 10
        footer = (
            (self.marginsides,
             "UUID: %s" % self.form_uuid),
            ((self.width/2) + 20,
             "Revision: %s   Resource: %s" % (self.form_revision,
                                              self.form_resourcename)),
            )
        for startx, text in footer:
            self.x = startx
            self.y = 25
            for char in text:
                written = self.writechar(char)
                self.x = written.getX()
                self.y = written.getY()

        # restore state
        self.x, self.y, self.fontsize = saved

    def put_page_num(self):
        """
        Print "page<number>" near the right margin at y = 25, leaving
        the cursor and the font size as they were
        """

        # preserve state
        saved_x, saved_y, saved_size = self.x, self.y, self.fontsize

        # do the job
        self.fontsize = 10
        label = "page%s" % self.num
        # estimated width: half a fontsize per character plus two spare
        label_width = (len(label)+2)*(self.fontsize/2)
        self.x = self.width - (label_width + self.marginsides)
        self.y = 25
        for char in label:
            written = self.writechar(char)
            self.x = written.getX()
            self.y = written.getY()

        # restore state
        self.fontsize = saved_size
        self.x, self.y = saved_x, saved_y

    def set_title(self, title = "FORM"):
        """ Sets the title of the pdf. """

        c = self.canvas.setTitle(title)

    def save(self):
        """ Saves the form """

        self.canvas.save()
        pdf = self.IObuffer.getvalue()
        self.IObuffer.close()
        return pdf