Skip to content

Instantly share code, notes, and snippets.

@idlecool
Created June 13, 2011 12:33
Show Gist options
  • Save idlecool/1022698 to your computer and use it in GitHub Desktop.
Save idlecool/1022698 to your computer and use it in GitHub Desktop.
Old s3ocr.py which has been moved to s3pdf.py - SahanaEden
# -*- coding: utf-8 -*-
""" Sahana Optical Character Recognision Utility (s3ocr)
@author: Suryajith Chillara <suryajith1987[at]gmail.com>
@author: Shiv Deepak <idlecool[at]gmail.com>
@copyright: 2009-2011 (c) Sahana Software Foundation
@license: MIT
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
"""
__all__ = ["S3OCR"]
#========================== import section ====================================
# Generic stuff
import os
import sys
import re
import uuid
import Image
import ImageOps
import ImageStat
import math
from StringIO import StringIO
from htmlentitydefs import name2codepoint
from lxml import etree
# Importing reportlab stuff
try:
from reportlab.pdfgen.canvas import Canvas
from reportlab.lib.pagesizes import A4
from reportlab.graphics.barcode import code128
# for adding more fonts
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import reportlab
reportlab.rl_config.warnOnMissingFontGlyphs = 0
except(ImportError):
print >> sys.stderr, "S3 Debug: WARNING: S3OCR: reportlab has not been installed."
from gluon.storage import Storage
from s3rest import S3Method
from s3cfg import S3Config
#==========================================================================
#================================= OCR API ================================
#==========================================================================
class S3OCR(S3Method):
"""
Generate XForms and PDFs the s3 way
"""
def apply_method(self,
                 r,
                 **attr):
    """
        S3Method entry point: configure the OCR helper and serve the request

        @param r: the current request; its vars, http, representation and
                  error() members are used here
        @param attr: controller attributes, of which only "s3ocr_config"
                     (a dict of OCR settings for the resource) is read
        @return: the s3ocr XML as string (GET, xml), the parser output
                 (GET, pdf with _operation=putpdf) or the generated PDF
                 (GET, pdf); every other combination is reported through
                 r.error(501, ...)
    """
    xml = self.manager.xml
    self.r = r

    # per-resource OCR settings handed in by the controller
    ocr_settings = attr.get("s3ocr_config", {})
    # localised names of the components
    self.rheader_tabs = ocr_settings.get("tabs", [])
    # custom pdf title (if any)
    self.pdftitle = ocr_settings.get("pdftitle", None)
    # components which have to be left out
    self.exclude_component_list = ocr_settings.get("exclude_components", [])
    # per-field overrides, keyed by "<prefix>_<resourcename>__<fieldname>",
    # each value being a dict of properties (fieldtype, label, ...)
    self.custom_field_properties = ocr_settings.get("field_properties", {})

    # per-fieldtype overrides, keyed by fieldtype, come from S3Config
    T = self.T
    deployment_config = S3Config(globals(), T)
    self.custom_fieldtype_properties = \
        deployment_config.get_s3ocr_fieldtype_properties()

    # how resource field types are represented on the pdf form
    field_type_map = {
        "string": "string",
        "text": "textbox",
        "boolean": "boolean",
        "double": "double",
        "date": "date",
        "datetime": "datetime",
        "integer": "integer",
    }
    for list_type in ("list:integer",
                      "list:string",
                      "list:double",
                      "list:text"):
        field_type_map[list_type] = "multiselect"
    self.generic_ocr_field_type = field_type_map

    # localised texts printed on the form
    datetime_hint = {
        "date": T("fill in order: day(2) month(2) year(4)"),
        "datetime": T("fill in order: hour(2) min(2) day(2) month(2) year(4)"),
    }
    ocr_inst = {
        "inst1": T("1. Fill the necessary fields in BLOCK CAPITAL letters."),
        "inst2": T("2. Always use one box per letter and leave one box space to separate words."),
        "inst3": T("3. Fill in the circles completely."),
    }
    boolean_text = {
        "yes": T("Yes"),
        "no": T("No"),
    }
    select_text = {
        "multiselect": T("Select one or more option(s) that apply"),
        "singleselect": T("Select any one option that apply"),
    }
    self.l10n = {
        "datetime_hint": datetime_hint,
        "ocr_inst": ocr_inst,
        "boolean": boolean_text,
        "select": select_text,
    }

    # debug mode is switched on with ?_debug=1 and shows the pdf inline
    self.debug = (r.vars.get("_debug", False) == "1")
    content_disposition = "inline" if self.debug else "attachment"

    # serve the request
    representation = r.representation
    http_method = r.http

    if http_method == "GET":
        if representation == "xml":
            output = self.s3ocr_etree()
            self.response.view = "xml.html"
            self.response.headers["Content-Type"] = "application/xml"
            return xml.tostring(output, pretty_print=True)

        if representation == "pdf":
            if r.vars.get("_operation", False) == "putpdf":
                # no Content-disposition header is set for this response
                output = self.s3ocr_parser()
                self.response.view = None
                self.response.headers["Content-Type"] = "text/plain"
                return output

            # book a revision first, then render the form and store the
            # layout that belongs to it
            form_uuid = uuid.uuid1()
            form_revision = self.__book_revision(form_uuid)
            output, layout_etree = self.pdf_manager(form_uuid, form_revision)
            self.__update_dbmeta(layout_xml=etree.tostring(layout_etree),
                                 form_uuid=form_uuid,
                                 revision=form_revision)
            self.response.view = None
            self.response.headers["Content-Type"] = "application/pdf"
            self.response.headers["Content-disposition"] = \
                "%s; filename=\"%s.pdf\"" % (content_disposition,
                                             self.tablename)
            return output

        r.error(501, self.manager.ERROR.BAD_FORMAT)

    elif http_method in ("POST","PUT"):
        if representation in ("xml", "pdf"):
            r.error(501, self.manager.ERROR.NOT_IMPLEMENTED)
        else:
            r.error(501, self.manager.ERROR.BAD_FORMAT)

    else:
        r.error(501, self.manager.ERROR.BAD_METHOD)
def s3ocr_etree(self):
"""
Optimise & Modifiy s3xml etree to and produce s3ocr etree
"""
# NOTE(review): every line of this method sits at column 0 in this copy, so
# the block nesting referred to in the comments below is inferred from the
# statement order and has to be confirmed when the indentation is restored.
#
# Returns the <s3ocr> root element built below.
#
# Ask the resource for its structure as an element tree (as_tree=True),
# with options and references switched on.
s3xml_etree = self.resource.struct(options=True,
references=True,
stylesheet=None,
as_json=False,
as_tree=True)
# xml tags
# (HINT is defined here but not referenced again inside this method)
ITEXT = "label"
HINT = "comment"
TYPE = "type"
HASOPTIONS = "has_options"
LINES = "lines"
BOXES = "boxes"
# Components Localised Text added to the etree
# Convering s3xml to s3ocr_xml (nicer to traverse)
s3xml_root = s3xml_etree.getroot()
# the first child of the s3xml root is taken as the resource element
resource_element = s3xml_root.getchildren()[0]
s3ocr_root = etree.Element("s3ocr")
if self.r.component: # if it is a component
# the resource element is labelled and appended to the s3ocr root as it is
component_sequence, components_l10n_dict = \
self.__rheader_tabs_sequence(self.r.tablename)
resource_element.set(ITEXT,
components_l10n_dict.get(None,
self.resource.tablename))
s3ocr_root.append(resource_element)
else: # if it is main resource
componentetrees = []
# mres is main resource etree
mres = etree.Element("resource")
# copy all attributes of the original resource element onto mres
for attr in resource_element.attrib.keys():
mres.set(attr, resource_element.attrib.get(attr))
# split the children: <field> goes to mres, nested <resource> elements
# are collected as component trees
for field_element in resource_element:
if field_element.tag == "field": # main resource fields
mres.append(field_element)
elif field_element.tag == "resource": # component resource
componentetrees.append(field_element)
# Serialisation of Component List and l10n
component_sequence, components_l10n_dict = \
self.__rheader_tabs_sequence(self.r.tablename)
# the label of the main resource is looked up under the key None
mres.set(ITEXT, components_l10n_dict.get(None,
self.resource.tablename))
if component_sequence:
# keep only the components named in the tab sequence, in that order
serialised_component_etrees = []
for eachcomponent in component_sequence:
component_table = "%s_%s" % (self.prefix, eachcomponent)
for eachtree in componentetrees:
if eachtree.attrib.get("name", None) == component_table:
# l10n strings are added and sequencing is done here
eachtree.set(ITEXT,
components_l10n_dict.get(eachcomponent,
component_table))
serialised_component_etrees.append(eachtree)
else:
# no tab sequence configured: keep the components as collected
serialised_component_etrees = componentetrees
# create s3ocr tree
s3ocr_root.append(mres)
for res in serialised_component_etrees:
s3ocr_root.append(res)
# remove fields which are not required
# loading user defined configuartions
FIELD_TYPE_LINES = { # mapping types with number of lines
"string": 2,
"textbox": 4,
"integer": 1,
"double": 1,
"date": 1,
"datetime": 1,
}
FIELD_TYPE_BOXES = { # mapping type with numboxes
"integer": 9,
"double": 16,
}
# NOTE(review): the loops below call remove() on the element whose
# iterchildren() is being iterated (resources here, fields and options
# further down) - verify that no sibling gets skipped by this.
for eachresource in s3ocr_root.iterchildren():
resourcetablename = eachresource.attrib.get("name")
if eachresource.attrib.get("name") in self.exclude_component_list:
# excluded components are removed
s3ocr_root.remove(eachresource)
continue
for eachfield in eachresource.iterchildren():
fieldname = eachfield.attrib.get("name")
# fields which have to be displayed
fieldtype = eachfield.attrib.get(TYPE)
# loading ocr specific fieldtypes
ocrfieldtype = self.generic_ocr_field_type.get(fieldtype,
None)
if ocrfieldtype != None:
eachfield.set(TYPE, ocrfieldtype)
# refresh fieldtypes after update
fieldtype = eachfield.attrib.get(TYPE)
# set num boxes and lines
# (attribute values are compared as the strings "True" / "False")
fieldhasoptions = eachfield.attrib.get(HASOPTIONS)
if fieldhasoptions == "False":
eachfield.set(LINES,
str(FIELD_TYPE_LINES.get(fieldtype,
1)))
# NOTE(review): whether this check is nested inside the has_options test
# above cannot be told from this copy - confirm against the original file.
if fieldtype in FIELD_TYPE_BOXES.keys():
eachfield.set(BOXES,
str(FIELD_TYPE_BOXES.get(fieldtype)))
# if field is readable but not writable set default value
if eachfield.attrib.get("readable", "False") == "True" and \
eachfield.attrib.get("writable", "False") == "False":
# take the part of the table name after "<prefix>_"; on any failure fall
# back to the part after the first underscore
try:
fieldresourcename = \
eachresource.attrib.get("name").split("%s_" %\
self.prefix)[1]
except:
fieldresourcename = \
eachresource.attrib.get("name").split("_")[1]
fieldresource = \
self.resource.components.get(fieldresourcename, None)
if not fieldresource:
fieldresource = self.resource
fieldname = eachfield.attrib.get("name")
# NOTE(review): fieldresource (determined above) is not used afterwards;
# the default is always read from self.r.resource.table - confirm this is
# what is wanted for fields of a component.
try:
fielddefault = self.r.resource.table[fieldname].default
except(KeyError):
fielddefault = "None"
eachfield.set("default",
str(fielddefault))
# load custom fieldtype specific settings
if fieldtype not in self.generic_ocr_field_type.values() \
and fieldtype in self.custom_fieldtype_properties.keys():
self.__update_custom_fieldtype_settings(eachfield)
# refresh fieldtypes after update
fieldtype = eachfield.attrib.get(TYPE)
# for unknown field types
# (anything still not an OCR type is printed as a two line string field)
if fieldtype not in self.generic_ocr_field_type.values():
eachfield.set(TYPE, "string")
eachfield.set(HASOPTIONS, "False")
eachfield.set(LINES, "2")
# refresh fieldtypes after update
fieldtype = eachfield.attrib.get(TYPE)
# loading custom field specific settings
self.__update_custom_field_settings(eachfield,
resourcetablename,
fieldname)
# in ocr boolean fields should be shown as options
if fieldtype == "boolean":
eachfield.set(HASOPTIONS, "True")
# fields removed which need not be displayed
if eachfield.attrib.get("readable", "False") == "False" and \
eachfield.attrib.get("writable", "False") == "False":
eachresource.remove(eachfield)
continue
# drop options without text from non-boolean fields with options
if eachfield.attrib.get(HASOPTIONS, "False") == "True" and \
eachfield.attrib.get(TYPE) != "boolean":
# assumes the first child of such a field is the select element and that
# it exists (an IndexError is raised otherwise) - TODO confirm
s3ocrselect = eachfield.getchildren()[0]
for eachoption in s3ocrselect.iterchildren():
if eachoption.text == "" or eachoption.text == None:
s3ocrselect.remove(eachoption)
continue
return s3ocr_root
def pdf_manager(self, form_uuid, form_revision):
"""
Produces OCR Compatible PDF forms
"""
# NOTE(review): every line of this method sits at column 0 in this copy, so
# the block nesting referred to in the comments below is inferred from the
# statement order and has to be confirmed when the indentation is restored.
#
# form_uuid and form_revision are handed on to Form(); the method returns
# the tuple (form.save(), layout etree), where the layout etree records
# position, size and page of everything drawn for the user to fill in.
s3ocr_root = self.s3ocr_etree() # get element s3xml
s3ocr_layout_etree = etree.Element("s3ocrlayout")
# define font size
titlefontsize = 18
sectionfontsize = 15
regularfontsize = 13
hintfontsize = 10
# etree labels
ITEXT = "label"
HINT = "comment"
TYPE = "type"
HASOPTIONS = "has_options"
LINES = "lines"
BOXES = "boxes"
#l10n
l10n = self.l10n
# get pdf title
# (without a custom title the subtitle_list crud string of the table is
# used and, if that fails for any reason, the table name)
if self.pdftitle == None or self.pdftitle == "":
try:
pdftitle = self.manager.s3.crud_strings[\
self.tablename].subtitle_list.decode("utf-8")
except:
pdftitle = self.resource.tablename
else:
pdftitle = self.pdftitle
# prepare pdf
# (Form is not defined in the visible part of this file)
form = Form(form_uuid=form_uuid,
form_revision=form_revision,
form_resourcename="%s_%s" % (self.prefix,
self.resource.name))
form.decorate()
# set header
form.canvas.setTitle(pdftitle) # set pdf meta title
form.print_text([pdftitle,],
fontsize=titlefontsize,
style="center") # set pdf header title
# the three filling instructions, decoded from utf-8
form.print_text(
[
unicode(l10n.get("ocr_inst").get("inst1").decode("utf-8")),
unicode(l10n.get("ocr_inst").get("inst2").decode("utf-8")),
unicode(l10n.get("ocr_inst").get("inst3").decode("utf-8"))
],
fontsize=regularfontsize,
gray=0)
form.linespace(3)
# printing the etree
for eachresource in s3ocr_root:
# create resource element of ocr layout xml
s3ocr_layout_resource_etree =\
etree.SubElement(s3ocr_layout_etree,
"resource", name=eachresource.attrib.get("name"))
# section heading: the resource label (or its name) between two lines
form.draw_line()
form.print_text([
eachresource.attrib.get(ITEXT,
eachresource.attrib.get("name"))
],
fontsize=sectionfontsize)
form.draw_line(nextline=1)
form.linespace(12) # line spacing between each field
for eachfield in eachresource.iterchildren():
# create field element of ocr layout xml
s3ocr_layout_field_etree =\
etree.SubElement(s3ocr_layout_resource_etree,
"field",
name=eachfield.attrib.get("name"),
type=eachfield.attrib.get("type"))
# print the field label, followed by the hint in brackets if there is one
fieldlabel = eachfield.attrib.get(ITEXT)
spacing = " " * 5
fieldhint = self.__trim(eachfield.attrib.get(HINT))
if fieldhint != "" and fieldhint != None:
form.print_text(["%s%s( %s )" % \
(fieldlabel,
spacing,
fieldhint)],
fontsize=regularfontsize)
else:
form.print_text([fieldlabel],
fontsize=regularfontsize)
if eachfield.attrib.get("readable", "False") == "True" and \
eachfield.attrib.get("writable", "False") == "False":
# if it is a readonly field
# (only its default value is printed, nothing is recorded in the layout)
form.print_text(
[eachfield.attrib.get("default","No default Value")],
seek=10,
)
elif eachfield.attrib.get(HASOPTIONS) == "True":
fieldtype = eachfield.attrib.get(TYPE)
# if the field has to be shown with options
if fieldtype == "boolean":
# a boolean gets a "yes" and a "no" circle on one line
form.nextline()
form.resetx()
bool_text = l10n.get("boolean")
form.print_text(
[bool_text.get("yes").decode("utf-8")],
continuetext=1,
seek=3,
)
loc_info = form.draw_circle(
boxes=1,
continuetext=1,
gray=0.9,
seek=10,
fontsize=12,
)
# create checkbox element of ocr layout xml
s3ocr_layout_optionbox_etree =\
etree.SubElement(s3ocr_layout_field_etree,
"optionbox",
x=str(loc_info["x"]),
y=str(loc_info["y"]),
radius=str(loc_info["radius"]),
boxes=str(loc_info["boxes"]),
page=str(loc_info["page"]))
s3ocr_layout_optionbox_etree.text = "yes"
form.print_text(
[bool_text.get("no").decode("utf-8")],
continuetext=1,
seek=10,
)
loc_info = form.draw_circle(
boxes=1,
continuetext=1,
gray=0.9,
seek=10,
fontsize=12,
)
# create checkbox element of ocr layout xml
s3ocr_layout_optionbox_etree =\
etree.SubElement(s3ocr_layout_field_etree,
"optionbox",
x=str(loc_info["x"]),
y=str(loc_info["y"]),
radius=str(loc_info["radius"]),
boxes=str(loc_info["boxes"]),
page=str(loc_info["page"]))
s3ocr_layout_optionbox_etree.text = "no"
else:
# any other field with options: print a selection hint, then one circle
# per option (fieldtype was read at the top of the has_options branch)
if fieldtype == "multiselect":
option_hint = l10n.get("select").get("multiselect")
else:
option_hint = l10n.get("select").get("singleselect")
form.print_text(
[option_hint.decode("utf-8")],
fontsize=hintfontsize,
gray=0.4,
seek=3,
)
# assumes the first child of the field is the select element - TODO confirm
s3ocrselect = eachfield.getchildren()[0]
form.nextline(regularfontsize)
form.resetx() # move cursor to the front
optionseek = 10
# resting margin for options
# (the side margin is widened by optionseek while the options are printed
# and set back to the saved value afterwards)
formmargin = form.marginsides
form.marginsides = optionseek + formmargin
for eachoption in s3ocrselect.iterchildren():
form.print_text(
[eachoption.text],
continuetext=1,
fontsize = regularfontsize,
seek = 10,
)
loc_info = form.draw_circle(
boxes=1,
continuetext=1,
gray=0.9,
seek=10,
fontsize=12,
)
# create checkbox element of ocr layout xml
s3ocr_layout_optionbox_etree =\
etree.SubElement(s3ocr_layout_field_etree,
"optionbox",
x=str(loc_info["x"]),
y=str(loc_info["y"]),
radius=str(loc_info["radius"]),
boxes=str(loc_info["boxes"]),
page=str(loc_info["page"]))
# the layout stores the option's value attribute, not its printed text
s3ocr_layout_optionbox_etree.text =\
eachoption.attrib.get("value")
# restoring orginal margin
form.marginsides = formmargin
else:
# if it is a text field
fieldtype = eachfield.attrib.get(TYPE)
BOXES_TYPES = ["string", "textbox", "integer",
"double", "date", "datetime",]
if fieldtype in BOXES_TYPES:
if fieldtype in ["string", "textbox"]:
# one full line of boxes per configured line ("lines" attribute, default 1)
form.linespace(3)
num_lines = int(eachfield.attrib.get("lines",
1))
for eachline in xrange(num_lines):
loc_info = form.draw_check_boxes(
completeline=1,
gray=0.9,
seek=3,
)
# create checkbox element of ocr layout xml
s3ocr_layout_textbox_etree =\
etree.SubElement(s3ocr_layout_field_etree,
"textbox",
x=str(loc_info["x"]),
y=str(loc_info["y"]),
side=str(loc_info["side"]),
boxes=str(loc_info["boxes"]),
page=str(loc_info["page"]))
s3ocr_layout_textbox_etree.text = " "
elif fieldtype in ["integer", "double"]:
# a single row with "boxes" boxes (default 9)
num_boxes = int(eachfield.attrib.get("boxes",
9))
form.linespace(3)
loc_info = form.draw_check_boxes(
boxes = num_boxes,
gray=0.9,
seek=3,
)
# create checkbox element of ocr layout xml
s3ocr_layout_textbox_etree =\
etree.SubElement(s3ocr_layout_field_etree,
"textbox",
x=str(loc_info["x"]),
y=str(loc_info["y"]),
side=str(loc_info["side"]),
boxes=str(loc_info["boxes"]),
page=str(loc_info["page"]))
s3ocr_layout_textbox_etree.text = " "
elif fieldtype in ["date", "datetime"]:
# print hint
hinttext = \
l10n.get("datetime_hint").get(fieldtype).decode("utf-8")
form.print_text(
[hinttext],
fontsize=hintfontsize,
gray=0.4,
seek=3,
)
form.linespace(8)
# defaults for a plain date: the DD group starts the row
datetime_continuetext = 0
datetime_seek = 3
# NOTE(review): presumably the HH and MM groups below belong to this
# datetime branch only (the date hint has no hour / minute part) - confirm
# when restoring the indentation.
if fieldtype == "datetime":
datetime_continuetext = 1
datetime_seek = 6
#HH
loc_info = form.draw_check_boxes(
boxes = 2,
gray=0.9,
seek = 3,
)
# create checkbox element of ocr layout xml
s3ocr_layout_textbox_etree =\
etree.SubElement(s3ocr_layout_field_etree,
"textbox",
x=str(loc_info["x"]),
y=str(loc_info["y"]),
side=str(loc_info["side"]),
boxes=str(loc_info["boxes"]),
page=str(loc_info["page"]))
s3ocr_layout_textbox_etree.text = "HH"
#MM
loc_info = form.draw_check_boxes(
boxes = 2,
gray=0.9,
continuetext=1,
seek = 4,
)
# create checkbox element of ocr layout xml
s3ocr_layout_textbox_etree =\
etree.SubElement(s3ocr_layout_field_etree,
"textbox",
x=str(loc_info["x"]),
y=str(loc_info["y"]),
side=str(loc_info["side"]),
boxes=str(loc_info["boxes"]),
page=str(loc_info["page"]))
s3ocr_layout_textbox_etree.text = "MM"
# DD
loc_info = form.draw_check_boxes(
boxes = 2,
gray=0.9,
continuetext = datetime_continuetext,
seek = datetime_seek,
)
# create checkbox element of ocr layout xml
s3ocr_layout_textbox_etree =\
etree.SubElement(s3ocr_layout_field_etree,
"textbox",
x=str(loc_info["x"]),
y=str(loc_info["y"]),
side=str(loc_info["side"]),
boxes=str(loc_info["boxes"]),
page=str(loc_info["page"]))
s3ocr_layout_textbox_etree.text = "DD"
# MO
# (the month group is tagged "MO" in the layout; "MM" is used for minutes)
loc_info = form.draw_check_boxes(
boxes = 2,
gray=0.9,
continuetext=1,
seek = 4,
)
# create checkbox element of ocr layout xml
s3ocr_layout_textbox_etree =\
etree.SubElement(s3ocr_layout_field_etree,
"textbox",
x=str(loc_info["x"]),
y=str(loc_info["y"]),
side=str(loc_info["side"]),
boxes=str(loc_info["boxes"]),
page=str(loc_info["page"]))
s3ocr_layout_textbox_etree.text = "MO"
# YYYY
loc_info = form.draw_check_boxes(
boxes = 4,
gray=0.9,
continuetext=1,
seek = 4,
)
# create checkbox element of ocr layout xml
s3ocr_layout_textbox_etree =\
etree.SubElement(s3ocr_layout_field_etree,
"textbox",
x=str(loc_info["x"]),
y=str(loc_info["y"]),
side=str(loc_info["side"]),
boxes=str(loc_info["boxes"]),
page=str(loc_info["page"]))
s3ocr_layout_textbox_etree.text = "YYYY"
else:
# NOTE(review): the error below is read from self.manager.PARSE_ERROR while
# apply_method reads its errors from self.manager.ERROR.<name> - confirm.
# NOTE(review): "print sys.stderr(...)" calls the stream object instead of
# redirecting print to it (the import section uses "print >> sys.stderr, ...");
# if this line is reached it raises TypeError.
self.r.error(501, self.manager.PARSE_ERROR)
print sys.stderr("%s :invalid field type: %s" %\
(eachfield.attrib.get("name"),
fieldtype))
return form.save(), s3ocr_layout_etree
def __update_custom_fieldtype_settings(self,
                                       eachfield, #field etree
                                       ):
    """
        Update custom fieldtype specific settings into the etree

        The current "type" attribute of the field is looked up in
        self.custom_fieldtype_properties; every property configured for
        that type is copied onto the field element.

        @param eachfield: field element (etree), updated in place
    """
    fieldtype = eachfield.attrib.get("type")
    # The dict used to be bound to a different name than the one the lookups
    # below read from, so every call ended in a NameError.
    fieldtype_property = self.custom_fieldtype_properties.get(fieldtype, {})

    # the type is only replaced by a non-empty value
    cust_fieldtype = fieldtype_property.get("fieldtype", None)
    if cust_fieldtype:
        eachfield.set("type", cust_fieldtype)

    # (configuration key, xml attribute), applied in this order so that
    # newly created attributes keep the order they always had
    attribute_map = (
        ("readable", "readable"),
        ("writable", "writable"),
        ("label", "label"),
        ("hint", "comment"),
        ("default", "default"),
        ("lines", "lines"),
        ("boxes", "boxes"),
        ("has_options", "has_options"),
    )
    for key, attribute in attribute_map:
        value = fieldtype_property.get(key, None)
        if value is not None:
            eachfield.set(attribute, value)

    # custom options replace the single existing child or fill an empty
    # field; a field with more than one child is left untouched
    cust_options = fieldtype_property.get("options", None)
    if cust_options is not None:
        opt_available = list(eachfield)
        if len(opt_available) <= 1:
            for old_options in opt_available:
                eachfield.remove(old_options)
            eachfield.append(cust_options)
def __update_custom_field_settings(self,
                                   eachfield, #field etree
                                   resourcetablename,
                                   fieldname
                                   ):
    """
        Update custom field specific settings into the etree

        The overrides are looked up in self.custom_field_properties under
        the key "<resourcetablename>__<fieldname>" and copied onto the
        field element.

        @param eachfield: field element (etree), updated in place
        @param resourcetablename: name of the table the field belongs to
        @param fieldname: name of the field
    """
    overrides = self.custom_field_properties.get(
                    "%s__%s" % (resourcetablename, fieldname), {})

    # the type is only replaced by a non-empty value
    new_type = overrides.get("fieldtype", None)
    if new_type:
        eachfield.set("type", new_type)

    # (configuration key, xml attribute) in the order they are applied
    for key, attribute in (("readable", "readable"),
                           ("writable", "writable"),
                           ("label", "label"),
                           ("hint", "comment"),
                           ("default", "default"),
                           ("lines", "lines"),
                           ("boxes", "boxes"),
                           ("has_options", "has_options")):
        value = overrides.get(key, None)
        if value is not None:
            eachfield.set(attribute, value)

    # custom options go into an empty field or take the place of its only
    # child; with more than one child nothing is changed
    replacement = overrides.get("options", None)
    if replacement is not None:
        children = list(eachfield)
        if len(children) == 1:
            eachfield.remove(children[0])
        if len(children) <= 1:
            eachfield.append(replacement)
def __rheader_tabs_sequence(self, resourcename):
    """
        Sequence of components and their localised names

        Every entry of self.rheader_tabs is read as (label, component).

        @param resourcename: not used by this method
        @return: tuple (list of component names in tab order,
                        dict component name -> label decoded from utf-8)
    """
    sequence = []
    labels = {}
    for tab in self.rheader_tabs:
        label, component = tab[0], tab[1]
        if component != None:
            sequence.append(component)
        # The label is stored for every tab, including the one whose
        # component is None: s3ocr_etree looks the label of the main
        # resource up under the key None.
        labels[component] = label.decode("utf-8")
    return sequence, labels
def __trim(self, text):
    """
        Helper to trim off any enclosing parenthesis

        @param text: the text to trim; anything that is not a string
                     (e.g. None for a missing attribute) is passed through
        @return: text without its first and last character if it starts
                 with "(" and ends with ")", otherwise text unchanged
    """
    try:
        string_types = basestring # Python 2: str as well as unicode
    except NameError:
        string_types = str
    # startswith/endswith are safe on the empty string, where indexing the
    # first and last character used to raise IndexError
    if isinstance(text, string_types) and \
       text.startswith("(") and \
       text.endswith(")"):
        return text[1:-1]
    return text
def __update_dbmeta(self, **kwargs):
    """
        Store the PDF layout information into the database/disk.

        @param kwargs: form_uuid - uuid of the form whose ocr_meta record
                                   is updated
                       layout_xml - the layout xml as string
                       revision - accepted, but not used here
    """
    form_uuid = kwargs.get("form_uuid", None)
    layout_stream = StringIO(kwargs.get("layout_xml", None))

    db = self.db
    table = db["ocr_meta"]

    # the first record found for this form uuid receives the layout file
    # (an IndexError is raised if there is none)
    record = db(table["form_uuid"] == form_uuid).select()[0]
    stored_name = table["layout_file"].store(layout_stream,
                                             "%s_xml" % form_uuid)
    record.update_record(layout_file=stored_name)
def __book_revision(self, form_uuid):
    """
        Books a revision number for current operation in ocr_meta

        @param form_uuid: uuid of the form the revision is booked for
        @return: the booked revision: 0 for the first form of the
                 resource, otherwise highest stored revision + 1
    """
    db = self.db
    table = db["ocr_meta"]
    resource_name = "%s_%s" % (self.prefix, self.resource.name)

    # highest revision stored so far for this resource (None if there is none)
    highest = table["revision"].max()
    row = db(table["resource_name"]==resource_name).select(highest).first()
    latest = row[highest]
    if latest is None:
        revision = 0
    else:
        revision = latest + 1

    table.insert(form_uuid=form_uuid,
                 resource_name=resource_name,
                 revision=revision)
    return revision
def s3ocr_parser(self, **kwargs):
""" performs OCR on a given set of pages """
# NOTE(review): every line of this method sits at column 0 in this copy, so
# the block nesting referred to in the comments below is inferred from the
# statement order and has to be confirmed when the indentation is restored.
#
# Returns the layout xml, serialised with pretty_print=True.
pages = kwargs.get("pages", None)
raw_images = {}
images = {}
form_uuid = kwargs.get("form_uuid", None)
revision = kwargs.get("revision", None)
resourcename = kwargs.get("resourcename", None)
# NOTE(review): the debug block below reads eleven PNG files from a
# hard-coded directory and overwrites pages, form_uuid, revision and
# resourcename with fixed values, so the kwargs read above have no effect.
# <debug only>
for i in xrange(0, 11):
print "page %s" % i
raw_images[i+1] = Image.open(os.path.join("/home/idlecool/",
"pr_person-%s.png" % i))
pages = 1
form_uuid = "4ab7c932-8fdb-11e0-bf41-533e0c24f0a"
revision = 14
resourcename = "pr_person"
# </debug only>
# transform image
# Per page: binarise, locate the markers and derive the orientation; if
# the orientation is not 0.0 the image is rotated by it and markers and
# orientation are determined again. The scale factor is stored last.
# (__getMarkers is not defined in the visible part of this file)
for each_img_index in raw_images.keys():
print each_img_index
images[each_img_index] = {}
images[each_img_index]["image"] =\
self.__convertImage2binary(raw_images[each_img_index])
images[each_img_index]["markers"] =\
self.__getMarkers(images[each_img_index]["image"])
images[each_img_index]["orientation"] =\
self.__getOrientation(images[each_img_index]["markers"])
if images[each_img_index]["orientation"] != 0.0:
images[each_img_index]["image"] =\
images[each_img_index]["image"].rotate(images[each_img_index]["orientation"])
images[each_img_index]["markers"] =\
self.__getMarkers(images[each_img_index]["image"])
images[each_img_index]["orientation"] =\
self.__getOrientation(images[each_img_index]["markers"])
images[each_img_index]["scalefactor"] =\
self.__scaleFactor(images[each_img_index]["markers"])
# get layout file, convert it to etree
# NOTE(review): the three conditions below are joined with Python "and",
# which evaluates to one of its operands instead of building a combined
# query; web2py's DAL presumably needs "&" here - confirm.
db = self.db
layout_row =\
db(db["ocr_meta"]["form_uuid"]==form_uuid and\
db["ocr_meta"]["resource_name"]==resourcename and\
db["ocr_meta"]["revision"]==revision
).select(db["ocr_meta"]["layout_file"]).first()
# the layout file is opened relative to the current working directory
layout_file = open(os.path.join("./applications/",
self.request.application,
'uploads/ocr_meta/',
layout_row["layout_file"]),
'rb')
layout_xml = layout_file.read()
layout_file.close()
layout_etree = etree.fromstring(layout_xml)
# walk resource -> field -> component of the layout; the tag of the first
# component decides how all components of the field are treated
for eachresource in layout_etree:
for eachfield in eachresource:
field_type = eachfield.attrib.get("type")
components = eachfield.getchildren()
numcomponents = len(components)
if numcomponents == 0:
continue
else:
component_type = components[0].tag
if component_type in ("optionbox", "textbox"):
if component_type == "optionbox":
# option boxes are counted from 0, text boxes (below) from 1
linenum = 0
for eachcomponent in components:
comp_x = float(eachcomponent.attrib.get("x"))
comp_y = float(eachcomponent.attrib.get("y"))
comp_boxes = int(eachcomponent.attrib.get("boxes"))
comp_radius = float(eachcomponent.attrib.get("radius"))
comp_page = int(eachcomponent.attrib.get("page"))
comp_value = str(eachcomponent.text)
# the markers of the component's page; the first marker is used as origin
try:
page_origin = images[comp_page]["markers"]
except(KeyError):
self.r.error(501,
self.T("insufficient number of pages provided"))
print eachcomponent.tag
# crop box (left, upper, right, lower): centre +/- radius, with layout
# units multiplied by the page's scale factors and offset by the origin
crop_box = (
int(page_origin[0][0]+\
(comp_x*\
images[comp_page]["scalefactor"]["x"])-\
comp_radius*images[comp_page]["scalefactor"]["x"]),
int(page_origin[0][1]+\
(comp_y*\
images[comp_page]["scalefactor"]["y"])-\
comp_radius*images[comp_page]["scalefactor"]["y"]),
int(page_origin[0][0]+\
(comp_x*\
images[comp_page]["scalefactor"]["x"])+\
comp_radius*images[comp_page]["scalefactor"]["x"]),
int(page_origin[0][1]+\
(comp_y*\
images[comp_page]["scalefactor"]["y"])+\
comp_radius*images[comp_page]["scalefactor"]["y"]),
)
# (temp_image is not used afterwards; the same crop is repeated below)
temp_image = images[comp_page]["image"].crop(crop_box)
print eachcomponent.tag
cropped_image = images[comp_page]["image"].crop(crop_box)
result = self.__ocrIt(cropped_image,
form_uuid,
resourcename,
linenum,
content_type="optionbox")
# NOTE(review): a marked option is only printed; nothing is written back
# to layout_etree before it is serialised at the end of the method.
if result:
print "TRUE: %s" % comp_value
linenum+=1
elif component_type == "textbox":
linenum = 1
for eachcomponent in components:
comp_x = float(eachcomponent.attrib.get("x"))
comp_y = float(eachcomponent.attrib.get("y"))
comp_boxes = int(eachcomponent.attrib.get("boxes"))
comp_side = float(eachcomponent.attrib.get("side"))
comp_page = int(eachcomponent.attrib.get("page"))
comp_meta = str(eachcomponent.text)
try:
page_origin = images[comp_page]["markers"]
except(KeyError):
self.r.error(501,
self.T("insufficient number of pages provided"))
print eachcomponent.tag
# crop box (left, upper, right, lower): starts at the component's x / y
# and spans side * boxes horizontally and one side vertically
crop_box = (
int(page_origin[0][0]+\
(comp_x*\
images[comp_page]["scalefactor"]["x"])),
int(page_origin[0][1]+\
(comp_y*\
images[comp_page]["scalefactor"]["y"])),
int(page_origin[0][0]+\
(comp_x*\
images[comp_page]["scalefactor"]["x"])+\
comp_side*comp_boxes*images[comp_page]["scalefactor"]["x"]),
int(page_origin[0][1]+\
(comp_y*\
images[comp_page]["scalefactor"]["y"])+\
comp_side*images[comp_page]["scalefactor"]["y"]),
)
cropped_image = images[comp_page]["image"].crop(crop_box)
# the recognised text is only printed, see the note on option boxes above
output = self.__ocrIt(cropped_image,
form_uuid,
resourcename,
linenum)
print output
linenum+=1
else:
continue
output = etree.tostring(layout_etree, pretty_print=True)
return output #"%s %s %s" % (markers, orientation, scalefactor)
def __ocrIt(self,
            image,
            form_uuid,
            resourcename,
            linenum,
            content_type="textbox"):
    """
        Put Tesseract into work, actual OCRing will be done here

        @param image: the cropped image of one component
        @param form_uuid: uuid of the form (part of the temp file names)
        @param resourcename: name of the resource (part of the temp file
                             names)
        @param linenum: number of the component within its field (part of
                        the temp file names)
        @param content_type: "optionbox" or "textbox"
        @return: optionbox - True if the mean of the first image band is
                             below 96, otherwise False
                 textbox - the text written by tesseract, newlines
                           replaced by spaces
                 None for any other content type
    """
    if content_type == "optionbox":
        stat = ImageStat.Stat(image)
        return stat.mean[0] < 96

    if content_type != "textbox":
        return None

    # function-scope import, the module does not import subprocess
    import subprocess

    uniqueuuid = uuid.uuid1() # keeps concurrent requests apart
    basename = "%s_%s_%s_%s" % (uniqueuuid,
                                form_uuid,
                                resourcename,
                                linenum)
    # Absolute paths are used instead of os.chdir(): the working directory
    # is shared by the whole process and used to stay changed whenever
    # anything below failed.
    # NOTE(review): the application name is fixed to "eden" here while
    # s3ocr_parser uses self.request.application - confirm which is wanted.
    tempdir = os.path.join(os.getcwd(),
                           "applications/eden/uploads",
                           "ocr_temp")
    inputpath = os.path.join(tempdir, "%s.tif" % basename)
    outputbase = os.path.join(tempdir, "%s_text" % basename)
    outputpath = "%s.txt" % outputbase # tesseract appends ".txt" itself

    try:
        os.mkdir(tempdir)
    except OSError:
        pass # the directory is already there

    try:
        image.save(inputpath)
        devnull = open(os.devnull, "w")
        try:
            try:
                # argument list instead of a shell command line, so names
                # are never interpreted by a shell
                status = subprocess.call(["tesseract",
                                          inputpath,
                                          outputbase,
                                          "-psm", "7"],
                                         stdout=devnull)
            except OSError:
                # the tesseract executable could not be started
                status = -1
        finally:
            devnull.close()
        if status != 0:
            self.r.error(501, self.T("Tesseract not installed"))
        outputfile = open(outputpath)
        try:
            outputtext = outputfile.read()
        finally:
            outputfile.close()
    finally:
        # clean up whether or not the recognition succeeded
        for leftover in (inputpath, outputpath):
            try:
                os.remove(leftover)
            except OSError:
                pass # was never written
        try:
            os.rmdir(tempdir)
        except OSError:
            pass # another request still has files in there

    return outputtext.replace("\n", " ")
def __convertImage2binary(self, image, threshold = 180):
    """
        Converts the image into binary based on a threshold

        @param image: the image to convert; it is not modified
        @param threshold: gray values below this become 0, all others
                          255 (default: 180)
        @return: a new grayscale image holding only the values 0 and 255
    """
    image = ImageOps.grayscale(image)
    # The threshold parameter is honoured now (180 used to be hard-coded in
    # the comparison); point() maps all pixels in one call instead of a
    # getpixel/putpixel round trip per pixel.
    return image.point(lambda value: 0 if value < threshold else 255)
def __findRegions(self, im):
"""
Return the list of regions which are found by the following algorithm.
-----------------------------------------------------------
Raster Scanning Algorithm for Connected Component Analysis:
-----------------------------------------------------------
On the first pass:
=================
1. Iterate through each element of the data by column, then by row (Raster Scanning)
2. If the element is not the background
1. Get the neighboring elements of the current element
2. If there are no neighbors, uniquely label the current element and continue
3. Otherwise, find the neighbor with the smallest label and assign it to the current element
4. Store the equivalence between neighboring labels
On the second pass:
===================
1. Iterate through each element of the data by column, then by row
2. If the element is not the background
1. Relabel the element with the lowest equivalent label
( source: http://en.wikipedia.org/wiki/Connected_Component_Labeling )
"""
# NOTE(review): every line of this method sits at column 0 in this copy, so
# the block nesting referred to in the comments below is inferred from the
# statement order and has to be confirmed when the indentation is restored.
width, height = im.size
# NOTE(review): the return value of ImageOps.grayscale() is discarded here;
# only the convert("L") on the next line takes effect.
ImageOps.grayscale(im)
im = im.convert("L")
# regions: final label -> region object (built in the second pass)
# pixel_region[x][y]: label given to the pixel, 0 = no label
# equivalences: label -> set of smaller labels it was found next to
regions = {}
pixel_region = [[0 for y in xrange(height)] for x in xrange(width)]
equivalences = {}
n_regions = 0
#first pass. find regions.
for x in xrange(width):
for y in xrange(height):
#look for a black pixel
if im.getpixel((x, y)) == 0 : #BLACK
# get the region number from north or west or create new region
# (region_n is read from the previous column [x-1][y] and region_w from
# the previous row [x][y-1])
region_n = pixel_region[x-1][y] if x > 0 else 0
region_w = pixel_region[x][y-1] if y > 0 else 0
#region_nw = pixel_region[x-1][y-1] if x > 0 and y > 0 else 0
#region_ne = pixel_region[x-1][y+1] if x > 0 else 0
max_region = max(region_n, region_w)
if max_region > 0:
#a neighbour already has a region, new region is the smallest > 0
new_region = min(filter(lambda i: i > 0, (region_n, region_w)))
#update equivalences
if max_region > new_region:
if max_region in equivalences:
equivalences[max_region].add(new_region)
else:
equivalences[max_region] = set((new_region, ))
# presumably this else pairs with "if max_region > 0" above (no labelled
# neighbour -> new label) - confirm when restoring the indentation
else:
n_regions += 1
new_region = n_regions
pixel_region[x][y] = new_region
#Scan image again, assigning all equivalent regions the same region value.
for x in xrange(width):
for y in xrange(height):
r = pixel_region[x][y]
if r > 0:
# follow the equivalences down to a label without a smaller equivalent
while r in equivalences:
r = min(equivalences[r])
# (__Region is not defined in the visible part of this file)
if r in regions:
regions[r].add(x, y)
else:
regions[r] = self.__Region(x, y)
return list(regions.itervalues())
def __getOrientation(self, markers):
""" Returns orientation of the sheet in radians """
x1, y1 = markers[0]
x2, y2 = markers[2]
try:
slope = ((x2-x1)*1.0) / ((y2-y1)*1.0)
except(ZeroDivisionError):
slope = 999999999999999999999999999
return math.atan(slope)*(180.0/math.pi)*(-1)
def __scaleFactor(self, markers):
    """
        Returns the scale factors lengthwise and breadthwise.

        The measured distances between markers are divided by the
        reference distances 842 - 60 (lengthwise) and 596 - 60
        (breadthwise).

        @param markers: sequence of (x, y) pairs, at least seven long
        @return: dict with the keys "x" (breadthwise) and "y" (lengthwise)
    """

    reference_width = 596 - 60
    reference_height = 842 - 60

    # lengthwise: first marker to third marker
    measured_height = self.__distance([markers[0], markers[2]])
    # breadthwise: seventh marker to third marker
    measured_width = self.__distance([markers[6], markers[2]])

    return {"x": measured_width / reference_width,
            "y": measured_height / reference_height}
def __distance(self, li):
""" returns the euclidean distance if the input is of the form [(x1, y1), (x2, y2)]"""
return math.sqrt(math.fsum((math.pow(math.fsum((int(li[1][0]), -int(li[0][0]))), 2), math.pow(math.fsum((int(li[1][1]), -int(li[0][1]))), 2))))
def __getMarkers(self, image):
    """
        Gets the markers on the OCR image.

        Every region with more than 320 pixels and an aspect ratio
        between 0.67 and 1.5 (exclusive) is taken as a marker and
        represented by its centroid.

        @param image: the image handed over to __findRegions
        @return: list of (x, y) centroids: the three with the lowest x
                 ordered by y, then the fourth, then the fifth to the
                 seventh ordered by y
    """

    centroids = []
    for region in self.__findRegions(image):
        if region.area > 320 and 0.67 < region.aspectratio() < 1.5:
            centroids.append(region.centroid())

    # order by x (then y) so the slices below pick the markers up
    # group by group
    centroids.sort()

    by_y = lambda point: point[1]
    first_group = sorted(centroids[0:3], key=by_y)
    middle = centroids[3:4]
    last_group = sorted(centroids[4:7], key=by_y)

    return first_group + middle + last_group
class __Region():
""" Self explainatory """
def __init__(self, x, y):
""" Initialize the region """
self._pixels = [(x, y)]
self._min_x = x
self._max_x = x
self._min_y = y
self._max_y = y
self.area = 1
def add(self, x, y):
""" Add a pixel to the region """
self._pixels.append((x, y))
self.area += 1
self._min_x = min(self._min_x, x)
self._max_x = max(self._max_x, x)
self._min_y = min(self._min_y, y)
self._max_y = max(self._max_y, y)
def centroid(self):
""" Returns the centroid of the bounding box """
return ((self._min_x + self._max_x)/2 , (self._min_y + self._max_y)/2)
def box(self):
""" Returns the bounding box of the region """
return [ (self._min_x, self._min_y) , (self._max_x, self._max_y)]
def aspectratio(self):
""" Calculating the aspect ratio of the region """
width = self._max_x - self._min_x
length = self._max_y - self._min_y
return float(width)/float(length)
#==============================================================================
#==================== unicode support to reportlab ============================
#==============================================================================
fonts_directory = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               "../../static/fonts")

# Font registration is best effort: whenever a font (or its range map)
# cannot be loaded, its map is left empty, which keeps the font out of
# fontchecksequence below. Only Exception is caught, so KeyboardInterrupt
# and SystemExit still propagate.

#------------------------------------------------------------------------------
# unifont - considered to be an allrounder
#------------------------------------------------------------------------------
try:
    pdfmetrics.registerFont(TTFont("unifont",
                                   os.path.join(fonts_directory,
                                                "unifont/unifont.ttf")))
    unifont_map = [
        (0, 65536),
    ]
except Exception:
    unifont_map = []
    sys.stderr.write("S3 Debug: s3ocr: unifont not found, run static/fonts/setfonts.py\n")

#------------------------------------------------------------------------------
# Arabic fonts
#------------------------------------------------------------------------------
try:
    pdfmetrics.registerFont(TTFont("AlMateen-Bold",
                                   os.path.join(fonts_directory,
                                                "arabic/ae_AlMateen-Bold.ttf")))
    from fontmap.AlMateenBold import AlMateenBold_map

    pdfmetrics.registerFont(TTFont("AlMohanad",
                                   os.path.join(fonts_directory,
                                                "arabic/ae_AlMohanad.ttf")))
    from fontmap.AlMohanad import AlMohanad_map
except Exception:
    AlMateenBold_map = []
    AlMohanad_map = []
    sys.stderr.write("S3 Debug: s3ocr: arabic fonts not found, run static/fonts/setfonts.py\n")

#------------------------------------------------------------------------------
# japanese fonts
#------------------------------------------------------------------------------
try:
    pdfmetrics.registerFont(TTFont("SazanamiGothic",
                                   os.path.join(fonts_directory,
                                                "japanese/sazanami-gothic.ttf")))
    from fontmap.SazanamiGothic import SazanamiGothic_map

    pdfmetrics.registerFont(TTFont("SazanamiMincho",
                                   os.path.join(fonts_directory,
                                                "japanese/sazanami-mincho.ttf")))
    from fontmap.SazanamiMincho import SazanamiMincho_map
except Exception:
    SazanamiGothic_map = []
    SazanamiMincho_map = []
    sys.stderr.write("S3 Debug: s3ocr: japanese fonts not found, run static/fonts/setfonts.py\n")

#--------------------------------------------------------------------------
# Standard fonts
#--------------------------------------------------------------------------
Helvetica = "Helvetica"
# (start, end) character code ranges. Form.selectfont treats them as
# half-open, so the soft hyphen (173) has to be written as (173, 174);
# a (173, 173) entry would never match anything.
Helvetica_map = [
    (32, 127),
    (160, 161),
    (173, 174),
]

#--------------------------------------------------------------------------
# some global variables
#--------------------------------------------------------------------------
# Fonts in the order in which they are tried for a character
fontlist = [
    "Helvetica",            # english and latin english fonts
    "AlMateen-Bold",        # arabic fonts
    "AlMohanad",            # arabic fonts
    "SazanamiGothic",       # japanese fonts
    "SazanamiMincho",       # japanese fonts
    "unifont",              # unifont should be always at the last
]

fontmapping = {
    "Helvetica": Helvetica_map,
    "AlMateen-Bold": AlMateenBold_map,
    "AlMohanad": AlMohanad_map,
    "SazanamiGothic": SazanamiGothic_map,
    "SazanamiMincho": SazanamiMincho_map,
    "unifont": unifont_map,
}

# Only the fonts which actually have a range map take part in the lookup
fontchecksequence = [eachfont for eachfont in fontlist
                     if len(fontmapping[eachfont]) != 0]
#==========================================================================
#=============== internal Class Definitions and functions =================
#==========================================================================
#======================== pdf layout from xform ===========================
class Form(object):
    """
        Form class to use reportlab to generate pdf

        The form keeps a text cursor (self.x, self.y) on an A4 canvas
        which is rendered into an in-memory buffer; save() returns the
        finished pdf.
    """

    # font size and baseline used for the page number and the meta
    # information printed at the bottom of every page
    _FOOTER_FONTSIZE = 10
    _FOOTER_Y = 25

    def __init__(self, pdfname="ocrform.pdf", margintop=65, marginsides=50,
                 **kw):
        """
            Form initialization

            @param pdfname: fallback for the "pdfpath" keyword
            @param margintop: distance of the first line from the top edge
            @param marginsides: left and right margin
            @keyword pdfpath: stored as self.pdfpath
            @keyword verbose: stored as self.verbose (default 0)
            @keyword linespacing: extra space between lines (default 4)
            @keyword typeface: stored as self.font (default "Helvetica")
            @keyword fontsize: initial font size (default 13)
            @keyword form_uuid: printed in the footer of every page
            @keyword form_revision: printed in the footer of every page
            @keyword form_resourcename: printed in the footer of every page
        """

        self.pdfpath = kw.get("pdfpath", pdfname)
        self.verbose = kw.get("verbose", 0)
        self.linespacing = kw.get("linespacing", 4)
        self.font = kw.get("typeface", "Helvetica")
        self.fontsize = kw.get("fontsize", 13)
        self.IObuffer = StringIO()
        self.canvas = Canvas(self.IObuffer, pagesize = A4)
        self.width, self.height = A4
        self.x = marginsides
        self.lastx = marginsides
        self.marginsides = marginsides
        self.margintop = margintop
        self.y = self.height - margintop
        self.lasty = self.height - margintop
        self.num = 1
        self.gray = 0
        self.pagebegin = 1
        # decorate() sets the same value; having it from the start lets
        # draw_check_boxes()/draw_circle() work before decorate() was called
        self.origin = {"x": 29, "y": 29}
        self.form_uuid = kw.get("form_uuid" ,"")
        self.form_revision = kw.get("form_revision" ,"")
        self.form_resourcename = kw.get("form_resourcename" ,"")
        self.put_page_num()
        self.put_metainfo()

    def barcode(self, uuid):
        """ Generate barcode of uuid at (self.lastx, self.lasty) """

        barcode = code128.Code128(str(uuid), barWidth=1, barHeight=20)
        barcode.drawOn(self.canvas, self.lastx, self.lasty)
        # continue 20 units (the bar height) further down
        self.lasty = self.lasty - 20
        self.y = self.lasty

    def decorate(self):
        """
            Decorates the form with the markers needed to align the
            form later: seven filled 20x20 squares along the page border
        """

        c = self.canvas
        marker_positions = (
            (20, 20),                                   # bt lf
            (self.width - 40, 20),                      # bt rt
            (20, self.height - 40),                     # tp lf
            (self.width/2 - 10, 20),                    # bt md
            (20, self.height/2 - 10),                   # md lf
            (self.width - 40, self.height - 40),        # tp rt
            (self.width - 40, self.height/2 - 10),      # md rt
        )
        for marker_x, marker_y in marker_positions:
            c.rect(marker_x, marker_y, 20, 20, fill=1)
        self.origin = {"x": 29, "y": 29} # location of top left marker

    def print_text(self,
                   lines,
                   fontsize=13,
                   gray=0,
                   seek=0,
                   continuetext=0,
                   style="default"):
        """
            Give the lines to be printed as a list,
            set the font and grey level

            @param lines: list of lines, each one is converted to unicode
                          and html entities in it are unescaped
            @param fontsize: font size used from now on
            @param gray: gray level of the text
            @param seek: move the x cursor by this amount before printing
            @param continuetext: if true, continue at the current cursor
                                 instead of starting a new line
            @param style: "center" or "right" to align each line,
                          anything else prints from the current x
        """

        self.fontsize = fontsize
        self.gray = gray

        if not continuetext and not self.pagebegin:
            self.resetx()
            self.nextline()

        self.pagebegin = 0

        if seek:
            self.resetx(seek=seek)

        lastindex = len(lines) - 1
        for index, line in enumerate(lines):
            line = self.__html_unescape(unicode(line))

            if not continuetext:
                # alignment, estimating half the font size per character
                if style == "center":
                    self.x = \
                        (self.width - (len(line) * (self.fontsize / 2)))/2
                elif style == "right":
                    self.x = \
                        ((self.width - self.marginsides) -\
                             ((len(line)+3) * (self.fontsize / 2)))
            else:
                # wrapping multiline options
                if (self.width - self.marginsides - self.x) < 100:
                    self.resetx()
                    self.nextline()

            if (self.y - self.fontsize) < 50:
                self.set_new_page()

            for char in line:
                t = self.writechar(char)
                self.x = t.getX()
                self.y = t.getY()
                # text wrapping -> TODO: word wrapping
                if self.x > (self.width - self.marginsides - self.fontsize):
                    self.writechar("-")
                    self.nextline()
                    self.resetx(self.fontsize)

            if not continuetext and index != lastindex:
                self.nextline()
                self.resetx()

    def writechar(self, char=" "):
        """
            Writes one character on canvas at the cursor, in the font
            picked by selectfont()

            @return: the reportlab text object, its getX()/getY() give
                     the position after the character
        """

        font = self.selectfont(char)
        t = self.canvas.beginText(self.x, self.y)
        t.setFont(font, self.fontsize)
        t.setFillGray(self.gray)
        t.textOut(char)
        self.canvas.drawText(t)
        return t

    def nextline(self, fontsize=0):
        """
            Moves the y cursor down one line

            @param fontsize: new font size, 0 keeps the current one
        """

        if fontsize != 0:
            self.fontsize = fontsize

        # nothing to skip while the cursor is still at the top of a page
        if self.pagebegin == 0:
            self.y = self.y - (self.fontsize + self.linespacing)
            if self.y < self.margintop:
                self.set_new_page()

        self.pagebegin = 0

    def resetx(self, offset=0, seek=None):
        """
            Moves the x cursor with offset

            @param offset: distance from the left margin, used when seek
                           is None
            @param seek: if given, move the cursor relative to its
                         current position instead

            If the cursor ends up beyond the writable width it is
            wrapped around, advancing one line per full width.
        """

        if seek is None:
            self.x = self.marginsides + offset
        else:
            self.x += seek

        lastvalidx = self.width - (self.marginsides + (self.fontsize / 2))
        writablex = self.width - (2 * self.marginsides)
        if self.x > lastvalidx:
            currentx = self.x - self.marginsides
            remx = currentx % writablex
            self.x = remx + self.marginsides
            numlines = int(currentx / writablex)
            for line in range(numlines):
                self.nextline()

    def __html_unescape(self, text):
        """
            Helper function, unescape any html special characters
            (named entities such as &amp;)
        """

        return re.sub("&(%s);" % "|".join(name2codepoint),
                      lambda m: unichr(name2codepoint[m.group(1)]),
                      text)

    def linespace(self, spacing=2):
        """
            Moves the y cursor down by given units
        """

        if self.pagebegin == 0:
            self.y -= spacing

        self.pagebegin = 0

    def selectfont(self, char):
        """
            Select font according to the input character

            The fonts in fontchecksequence are tried in order; the first
            one with a range start <= code < end containing the character
            code wins.
        """

        charcode = ord(char)
        for font in fontchecksequence:
            for fontrange in fontmapping[font]:
                # plain comparison instead of "in xrange(...)", which
                # walks the whole range on Python 2
                if fontrange[0] <= charcode < fontrange[1]:
                    return font
        return "Helvetica"  # fallback, if no thirdparty font is installed

    def draw_check_boxes(self,
                         boxes=1,
                         completeline=0,
                         lines=0,
                         seek=0,
                         continuetext=0,
                         fontsize=15,
                         gray=0,
                         style="",
                         ):
        """
            Function to draw check boxes default no of boxes = 1

            @param boxes: number of boxes to draw side by side
            @param completeline: 1 to draw as many boxes as fit the width
            @param lines: not used
            @param seek: move the x cursor by this amount first
            @param continuetext: if true, continue at the current cursor
                                 instead of starting a new line
            @param fontsize: side of a box, 0 keeps the current font size
            @param gray: gray level of the outline
            @param style: "center" or "right" to position the first box
            @return: dict with the position ("x", "y") of the first box
                     relative to self.origin, the "side", the number of
                     "boxes" and the "page" number
        """

        if not continuetext and not self.pagebegin:
            self.resetx()
            self.nextline()

        self.pagebegin = 0

        # fontsize 0 means: keep the current font size. Assigning it
        # unconditionally would leave zero-sized boxes (and a division by
        # zero for completeline).
        if fontsize != 0:
            self.fontsize = fontsize

        if style == "center":
            self.x = self.width / 2
        elif style == "right":
            self.x = self.width - self.marginsides - self.fontsize

        if seek > (self.width - (self.marginsides + self.fontsize)):
            seek = 0

        if (self.y - self.fontsize) < 40:
            self.set_new_page()

        # set the stroke style only now: showPage() in set_new_page()
        # forgets all canvas settings
        c = self.canvas
        c.setLineWidth(0.90)
        c.setStrokeGray(gray)

        if seek != 0:
            self.x = self.x + seek

        if completeline == 1:
            boxes = int(self.width / self.fontsize)

        # NOTE: 842 is the page height used for the conversion to a
        # top-down y coordinate
        box_startpx = {
            "x": self.x - self.origin["x"],
            "y": (842-self.y-self.fontsize) - self.origin["y"],
            "side": self.fontsize - 1,
            "boxes": boxes,
            "page": self.num
            }

        for i in range(boxes):
            c.rect(self.x, self.y, self.fontsize, self.fontsize)
            self.x = self.x + self.fontsize
            if self.x > (self.width - (self.marginsides + self.fontsize)):
                break

        self.lastx = self.x
        return box_startpx

    def draw_circle(self,
                    boxes=1,
                    completeline=0,
                    lines=0,
                    seek=0,
                    continuetext=0,
                    fontsize=0,
                    gray=0,
                    style=""):
        """
            Draw circles on the form

            @param boxes: number of circles to draw side by side
            @param seek: move the x cursor by this amount before the
                         first and after every circle
            @param gray: gray level of the outline
            @param completeline: not used
            @param lines: not used
            @param continuetext: not used
            @param fontsize: not used, the current font size is the
                             diameter
            @param style: not used
            @return: dict with the centre ("x", "y") of the first circle
                     relative to self.origin, the "radius", the number
                     of "boxes" and the "page" number
        """

        c = self.canvas
        c.setLineWidth(0.90)
        c.setStrokeGray(gray)

        self.resetx(seek=seek)

        circle_center = {
            "x": (self.x + self.fontsize/2) - self.origin["x"],
            "y": (842 - self.y - self.fontsize/2) - self.origin["y"],
            "radius": self.fontsize/2,
            "boxes" : boxes,
            "page": self.num
            }

        for eachcircle in range(boxes):
            c.circle(self.x + self.fontsize/2, self.y + self.fontsize/2,
                     self.fontsize/2, fill = 0)
            # step over the circle, then over the gap
            self.resetx(seek=self.fontsize)
            self.resetx(seek=seek)

        return circle_center

    def draw_line(self, gray=0, nextline=0):
        """
            Function to draw a straight line from margin to margin

            @param gray: gray level of the line
            @param nextline: if true move to the next line first,
                             otherwise just move down by 8 units
        """

        self.fontsize = 4
        if nextline:
            self.nextline()
        else:
            self.linespace(8)
        self.resetx()

        c = self.canvas
        c.setStrokeGray(gray)
        c.setLineWidth(1)
        c.line(self.x, self.y, self.width - self.x, self.y)
        self.y = self.y + (self.linespacing)

    def set_new_page(self):
        """
            Start a new page.

            All changes are forgotten when a showPage() has been executed.
            They have to be set again.
        """

        self.num += 1
        c = self.canvas
        c.showPage()
        self.decorate()
        self.x = self.marginsides
        self.lastx = self.marginsides
        self.y = self.height - self.margintop
        self.put_page_num()
        self.put_metainfo()
        self.pagebegin = 1

    def _put_footer_text(self, text, x):
        """
            Write text on the footer line starting at x; cursor and font
            size are restored afterwards
        """

        saved = (self.x, self.y, self.fontsize)
        self.fontsize = self._FOOTER_FONTSIZE
        self.x = x
        self.y = self._FOOTER_Y
        try:
            for char in text:
                t = self.writechar(char)
                self.x = t.getX()
                self.y = t.getY()
        finally:
            self.x, self.y, self.fontsize = saved

    def put_metainfo(self):
        """ Print uuid, revision and resource name at the page bottom """

        uuid_text = "UUID: %s" % self.form_uuid
        rest_text = "Revision: %s   Resource: %s" % (self.form_revision,
                                                     self.form_resourcename)
        self._put_footer_text(uuid_text, self.marginsides)
        self._put_footer_text(rest_text, (self.width/2) + 20)

    def put_page_num(self):
        """ Print the page number at the bottom right of the page """

        text = "page%s" % self.num
        # right aligned, estimating half the font size per character
        start = self.width - \
            (((len(text)+2)*(self._FOOTER_FONTSIZE/2)) + self.marginsides)
        self._put_footer_text(text, start)

    def set_title(self, title = "FORM"):
        """ Sets the title of the pdf. """

        self.canvas.setTitle(title)

    def save(self):
        """
            Saves the form

            @return: the content of the pdf; the buffer is closed, so
                     the form cannot be used afterwards
        """

        self.canvas.save()
        pdf = self.IObuffer.getvalue()
        self.IObuffer.close()
        return pdf
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment