Skip to content

Instantly share code, notes, and snippets.

Created February 14, 2020 10:26
Show Gist options
  • Save gforcada/089ba00537e88983d8e3ed1d5560a744 to your computer and use it in GitHub Desktop.
Save gforcada/089ba00537e88983d8e3ed1d5560a744 to your computer and use it in GitHub Desktop.
Normalize all text fields of any content type being created (the handler has to be registered for the relevant event).
# -*- coding: utf-8 -*-
from plone.app.textfield import RichText
from plone.app.textfield.value import RichTextValue
from plone.behavior.interfaces import IBehavior
from plone.dexterity.interfaces import IDexterityFTI
from zope.component import getUtility
from zope.schema import Text
from zope.schema import TextLine
import unicodedata
def text_fields(obj, event):
    """Event handler that normalizes the text fields of an object.

    W3C recommends the use of Unicode normalization form C (NFC) on the web.
    Our current font (TheAntiquaF) does not support, at least, the combining
    diaeresis, and that brings rendering problems on some browsers.

    This handler makes sure that all text fields are in NFC form: title,
    description and subject first, then every ``TextLine``/``Text``/
    ``RichText`` field found for the object's portal type.

    :param obj: the content object to normalize; its ``portal_type`` is used
        to look up the fields to process
    :param event: the event that triggered this handler (not used)
    """
    changed = _normalize_title(obj)
    changed |= _normalize_description(obj)
    changed |= _normalize_subject(obj)

    for field_name, field_factory in _fields(obj.portal_type):
        if isinstance(field_factory, (TextLine, Text)):
            changed |= _normalize_text_field(obj, field_name)
        elif isinstance(field_factory, RichText):
            changed |= _normalize_richtext_field(
                obj, field_name, field_factory,
            )

    # only reindex if an attribute has changed
    if changed:
        # NOTE(review): the statement guarded by this condition was missing
        # from the file; reindexObject() is assumed to be the intended
        # catalog refresh -- confirm against the original handler.
        obj.reindexObject()
def _normalize_title(obj):
    """Normalize the title of the given object.

    Title has its own setter, so the normalized value is written back
    through it instead of through plain attribute assignment.

    :param obj: the content object whose ``title`` is checked
    :returns: True if the title was rewritten, False otherwise
    """
    data = getattr(obj, 'title', '')
    if not data:
        return False

    result = normalize(data)
    if result == data:
        return False

    # NOTE(review): the write-back was missing from the file, so the title
    # was reported as changed without ever being stored; setTitle() is
    # assumed to be the setter the docstring refers to -- confirm.
    obj.setTitle(result)
    return True
def _normalize_description(obj):
    """Normalize the description of the given object.

    Description has its own setter, so the normalized value is written back
    through it instead of through plain attribute assignment.

    :param obj: the content object whose ``description`` is checked
    :returns: True if the description was rewritten, False otherwise
    """
    data = getattr(obj, 'description', '')
    if not data:
        return False

    result = normalize(data)
    if result == data:
        return False

    # NOTE(review): the write-back was missing from the file, so the
    # description was reported as changed without ever being stored;
    # setDescription() is assumed to be the intended setter -- confirm.
    obj.setDescription(result)
    return True
def _normalize_subject(obj):
    """Normalize the subject (tags) of the given object.

    Subject is a tuple of strings and has its own setter. Every tag is
    normalized; the whole tuple is stored again only if at least one tag
    actually changed.

    :param obj: the content object whose ``subject`` is checked
    :returns: True if the subject was rewritten, False otherwise
    """
    data = getattr(obj, 'subject', ())
    # covers the empty tuple as well as a subject that is set to None
    if not data:
        return False

    subjects = tuple(normalize(subject) for subject in data)
    if subjects == tuple(data):
        return False

    # NOTE(review): collecting the normalized tags and storing them was
    # missing from the file; setSubject() is assumed to be the setter the
    # original docstring refers to -- confirm.
    obj.setSubject(subjects)
    return True
def _normalize_text_field(obj, attribute):
    """Normalize one plain text attribute of the given object.

    :param obj: the object holding the attribute
    :param attribute: name of the attribute to normalize
    :returns: True if the attribute was rewritten, False otherwise
    """
    current = getattr(obj, attribute, '')
    if not current:
        # missing or empty value: nothing to normalize
        return False

    normalized = normalize(current)
    if normalized == current:
        return False

    setattr(obj, attribute, normalized)
    return True
def _normalize_richtext_field(obj, attribute, field_factory):
    """Normalize the given richtext attribute of the given object.

    The raw text of the stored value is normalized; if that changes it, a
    new ``RichTextValue`` holding the normalized text replaces the stored
    one.

    :param obj: the object holding the attribute
    :param attribute: name of the richtext attribute to normalize
    :param field_factory: the schema field the attribute belongs to; kept
        for interface compatibility, not used by the code below
    :returns: True if the attribute was rewritten, False otherwise
    """
    data = getattr(obj, attribute, '')
    if not data:
        return False

    result = normalize(data.raw)
    if result == data.raw:
        return False

    # NOTE(review): the arguments of this call were missing from the file.
    # They are rebuilt from the stored value so that only the raw text
    # differs -- confirm against the original implementation.
    result = RichTextValue(
        raw=result,
        mimeType=data.mimeType,
        outputMimeType=data.outputMimeType,
        encoding=data.encoding,
    )
    setattr(obj, attribute, result)
    return True
def _fields(portal_type):
    """Get all fields and fields' descriptions from the given portal type.

    :param portal_type: name under which the ``IDexterityFTI`` utility of
        the content type is registered
    :returns: a list of ``(name, field)`` pairs: the ones of the type's
        schema (``all=True``) followed by the ones each behavior interface
        reports
    """
    fti = getUtility(IDexterityFTI, name=portal_type)
    schema = fti.lookupSchema()

    # Materialize a list: namesAndDescriptions() may return a dict view,
    # which cannot be concatenated in place.
    fields = list(schema.namesAndDescriptions(all=True))

    for behavior_name in fti.behaviors:
        factory = getUtility(IBehavior, behavior_name)
        behavior = factory.interface
        fields.extend(behavior.namesAndDescriptions())

    return fields
def normalize(text):
    """Helper method to keep text normalized.

    Text is brought to Unicode normalization form C (NFC) and every
    character whose Unicode category starts with ``C`` is dropped. Besides
    NUL and friends this also covers tabs, newlines and invisible format
    characters such as the zero width space.

    :param text: a string of text, either unicode or a UTF-8 encoded byte
        string; any other value is returned untouched
    :returns: the normalized version of 'text' as a unicode string
    :raises UnicodeDecodeError: if a byte string is not valid UTF-8
    """
    # ``unicode`` only exists on Python 2; fall back to ``str`` on Python 3
    try:
        text_type = unicode  # noqa: F821
    except NameError:
        text_type = str

    # ``bytes`` is ``str`` on Python 2, so this covers both versions
    if isinstance(text, bytes):
        text = text.decode('utf-8')

    if not isinstance(text, text_type):
        return text

    result = unicodedata.normalize('NFC', text)
    # remove control characters, on text fields they do more harm than good
    return u''.join(
        x for x in result if unicodedata.category(x)[0] != 'C'
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment