Skip to content

Instantly share code, notes, and snippets.

@gforcada
Created February 14, 2020 10:26
Show Gist options
  • Save gforcada/089ba00537e88983d8e3ed1d5560a744 to your computer and use it in GitHub Desktop.
Save gforcada/089ba00537e88983d8e3ed1d5560a744 to your computer and use it in GitHub Desktop.
Normalize all fields of any content type being created (one has to hook that to an event handler)
# -*- coding: utf-8 -*-
from plone.app.textfield import RichText
from plone.app.textfield.value import RichTextValue
from plone.behavior.interfaces import IBehavior
from plone.dexterity.interfaces import IDexterityFTI
from zope.component import getUtility
from zope.schema import Text
from zope.schema import TextLine
import unicodedata
def text_fields(obj, event):
    """Event handler that brings every text field of ``obj`` into NFC form.

    The W3C recommends Unicode normalization form C (NFC) for the web, see
    http://www.w3.org/TR/charmod-norm/#sec-UnicodeNormalized

    The font currently in use (TheAntiquaF) lacks, at least, the combining
    diaeresis, which causes rendering problems on some browsers, see
    https://bugzilla.mozilla.org/show_bug.cgi?id=940944

    Title, description and subject go through their dedicated helpers; all
    remaining fields are taken from the schema and behaviors of the object's
    portal type. The object is reindexed only if a value was rewritten.

    :param obj: the content object whose fields are normalized
    :param event: the event that triggered this handler (not used)
    """
    outcomes = [
        _normalize_title(obj),
        _normalize_description(obj),
        _normalize_subject(obj),
    ]
    for name, field in _fields(obj.portal_type):
        if isinstance(field, (TextLine, Text)):
            outcomes.append(_normalize_text_field(obj, name))
            continue
        if isinstance(field, RichText):
            outcomes.append(_normalize_richtext_field(obj, name, field))
    # results are collected first so that every normalizer runs; the
    # reindex happens only if at least one attribute has changed
    if any(outcomes):
        obj.reindexObject()
def _normalize_title(obj):
    """Normalize the title of ``obj``, writing it back through ``setTitle``.

    The title may be a UTF-8 encoded byte string, whereas ``normalize``
    hands back text. The stored value is therefore decoded before it is
    compared with the normalized one: comparing non-ASCII bytes directly
    with text never reports equality, which would rewrite (and reindex)
    an already normalized title on every call.

    :param obj: object with a ``title`` attribute and a ``setTitle`` method
    :returns: ``True`` if the title was rewritten, ``False`` otherwise
    """
    data = getattr(obj, 'title', '')
    if not data:
        return False
    result = normalize(data)
    # compare text with text; ``normalize`` already decoded ``data`` as
    # UTF-8 successfully, so this decode cannot fail at this point
    original = data.decode('utf-8') if isinstance(data, bytes) else data
    if result != original:
        # the setter is handed an encoded string, as before
        obj.setTitle(result.encode('utf-8'))
        return True
    return False
def _normalize_description(obj):
    """Normalize the description of ``obj`` through ``setDescription``.

    The description may be a UTF-8 encoded byte string, whereas
    ``normalize`` hands back text. The stored value is therefore decoded
    before it is compared with the normalized one: comparing non-ASCII
    bytes directly with text never reports equality, which would rewrite
    (and reindex) an already normalized description on every call.

    :param obj: object with a ``description`` attribute and a
        ``setDescription`` method
    :returns: ``True`` if the description was rewritten, ``False`` otherwise
    """
    data = getattr(obj, 'description', '')
    if not data:
        return False
    result = normalize(data)
    # compare text with text; ``normalize`` already decoded ``data`` as
    # UTF-8 successfully, so this decode cannot fail at this point
    original = data.decode('utf-8') if isinstance(data, bytes) else data
    if result != original:
        # the setter is handed an encoded string, as before
        obj.setDescription(result.encode('utf-8'))
        return True
    return False
def _normalize_subject(obj):
    """Normalize the subject (tags) of ``obj`` through ``setSubject``.

    Each tag may be a UTF-8 encoded byte string, whereas ``normalize``
    hands back text. Every tag is therefore decoded before it is compared
    with its normalized form, so that already normalized non-ASCII tags
    are not reported as changed on every call.

    :param obj: object with a ``subject`` attribute (an iterable of
        strings) and a ``setSubject`` method
    :returns: ``True`` if at least one tag was rewritten, ``False`` otherwise
    """
    data = getattr(obj, 'subject', ())
    # a missing, empty or ``None`` subject leaves nothing to normalize
    if not data:
        return False
    changed = False
    subjects = []
    for subject in data:
        result = normalize(subject)
        # compare text with text (see docstring)
        if isinstance(subject, bytes):
            original = subject.decode('utf-8')
        else:
            original = subject
        if result != original:
            changed = True
        subjects.append(result)
    if changed:
        # all tags are written back, in their normalized form
        obj.setSubject(tuple(subjects))
    return changed
def _normalize_text_field(obj, attribute):
    """Normalize one plain text attribute of the given object.

    :param obj: the object holding the attribute
    :param attribute: name of the attribute to normalize
    :returns: ``True`` if the attribute was rewritten, ``False`` otherwise
    """
    current = getattr(obj, attribute, '')
    # missing or empty values are left alone
    if not current:
        return False
    cleaned = normalize(current)
    if cleaned != current:
        setattr(obj, attribute, cleaned)
        return True
    return False
def _normalize_richtext_field(obj, attribute, field_factory):
    """Normalize one rich text attribute of the given object.

    The raw text of the stored value is normalized and, if that changes
    it, a new ``RichTextValue`` replaces the old one. The MIME types and
    the encoding of the existing value are carried over, so a value stored
    with a non-default MIME type is not silently relabelled; the field's
    defaults are only used when the value does not provide its own.

    :param obj: the object holding the attribute
    :param attribute: name of the rich text attribute to normalize
    :param field_factory: the schema field, source of the fallback
        ``default_mime_type`` and ``output_mime_type``
    :returns: ``True`` if the attribute was rewritten, ``False`` otherwise
    """
    data = getattr(obj, attribute, '')
    # missing or empty values are left alone
    if not data:
        return False
    raw = data.raw
    result = normalize(raw)
    if result != raw:
        mime_type = (
            getattr(data, 'mimeType', None)
            or field_factory.default_mime_type
        )
        output_mime_type = (
            getattr(data, 'outputMimeType', None)
            or field_factory.output_mime_type
        )
        # 'utf-8' is the fallback when the value carries no encoding
        encoding = getattr(data, 'encoding', None) or 'utf-8'
        value = RichTextValue(
            raw=result,
            mimeType=mime_type,
            outputMimeType=output_mime_type,
            encoding=encoding,
        )
        setattr(obj, attribute, value)
        return True
    return False
def _fields(portal_type):
    """Get all fields and fields' descriptions from the given portal type.

    The fields come from the schema of the type's FTI (inherited ones
    included) plus the interface of every behavior listed on the FTI.

    :param portal_type: name of a Dexterity portal type
    :returns: a list of ``(field name, field)`` pairs
    """
    fti = getUtility(IDexterityFTI, name=portal_type)
    schema = fti.lookupSchema()
    # copy into a real list: ``namesAndDescriptions`` may hand back a
    # dict view, which cannot be extended with ``+=``
    fields = list(schema.namesAndDescriptions(all=True))
    for behavior_name in fti.behaviors:
        factory = getUtility(IBehavior, behavior_name)
        # NOTE(review): unlike the main schema, behaviors are queried
        # without ``all=True``, so fields inherited from base interfaces
        # are not included -- confirm this is intended
        fields.extend(factory.interface.namesAndDescriptions())
    return fields
def normalize(text):
    """Helper method to keep text normalized.

    Strings are converted to Unicode normalization form C (NFC) and every
    character of a Unicode "Other" category (``Cc``, ``Cf``, ``Cs``,
    ``Co``, ``Cn``) is dropped. Note that this includes line breaks and
    tabs. Works on both Python 2 and Python 3.

    :param text: a string of text, either unicode or a UTF-8 encoded byte
        string; any other value is returned untouched
    :returns: the normalized version of 'text' as a unicode string
    :raises UnicodeDecodeError: if a byte string is not valid UTF-8
    """
    # ``unicode`` on Python 2, ``str`` on Python 3
    text_type = type(u'')
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    elif not isinstance(text, text_type):
        # not a string at all: nothing to normalize
        return text
    result = unicodedata.normalize('NFC', text)
    # remove control characters, on text fields they do more harm than good
    # see https://gitlab.com/der-freitag/zope/issues/2230
    # one liner from https://stackoverflow.com/questions/4324790
    # (joined on a unicode literal so the result is always text)
    return u''.join(
        char for char in result if unicodedata.category(char)[0] != 'C'
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment