Skip to content

Instantly share code, notes, and snippets.

@hvelarde
Last active June 21, 2018 23:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hvelarde/288d82fa046bf72bd3c151f25685247c to your computer and use it in GitHub Desktop.
Save hvelarde/288d82fa046bf72bd3c151f25685247c to your computer and use it in GitHub Desktop.
Clean up least used catalog keywords
# -*- coding: utf-8 -*-
from plone import api
import transaction
def get_least_used(limit=1):
    """Return the Subject keywords that are used at most ``limit`` times.

    Every unique value of the ``Subject`` index is looked up on its own;
    the usage count of a keyword is the number of results the catalog
    returns for a query on that single keyword.

    :param limit: highest usage count that still qualifies (default 1).
    :returns: list of the qualifying keywords, in the order the catalog
        reported them.
    """
    portal_catalog = api.portal.get_tool('portal_catalog')
    return [
        keyword
        for keyword in portal_catalog.uniqueValuesFor('Subject')
        if len(portal_catalog(Subject=keyword)) <= limit
    ]
def remove_least_used(limit=1):
    """Remove all keywords used up to limit times in content.

    Walks the brains returned by an unfiltered catalog query, strips the
    keywords reported by ``get_least_used(limit)`` from the Subject of
    each object, reindexes the ``Subject`` index of every object that
    changed and finally commits the transaction.

    :param limit: keywords with at most this many catalog results are
        removed (default 1).
    """
    catalog = api.portal.get_tool('portal_catalog')
    # build the lookup once; a set keeps the per-keyword test cheap
    least_used = frozenset(get_least_used(limit))
    # with nothing to remove there is no point in loading every object
    brains = catalog() if least_used else ()
    changed = 0
    for brain in brains:
        try:
            obj = brain.getObject()
        except (AttributeError, KeyError):
            continue  # skip broken objects
        keywords = list(obj.Subject())
        # remove keywords used up to limit times
        cleanup = [k for k in keywords if k not in least_used]
        if keywords == cleanup:
            continue  # no changes on object
        obj.setSubject(cleanup)
        # NOTE(review): only the Subject index is refreshed; if Subject is
        # also a metadata column the brains keep the old value - confirm
        # that update_metadata=False is intended.
        catalog.catalog_object(obj, idxs=['Subject'], update_metadata=False)
        changed += 1
        if changed % 1000 == 0:
            # take a savepoint every 1000 modified objects
            transaction.savepoint()
    transaction.commit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment