Last active
June 21, 2018 23:21
-
-
Save hvelarde/288d82fa046bf72bd3c151f25685247c to your computer and use it in GitHub Desktop.
Clean up least used catalog keywords
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from plone import api | |
import transaction | |
def get_least_used(limit=1):
    """Return the Subject keywords that match at most ``limit`` catalog results.

    Every unique value of the ``Subject`` index is looked up with its own
    catalog query; a keyword is kept when that query yields ``limit``
    results or fewer.

    :param limit: highest result count for which a keyword is still reported.
    :returns: list of the matching keywords.
    """
    # NOTE(review): the catalog tool may filter results by the current
    # user's permissions, in which case the counts depend on who runs
    # this -- confirm.
    portal_catalog = api.portal.get_tool('portal_catalog')
    return [
        keyword
        for keyword in portal_catalog.uniqueValuesFor('Subject')
        if not len(portal_catalog(Subject=keyword)) > limit
    ]
def remove_least_used(limit=1):
    """Remove all keywords used up to limit times in content.

    Iterates over the brains returned by a ``portal_catalog`` query with no
    criteria, drops from each object's Subject the keywords reported by
    ``get_least_used(limit)``, and reindexes only the ``Subject`` index of
    the objects that actually changed. A savepoint is taken after every
    1000 modified objects and the transaction is committed once the loop
    has finished.

    :param limit: passed through to ``get_least_used``; keywords with at
        most this many catalog results are removed.
    """
    catalog = api.portal.get_tool('portal_catalog')
    results = catalog()
    # Build the lookup set once: the membership test below runs for every
    # keyword of every object, and a list would make each test linear.
    least_used = set(get_least_used(limit))
    n = 0  # number of objects modified so far
    for brain in results:
        try:
            obj = brain.getObject()
        except (AttributeError, KeyError):
            continue  # skip broken objects

        keywords = list(obj.Subject())
        # remove keywords used up to limit times
        cleanup = [k for k in keywords if k not in least_used]
        if keywords == cleanup:
            continue  # no changes on object

        obj.setSubject(cleanup)
        # Only the Subject index is refreshed; catalog metadata is left
        # untouched (update_metadata=False).
        catalog.catalog_object(obj, idxs=['Subject'], update_metadata=False)
        n += 1
        if n % 1000 == 0:
            transaction.savepoint()

    transaction.commit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment