|
# Designed to be pasted in a Django shell session |
|
# Bug 1293749 - Add missing UNIQUE KEY indexes in production |
|
# Some tags are duplicates according to collation, such as "Tag" and "tag" |
|
# Prefer the tag with the most usage, falling back to smallest ID |
|
# Drop unused tags, to simplify tag administration |
|
|
|
# One row per tag model to clean up. Each row is unpacked by process_tags as:
#   model                  - the tag model class
#   relation_name          - attribute on a tag giving its related items
#                            (used for .count() and .all())
#   reverse_relation_name  - attribute on a tagged object giving its tag
#                            manager (used for .remove() and .add())
tag_relations = (
    (Tag, 'taggit_taggeditem_items', 'tags'),
    (DocumentTag, 'wiki_taggeddocument_items', 'tags'),
    (
        LocalizationTag,
        'wiki_localizationtaggedrevision_items',
        'localization_tags',
    ),
    (ReviewTag, 'wiki_reviewtaggedrevision_items', 'review_tags'),
)
|
|
|
|
|
def scan_tags(model, relation_name):
    """Find unused tags and in-use duplicate tags that should be replaced.

    Args:
        model: Tag model class. Must provide ``objects.all()`` and
            ``objects.filter(name=...)``; the filtered result must support
            ``values_list('id', flat=True)``.
        relation_name: Name of the attribute on each tag whose ``count()``
            gives the number of items using that tag.

    Returns:
        A tuple ``(empty_tag_ids, replace_tag_ids)``:

        * ``empty_tag_ids`` - list of IDs of tags with no related items.
        * ``replace_tag_ids`` - dict mapping the ID of each in-use duplicate
          tag to the ID of the tag that should replace it.
    """
    counts = {}
    dupe_ids = {}
    empty_tag_ids = []

    # Count items for all tags, note unused tags, and find duplicates.
    for tag in model.objects.all():
        rel_count = getattr(tag, relation_name).count()
        counts[tag.id] = rel_count
        if not rel_count:
            empty_tag_ids.append(tag.id)
        # The name comparison is delegated to the database, so names that
        # are equal according to its collation (such as "Tag" and "tag")
        # come back together. Fetch the IDs once instead of running a
        # separate count() query first.
        same_name_ids = list(
            model.objects.filter(name=tag.name).values_list('id', flat=True))
        if len(same_name_ids) > 1:
            dupe_ids[tag.id] = same_name_ids

    # Pick replacement tags for duplicates.
    replace_tag_ids = {}
    for orig_id, duplicate_ids in dupe_ids.items():
        if not counts[orig_id]:
            # Unused duplicates are reported in empty_tag_ids instead.
            continue
        # Winner has the most relations; ties go to the lowest ID.
        winner = max(
            duplicate_ids, key=lambda tag_id: (counts[tag_id], -tag_id))
        if winner != orig_id:
            replace_tag_ids[orig_id] = winner
    return empty_tag_ids, replace_tag_ids
|
|
|
|
|
def drop_empty_tags(model, empty_tag_ids, dry_run):
    """Report each unused tag and, unless this is a dry run, delete it.

    Args:
        model: Tag model class; each tag is loaded with
            ``model.objects.get(id=...)``.
        empty_tag_ids: IDs of the tags to drop.
        dry_run: When true, only print what would be dropped.
    """
    label = model._meta.object_name
    message = 'Dropping %s "%s" with no related items'
    for unused_id in empty_tag_ids:
        unused_tag = model.objects.get(id=unused_id)
        print(message % (label, unused_tag.name.encode('utf8')))
        if dry_run:
            continue
        unused_tag.delete()
|
|
|
|
|
def move_tags(model, relation_name, reverse_relation_name, replace_tag_ids, dry_run):
    """Swap duplicate tags for their replacements, then delete the duplicates.

    Args:
        model: Tag model class; tags are loaded with
            ``model.objects.get(id=...)``.
        relation_name: Attribute on a tag whose ``all()`` yields its related
            items; each item's ``content_object`` is the tagged object.
        reverse_relation_name: Attribute on a tagged object that provides
            ``remove(tag)`` and ``add(tag)``.
        replace_tag_ids: Dict mapping the ID of a tag to remove to the ID of
            the tag that replaces it.
        dry_run: When true, only print the planned changes.
    """
    apply_changes = not dry_run
    for old_id, keep_id in replace_tag_ids.items():
        old_tag = model.objects.get(id=old_id)
        keep_tag = model.objects.get(id=keep_id)
        old_label = old_tag.name.encode('utf8')
        keep_label = keep_tag.name.encode('utf8')
        print('Replace "%s" with "%s"' % (old_label, keep_label))

        # The related items are materialised into a list before the loop
        # below starts removing and adding tags.
        for relation in list(getattr(old_tag, relation_name).all()):
            target = relation.content_object
            target_tags = getattr(target, reverse_relation_name)

            print(' Removing "%s" from tags for %s' % (old_label, target))
            if apply_changes:
                target_tags.remove(old_tag)

            print(' Adding "%s" to tags for %s' % (keep_label, target))
            if apply_changes:
                target_tags.add(keep_tag)

        print(' Deleting "%s"' % old_label)
        if apply_changes:
            old_tag.delete()
|
|
|
def process_tags(tag_relations, dry_run=True):
    """Scan, prune, and de-duplicate the tags of every listed tag model.

    Args:
        tag_relations: Iterable of ``(model, relation_name,
            reverse_relation_name)`` triples, one per tag model.
        dry_run: When true (the default), the changes are only printed.
    """
    for relation in tag_relations:
        model, relation_name, reverse_relation_name = relation
        # Each model is scanned in full before any of its tags are touched.
        unused_ids, replacements = scan_tags(model, relation_name)
        drop_empty_tags(model, unused_ids, dry_run)
        move_tags(
            model,
            relation_name,
            reverse_relation_name,
            replacements,
            dry_run,
        )
|
|
|
# Dry run: only prints the planned changes. Pass dry_run=False to apply them.
process_tags(tag_relations, dry_run=True)