Skip to content

Instantly share code, notes, and snippets.

@mathjazz
Last active March 12, 2021 14:00
Show Gist options
  • Save mathjazz/16e26edad30963881320664eda458e94 to your computer and use it in GitHub Desktop.
Save mathjazz/16e26edad30963881320664eda458e94 to your computer and use it in GitHub Desktop.
tm.py
import time
from pontoon.base.models import TranslationMemoryEntry, Translation
from bulk_update.helper import bulk_update
tm_entries_to_update = []

# Build a lookup of TM entries whose translation ForeignKey is null.
# - key: (entity, locale, source, target) tuple as returned by values_list()
# - value: primary key of the TM entry
#
# The primary key is fetched in the SAME query as the key fields. Pairing the
# rows of two separate, unordered queries by list index is not safe because the
# database gives no guarantee that both queries return rows in the same order.
# A tuple key is used instead of a separator-joined string so that field values
# containing the separator character cannot produce colliding keys.
tm_entries_dict = {}
tm_entries_without_translation = TranslationMemoryEntry.objects.filter(
    translation__isnull=True
)
tm_entries_without_translation_pks = []
for tm_pk, entity, locale, source, target in tm_entries_without_translation.values_list(
    "pk", "entity", "locale", "source", "target"
):
    tm_entries_without_translation_pks.append(tm_pk)
    # If several TM entries share the same key, the last one seen wins.
    tm_entries_dict[(entity, locale, source, target)] = tm_pk

# Loop over all approved translations without TM entries, find the matching
# TM entry (same entity, locale, source string and target string) and queue an
# update that assigns the translation to it.
translations_not_in_tm = Translation.objects.filter(
    approved=True, memory_entries__isnull=True
)
translations_not_in_tm_pks = []
for translation_pk, entity, locale, source, target in translations_not_in_tm.values_list(
    "pk", "entity", "locale", "entity__string", "string"
):
    translations_not_in_tm_pks.append(translation_pk)
    tm_entry_pk = tm_entries_dict.get((entity, locale, source, target))
    if tm_entry_pk is None:
        # No orphaned TM entry matches this translation; nothing to link.
        continue
    # Unsaved instance carrying only the pk and the new FK value; it is meant
    # to be written later with an update restricted to the 'translation' field.
    tm_entries_to_update.append(TranslationMemoryEntry(
        pk=tm_entry_pk,
        translation_id=translation_pk,
    ))
# Write the queued TM entries in batches of `batch_size`, updating only the
# 'translation' field. Stepping through start offsets (instead of counting
# len // batch_size full batches) ensures the final, partially filled batch is
# written too, and that lists shorter than one batch are not skipped entirely.
batch_size = 1000
for i, start in enumerate(range(0, len(tm_entries_to_update), batch_size)):
    print(i)  # progress: index of the batch being written
    bulk_update(
        tm_entries_to_update[start:start + batch_size],
        update_fields=['translation'],
    )
    # Pause between batches (presumably to throttle database load).
    time.sleep(1)

# Counts that can be inspected in the shell:
#   len(tm_entries_without_translation_pks)
#   len(translations_not_in_tm_pks)
#   len(tm_entries_to_update)
@jotes
Copy link

jotes commented May 31, 2017

I would order_by() those queries to be sure that indexes will match.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment