Skip to content

Instantly share code, notes, and snippets.

@acdha
Created December 30, 2010 21:16
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save acdha/760311 to your computer and use it in GitHub Desktop.
Save acdha/760311 to your computer and use it in GitHub Desktop.
Django haystack prepare() method using proposed backend.extract() for rich-content handling
class MyIndex(SearchIndex):
    """Search index that folds extracted file metadata into the document.

    The ``text`` document field is declared with ``use_template=False``
    because ``prepare()`` renders the template itself, after the extracted
    and human-entered metadata have been collected.
    """

    text = fields.CharField(document=True, use_template=False)

    def prepare(self, obj):
        """Build the index data for ``obj``.

        Starts from the parent class's prepared data, adds one ``attr_<name>``
        entry per metadata item returned by ``self.backend.extract()`` for the
        file at ``obj.file_field.path``, lets the entries in
        ``obj.attributes`` overwrite any conflicting extracted values, and
        finally renders the ``text`` field from a template.

        Returns the prepared data dict.
        """
        data = super(MyIndex, self).prepare(obj)

        # Close the file deterministically instead of leaking the handle.
        # NOTE(review): assumes backend.extract() fully consumes the file
        # before returning -- confirm against the backend implementation.
        with open(obj.file_field.path, "rb") as file_obj:
            extracted_data = self.backend.extract(file_obj)

        if extracted_data is not None:
            for k, v in extracted_data['metadata'].items():
                # Store the extracted value (previously the key itself was
                # stored, discarding the actual metadata).
                data["attr_%s" % k] = v
        else:
            self.log.warning("Metadata extraction failed for %s", obj)

        # Now we'll allow human-entered data to overwrite any conflicting
        # values:
        for k, v in obj.attributes.items():
            data["attr_%s" % k] = v

        # Now we'll finally perform the template processing to render the
        # text field with *all* of our metadata visible for templating:
        t = loader.select_template(('search/indexes/myapp/mymodel_text.txt',))
        data['text'] = t.render(Context({'object': obj,
                                         'extracted': extracted_data}))

        return data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment