Skip to content

Instantly share code, notes, and snippets.

@frague59
Created November 6, 2015 09:03
Show Gist options
  • Save frague59/aab071f0bdce5b010ce4 to your computer and use it in GitHub Desktop.
Save frague59/aab071f0bdce5b010ce4 to your computer and use it in GitHub Desktop.
search features with attachment supports
# -*- coding: utf-8 -*-
"""
Search features for an elasticsearch / haystack / elasticstack
:creationdate: 05/11/15 15:05
:moduleauthor: François GUÉRIN <fguerin@ville-tourcoing.fr>
:modulename: intrautils.search
"""
import base64
import json
import logging
import mimetypes
from django import forms
from django.conf import settings
from django.db.models.fields.files import FieldFile as dj_File
from django.templatetags.static import static
from django.utils.translation import ugettext_lazy as _
from elasticstack.backends import ConfigurableElasticBackend, ConfigurableElasticSearchEngine
from elasticstack.fields import FacetField
from elasticstack.forms import SearchForm
from filer.models import File as fi_File
from form_utils.forms import BetterForm
from haystack import DEFAULT_ALIAS
from haystack.constants import DJANGO_CT, DJANGO_ID
from haystack.fields import SearchField
from haystack.forms import model_choices
from utils import widgets as u_widgets
from utils.forms import CollapsibleFieldsetFormMixin
__author__ = 'fguerin'

# Module-level logger shared by the backend, the fields and the form below.
logger = logging.getLogger('intrautils.search')

# Mapping applied to any field type that has no entry in FIELD_MAPPINGS.
DEFAULT_FIELD_MAPPING = {
    'type': 'string',
    'analyzer': 'snowball',
}

# haystack field type -> elasticsearch mapping fragment.
FIELD_MAPPINGS = {
    # Text types with a dedicated analyzer
    'edge_ngram': {
        'type': 'string',
        'analyzer': 'edgengram_analyzer',
    },
    'ngram': {
        'type': 'string',
        'analyzer': 'ngram_analyzer',
    },
    # Temporal types: both map to the elasticsearch `date` type
    'date': {'type': 'date'},
    'datetime': {'type': 'date'},
    # Geographic type
    'location': {'type': 'geo_point'},
    # Scalar types; note that `integer` is stored as `long`
    'boolean': {'type': 'boolean'},
    'float': {'type': 'float'},
    'long': {'type': 'long'},
    'integer': {'type': 'long'},
    # Binary documents
    'attachment': {'type': 'attachment'},
}
class ExtendedElasticsearchBackend(ConfigurableElasticBackend):
    """
    Adds `attachment` support for elasticsearch backend settings
    """

    def build_schema(self, fields):
        """
        Build the elasticsearch mapping for the given index fields.

        Merge of the haystack and elasticstack elasticsearch backend
        `build_schema` methods. It provides an additional feature: custom
        field mappings, read from the `ELASTICSEARCH_FIELD_MAPPINGS` and
        `ELASTICSEARCH_DEFAULT_FIELD_MAPPINGS` settings, falling back to the
        module-level `FIELD_MAPPINGS` / `DEFAULT_FIELD_MAPPING` dicts.

        :param fields: dict-like mapping of field names to field instances
        :return: tuple `(content_field_name, mapping)`; `content_field_name`
                 is the index name of the last field flagged as `document`
                 (empty string when there is none)
        """
        content_field_name = ''
        mapping = {
            DJANGO_CT: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
            DJANGO_ID: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
        }
        field_mappings = getattr(settings, 'ELASTICSEARCH_FIELD_MAPPINGS', FIELD_MAPPINGS)
        default_field_mapping = getattr(settings, 'ELASTICSEARCH_DEFAULT_FIELD_MAPPINGS', DEFAULT_FIELD_MAPPING)

        for field_class in fields.values():
            # Copy so that per-field tweaks never leak into the shared mapping dicts.
            field_mapping = field_mappings.get(field_class.field_type, default_field_mapping).copy()

            if field_class.boost != 1.0:
                field_mapping['boost'] = field_class.boost

            if field_class.document is True:
                content_field_name = field_class.index_fieldname

            # Do this last to override `text` fields.
            if field_mapping['type'] == 'string':
                is_facet = hasattr(field_class, 'facet_for')
                if not field_class.indexed or is_facet:
                    # Part of the haystack implementation that the merge had
                    # dropped: non-indexed and facet fields must keep their
                    # exact value, so they are not analyzed at all.
                    field_mapping['index'] = 'not_analyzed'
                    field_mapping.pop('analyzer', None)
                elif field_class.field_type not in ('ngram', 'edge_ngram'):
                    # elasticstack part: per-field analyzer, with the backend
                    # default as fallback. ngram types keep their own analyzer.
                    field_mapping['analyzer'] = getattr(field_class, 'analyzer', self.DEFAULT_ANALYZER)

            mapping[field_class.index_fieldname] = field_mapping

        return content_field_name, mapping
class ExtendedElasticSearchEngine(ConfigurableElasticSearchEngine):
    """
    Search engine declaring `ExtendedElasticsearchBackend` as its backend.
    """

    backend = ExtendedElasticsearchBackend
class AttachmentField(SearchField):
    """
    Index field that sends a file to elasticsearch as an `attachment`.

    The prepared value is a dict holding the base64-encoded file content
    along with its name, content type and length.
    """
    field_type = 'attachment'
    # Both options can be overridden per instance through the constructor.
    author_field = 'author'
    # Default added so the attribute always exists, even when not provided.
    content_type_field = None

    def __init__(self, **kwargs):
        """
        Accepts the optional `content_type_field` and `author_field` keyword
        arguments; they are removed from `kwargs` before the remaining
        arguments are handed to the base constructor.
        """
        if 'content_type_field' in kwargs:
            self.content_type_field = kwargs.pop('content_type_field')
        if 'author_field' in kwargs:
            self.author_field = kwargs.pop('author_field')
        super(AttachmentField, self).__init__(**kwargs)

    def convert(self, value):
        """
        Return `value` unchanged.
        """
        return value

    @staticmethod
    def _get_file_data(field):
        """
        Build the attachment payload for a file.

        :param field: a filer `File` instance, or any other file-like value
                      (`prepare()` only passes filer `File` or django
                      `FieldFile` instances)
        :return: dict with the `_language`, `_content`, `_content_type`,
                 `_name`, `_title` and `_content_length` keys
        """
        if isinstance(field, fi_File):
            field_file = field.file
            name = field.label
            # guess_type() returns a (type, encoding) tuple: keep the type only.
            content_type = mimetypes.guess_type(name)[0]
        else:  # isinstance(field, dj_File):
            field_file = field
            name = None
            content_type = None

        # The wrapper may not support len(); fall back on the wrapped file.
        try:
            content_length = len(field_file)
        except TypeError:
            content_length = len(field_file.file)

        # File-like objects are read; anything else is encoded as raw data.
        try:
            content = base64.b64encode(field_file.read())
        except AttributeError:
            content = base64.b64encode(field_file)

        return {
            '_language': 'fr',
            '_content': content,
            '_content_type': content_type,
            '_name': name,
            '_title': name,
            '_content_length': content_length,
        }

    def prepare(self, obj):
        """
        Return the attachment payload for `obj`.

        The file is read from the `model_attr` attribute of `obj` when
        `model_attr` is set, otherwise `obj` itself is used as the file.

        :param obj: the object being indexed
        :return: dict built by `_get_file_data()`
        :raises NotImplementedError: when the value is neither a django
                                     `FieldFile` nor a filer `File`
        """
        field = getattr(obj, self.model_attr) if self.model_attr else obj
        if not isinstance(field, (dj_File, fi_File)):
            raise NotImplementedError('AttachmentField does not implement file reading for %s file'
                                      % field.__class__.__name__)

        output = self._get_file_data(field)
        if settings.DEBUG:
            # Shallow copy with a truncated payload: avoids duplicating the
            # whole base64 content just to log its first characters.
            preview = dict(output, _content=output['_content'][:50] + '...')
            logger.debug(u'AttachmentField::prepare() output = %s', json.dumps(preview, indent=2))
        return output
class FacetedAttachmentField(FacetField, AttachmentField):
    """
    Attachment field combined with `FacetField`; adds nothing of its own.
    """
def application_model_choices(app_name, using=DEFAULT_ALIAS):
    """
    Restrict the haystack model choices to a single application.

    :param app_name: text looked up in the first item of each choice
    :param using: haystack connection alias handed to `model_choices()`
    :return: list of the choices whose first item contains `app_name`,
             in the order returned by `model_choices()`
    """
    return [entry for entry in model_choices(using) if app_name in entry[0]]
class HaystackSearchForm(CollapsibleFieldsetFormMixin, BetterForm, SearchForm):
    """
    haystack search form for main `searching` feature

    Subclasses must set `search_app` (an application name, or a tuple / list
    of names) or override `get_search_app()`.
    """

    class Media:
        js = (static('bootstrap-collapsible-fieldset/bootstrap-collapsible-fieldset.js'),)
        css = {'all': (static('bootstrap-collapsible-fieldset/bootstrap-collapsible-fieldset.css'),)}

    class Meta:
        collapsed = True
        fieldsets = (('main', {'legend': _('search'), 'fields': ('q', 'models')}),)

    # Application name(s) the results are restricted to.
    search_app = None

    models = forms.MultipleChoiceField(choices=application_model_choices('intrapubs'),
                                       required=False,
                                       label=_('Search in'),
                                       widget=u_widgets.ColumnCheckboxSelectMultiple(columns=3))

    def get_search_app(self):
        """
        Return the application name(s) used to filter the results.

        :raises NotImplementedError: when `search_app` is not set
        """
        if self.search_app:
            return self.search_app
        # The message used to be raised with its `%s` placeholder unformatted.
        raise NotImplementedError('%s must provide a search_app attribute or override get_search_app() method.'
                                  % self.__class__.__name__)

    def get_models(self):
        """
        Return the model classes selected in the `models` field, in the order
        they appear in the cleaned data; an empty list when the form is not
        valid.
        """
        if not self.is_valid():
            return []

        # The original code called an undefined `dj_models` name. Resolve the
        # lookup function locally instead: the application registry where it
        # exists, the historical `django.db.models` helper otherwise.
        try:
            from django.apps import apps
        except ImportError:
            from django.db.models import get_model
        else:
            get_model = apps.get_model

        return [get_model(*model.split('.')) for model in self.cleaned_data['models']]

    def search(self):
        """
        Run the search, then restrict it to the selected models and to the
        application(s) returned by `get_search_app()`.

        :return: the filtered search query set, or an empty list when the
                 base search yields nothing
        """
        from haystack.query import SQ

        search_app = self.get_search_app()
        search_query_set = super(HaystackSearchForm, self).search()
        if settings.DEBUG:
            logger.debug(u'HaystackSearchForm::search() len(search_query_set) = %d '
                         u'(before models filtering)', len(search_query_set))
        if not search_query_set:
            return []

        search_query_set = search_query_set.models(*self.get_models())
        if isinstance(search_app, basestring):
            search_query_set = search_query_set.filter(django_ct__contains=search_app)
        elif isinstance(search_app, (tuple, list)):
            # OR the applications together, then AND the result with the
            # query. Chaining filter_or() would OR each application with the
            # whole existing query, matching every document of these
            # applications whatever the search terms.
            app_filter = None
            for app in search_app:
                clause = SQ(django_ct__contains=app)
                app_filter = clause if app_filter is None else app_filter | clause
            if app_filter is not None:
                search_query_set = search_query_set.filter(app_filter)

        if settings.DEBUG:
            logger.debug(u'HaystackSearchForm::search() len(search_query_set) = %d '
                         u'(after models filtering)', len(search_query_set))
        return search_query_set

    def no_query_found(self):
        """
        Return an empty result list.
        """
        return []
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment