Created
February 26, 2018 14:16
-
-
Save colonelrascals/100fa26da23fd9c6ec19a342f64fe11b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from __future__ import unicode_literals | |
from itertools import chain | |
import datetime | |
import hashlib | |
from django.db import models | |
from django.urls import reverse | |
from django.contrib.auth.models import AbstractBaseUser, BaseUserManager, PermissionsMixin | |
from django_mysql.models import JSONField, Model | |
from django.template.loader import render_to_string | |
from django.core.mail import send_mail | |
from django.utils import timezone | |
from django.conf import settings | |
from app.es.resource import Resource as ResourceDoc | |
class Resource(models.Model):
    """Initial representation of a resource."""

    # ID from fixture resources, for internal dedupe
    internal_id = models.CharField(max_length=20, null=True, blank=True)
    name = models.CharField(max_length=200)
    description = models.TextField(null=True, blank=True)
    categories = models.ManyToManyField("Category")
    neighborhoods = models.ManyToManyField("Neighborhood")
    email_contact = models.EmailField(null=True, blank=True)
    pdf = models.ManyToManyField("PDF")
    phone = models.CharField(max_length=200, blank=True, null=True)
    website = models.URLField(max_length=200, blank=True, null=True)

    # address
    street_address = models.CharField(max_length=400, null=True, blank=True)
    city = models.CharField(max_length=100, null=True, blank=True)
    state = models.CharField(max_length=10, null=True, blank=True)
    latitude = models.FloatField(null=True, blank=True)
    longitude = models.FloatField(null=True, blank=True)
    zip_code = models.CharField(max_length=10, null=True, blank=True)

    # meta
    created_at = models.DateTimeField(auto_now_add=True)
    modified_at = models.DateTimeField(auto_now=True)

    def __str__(self):
        return self.name

    def __unicode__(self):
        return u'{}'.format(self.name)

    @property
    def categories_str(self):
        """Return the string form of every related category."""
        return [str(t) for t in self.categories.all()]

    @property
    def neighborhoods_str(self):
        """Return the string form of every related neighborhood.

        Falls back to ``["Houston"]`` when the resource has no neighborhoods.
        """
        return [str(t) for t in self.neighborhoods.all() if t] or ["Houston"]

    @property
    def bookmark(self):
        """This is here to make it easier to serialize a standard resource."""
        return getattr(self, "_bookmark", None)

    @bookmark.setter
    def bookmark(self, bookmark):
        self._bookmark = bookmark

    def indexing(self):
        """Save this resource to the "resources" search index.

        Builds a ``ResourceDoc`` from the model fields, attaches ``geo_coords``
        when both coordinates are set, saves the document and returns it as a
        dict (including its meta information).
        """
        safe_zip = str(self.zip_code or "")
        # Read the property once: every access re-evaluates
        # ``self.neighborhoods.all()``, and the same list feeds two fields.
        neighborhoods = self.neighborhoods_str
        obj = ResourceDoc(
            meta={"id": self.id},
            name=self.name,
            resource_suggest=self.name,
            email_contact=self.email_contact,
            phone=self.phone,
            description=self.description,
            website=self.website,
            categories=self.categories_str,
            street_address=self.street_address,
            city=self.city,
            state=self.state,
            zip_code=safe_zip,
            neighborhoods=neighborhoods,
            # TODO default to Houston for now but need a way to handle case where we don't know neighborhood or zip code
            # Empty entries (e.g. a missing zip code) are left out.
            location_suggest=[attr for attr in chain([safe_zip], neighborhoods) if attr],
            created_at=self.created_at,
            modified_at=self.modified_at,
        )
        # Compare against None rather than truthiness: 0.0 is a valid
        # latitude/longitude and must not be treated as "missing".
        if self.latitude is not None and self.longitude is not None:
            obj.geo_coords = {
                "lat": str(self.latitude),
                "lon": str(self.longitude),
            }
        obj.save(index="resources")
        return obj.to_dict(include_meta=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from elasticsearch import Elasticsearch | |
import json | |
# Ad-hoc script: resolve a zip code to coordinates, then list the resources
# within 8km of that point that match a free-text query, nearest first.
es = Elasticsearch(["localhost:9200"])

# Search inputs; fill these in before running.
resource_query = ""
query_zip_code = ""

# Step 1: look up the zip code document to get its coordinates.
query_body = {
    "query": {
        # Pass the zip code as-is: json.dumps() would wrap it in literal
        # double quotes, which then become part of the search text.
        "match": {"Zip_Code": query_zip_code}
    }
}
res_zip = es.search(index="welnity_zipcode", doc_type="zipcode", body=query_body)

zip_hits = res_zip['hits']['hits']
if not zip_hits:
    # Without a zip match there is no origin to measure distance from.
    raise SystemExit("No location found for zip code %r" % query_zip_code)

# The same origin is used by both the distance filter and the distance sort.
zip_location = zip_hits[0]['_source']['location']
origin = {
    "lat": zip_location['lat'],
    "lon": zip_location['lon'],
}

# Step 2: text match on the resource fields, restricted to 8km around the
# origin and sorted by distance from it.
query_body = {
    "size": 10000,
    "query": {
        "bool": {
            "must": {
                "multi_match": {
                    "query": resource_query,
                    "fields": ["Resource^3", "Category^5", "Description^4"],
                    "type": "cross_fields",
                    "tie_breaker": 1.0
                }
            },
            "filter": {
                "geo_distance": {
                    "distance": "8km",
                    "location": origin
                }
            }
        }
    },
    "sort": [
        {
            "_geo_distance": {
                "location": origin,
                "order": "asc",
                "unit": "km",
                "distance_type": "plane"
            }
        }
    ]
}
res = es.search(index="welnity_production_2", doc_type="resource", body=query_body)

print("%d Resources Found!" % res['hits']['total'])
# Iterate the hits that were actually returned: 'total' counts every match
# and can exceed the number of documents in this response.
for hit in res['hits']['hits']:
    source = hit['_source']
    print("Resource: ", source['Resource'])
    print("Category: ", source['Category'])
    # NOTE(review): the zip index is queried on "Zip_Code" above but this
    # reads "Zip_code" -- confirm the field name in the resource mapping.
    print("Zip Code: ", source['Zip_code'])
    print("-------------------------------------------")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import json | |
from elasticsearch_dsl.connections import connections | |
from elasticsearch_dsl import Index, DocType, String, Text, Date, Completion, GeoPoint, analyzer, Q, Search | |
from elasticsearch.helpers import bulk | |
from elasticsearch import Elasticsearch | |
from django.conf import settings | |
from connection import conn | |
logger = logging.getLogger("app.es") | |
# Index object for the "resources" index, configured with five shards and
# no replicas.
resources_index = Index('resources')
resources_index.settings(number_of_replicas=0, number_of_shards=5)
##eliminate the location of neighborhood | |
@resources_index.doc_type
class Resource(DocType):
    """Search document for a resource, registered on ``resources_index``."""

    # descriptive fields
    name = String()
    description = Text()
    categories = String()

    # address / location fields
    street_address = String()
    city = String()
    state = String()
    geo_coords = GeoPoint()
    zip_code = String()
    neighborhoods = String()

    # timestamps
    created_at = Date()
    modified_at = Date()

    # autocomplete fields
    resource_suggest = Completion(analyzer=analyzer("standard"))
    location_suggest = Completion(analyzer=analyzer("standard"))
def resource_suggest(prefix):
    """Log the lookup, then return ``suggest()`` results for *prefix* on the
    'resource_suggest' field."""
    message = "Searching for resource suggestions for '{}'".format(prefix)
    logger.info(message)
    suggestions = suggest(prefix, 'resource_suggest')
    return suggestions
def location_suggest(prefix):
    """Log the lookup, then return ``suggest()`` results for *prefix* on the
    'location_suggest' field."""
    message = "Searching for location suggestions for '{}'".format(prefix)
    logger.info(message)
    suggestions = suggest(prefix, 'location_suggest')
    return suggestions
def suggest(prefix, field):
    """Return de-duplicated completion suggestions for *prefix*.

    Runs a fuzzy completion suggester against *field* on the ``Resource``
    document and collects the text of every returned option.

    :param prefix: text typed so far.
    :param field: name of the completion field to query.
    :returns: list of unique suggestion texts, in the order they were
        returned, at most ``settings.AUTOSUGGEST_LIMIT`` long.
    """
    s = Resource.search()
    s = s.suggest(
        'suggestions',
        text=prefix,
        completion={
            'field': field,
            # Over-fetch so enough unique entries remain after deduping.
            'size': 100,
            'fuzzy': {'fuzziness': settings.AUTOSUGGEST_FUZZINESS},
        },
    )
    logger.info("ES query: {}".format(json.dumps(s.to_dict())))
    results = s.execute_suggest()

    # TODO: for now we hack deduping because ES5 doesn't do it for us (see size in query!)
    # Dedupe while keeping the order the options came back in; going through
    # set() would shuffle them, so the truncation below would keep an
    # arbitrary subset instead of the first matches.
    seen = set()
    payloads = []
    for r in results.suggestions:
        if not r.options:
            continue
        for option in r.options:
            text = option.text
            if text not in seen:
                seen.add(text)
                payloads.append(text)
    return payloads[:settings.AUTOSUGGEST_LIMIT]
def resource_query(resource_meta, locations, page):
    """Search resources by free-text terms and/or location, one page at a time.

    :param resource_meta: iterable of search terms matched against name,
        categories and description; may be empty or ``None``.
    :param locations: iterable of location terms matched against ``zip_code``;
        may be empty or ``None``.
    :param page: 1-based page number; values below 1 are treated as page 1.
    :returns: the response object from executing the search.
    """
    # Treat None like "no terms" so the joins below don't raise.
    resource_meta = resource_meta or []
    locations = locations or []
    # A page below 1 would give a negative offset.
    page = max(page, 1)

    logger.info("Searching for resources with resource_meta like '{}'".format(", ".join(resource_meta)))
    from_val = settings.RESULTS_PER_PAGE * (page - 1)
    to_val = settings.RESULTS_PER_PAGE * page
    logger.info("From '{}' to '{}'".format(from_val, to_val))
    s = Resource.search()[from_val:to_val]

    resource_q = Q(
        'multi_match',
        query=" ".join(resource_meta),
        fields=["name^3", "categories^5", "description^4"],
        type="cross_fields",
        tie_breaker=1.0
    )
    location_q = Q(
        'multi_match',
        query=" ".join(locations),
        fields=["zip_code"],
        type="cross_fields",
        tie_breaker=1.0
    )
    # Log through the module logger instead of printing to stdout.
    logger.debug("Location query: {}".format(location_q))

    # Combine both parts when both were given; otherwise use whichever is
    # present. With neither, the (empty) resource query is used.
    if resource_meta and locations:
        q = (resource_q & location_q)
    elif locations:
        q = location_q
    else:
        q = resource_q
    s.query = Q(q)

    logger.info("ES query: {}".format(json.dumps(s.to_dict())))
    results = s.execute()
    logger.info("Got {} hits.".format(results.hits.total))
    logger.info(results)
    return results
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment