Skip to content

Instantly share code, notes, and snippets.

@colonelrascals
Created February 26, 2018 14:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save colonelrascals/100fa26da23fd9c6ec19a342f64fe11b to your computer and use it in GitHub Desktop.
Save colonelrascals/100fa26da23fd9c6ec19a342f64fe11b to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from itertools import chain
import datetime
import hashlib
from django.db import models
from django.urls import reverse
from django.contrib.auth.models import AbstractBaseUser, BaseUserManager, PermissionsMixin
from django_mysql.models import JSONField, Model
from django.template.loader import render_to_string
from django.core.mail import send_mail
from django.utils import timezone
from django.conf import settings
from app.es.resource import Resource as ResourceDoc
class Resource(models.Model):
    """Initial representation of a resource.

    Holds the descriptive, contact and address data for a resource and can
    push itself into the ``resources`` Elasticsearch index via
    :meth:`indexing`.
    """

    # ID from fixture resources, for internal dedupe
    internal_id = models.CharField(max_length=20, null=True, blank=True)
    name = models.CharField(max_length=200)
    description = models.TextField(null=True, blank=True)
    categories = models.ManyToManyField("Category")
    neighborhoods = models.ManyToManyField("Neighborhood")
    email_contact = models.EmailField(null=True, blank=True)
    pdf = models.ManyToManyField("PDF")
    phone = models.CharField(max_length=200, blank=True, null=True)
    website = models.URLField(max_length=200, blank=True, null=True)

    # address
    street_address = models.CharField(max_length=400, null=True, blank=True)
    city = models.CharField(max_length=100, null=True, blank=True)
    state = models.CharField(max_length=10, null=True, blank=True)
    latitude = models.FloatField(null=True, blank=True)
    longitude = models.FloatField(null=True, blank=True)
    zip_code = models.CharField(max_length=10, null=True, blank=True)

    # meta
    created_at = models.DateTimeField(auto_now_add=True)
    modified_at = models.DateTimeField(auto_now=True)

    def __str__(self):
        return self.name

    def __unicode__(self):
        return u'{}'.format(self.name)

    @property
    def categories_str(self):
        """Return the string form of every related category."""
        return [str(t) for t in self.categories.all()]

    @property
    def neighborhoods_str(self):
        """Return the related neighborhoods as strings, or ``["Houston"]`` if none."""
        return [str(t) for t in self.neighborhoods.all() if t] or ["Houston"]

    @property
    def bookmark(self):
        """This is here to make it easier to serialize a standard resource."""
        return getattr(self, "_bookmark", None)

    @bookmark.setter
    def bookmark(self, bookmark):
        self._bookmark = bookmark

    def indexing(self):
        """Save this resource to the ``resources`` index and return it as a dict.

        Builds a ``ResourceDoc`` from the model fields, attaches
        ``geo_coords`` when both coordinates are set, saves the document and
        returns ``obj.to_dict(include_meta=True)``.
        """
        safe_zip = str(self.zip_code or "")
        # Evaluate the neighborhoods relation once and reuse the result for
        # both the ``neighborhoods`` field and the location suggester.
        neighborhood_names = self.neighborhoods_str
        # TODO default to Houston for now but need a way to handle case where
        # we don't know neighborhood or zip code
        location_inputs = ([safe_zip] if safe_zip else []) + neighborhood_names

        obj = ResourceDoc(
            meta={"id": self.id},
            name=self.name,
            resource_suggest=self.name,
            email_contact=self.email_contact,
            phone=self.phone,
            description=self.description,
            website=self.website,
            categories=self.categories_str,
            street_address=self.street_address,
            city=self.city,
            state=self.state,
            zip_code=safe_zip,
            neighborhoods=neighborhood_names,
            location_suggest=location_inputs,
            created_at=self.created_at,
            modified_at=self.modified_at,
        )
        # Compare against None rather than truthiness: 0.0 is a valid
        # latitude/longitude and must not be treated as "missing".
        if self.latitude is not None and self.longitude is not None:
            obj.geo_coords = {
                "lat": str(self.latitude),
                "lon": str(self.longitude),
            }
        obj.save(index="resources")
        return obj.to_dict(include_meta=True)
from elasticsearch import Elasticsearch
import json
# Ad-hoc search script: resolve a zip code to coordinates, then list the
# resources matching a free-text query within 8km, nearest first.
es = Elasticsearch(["localhost:9200"])

# Inputs: the free-text resource query and the zip code to search around.
resource_query = ""
query_zip_code = ""

# Step 1: look up the zip code document to obtain its coordinates.
# NOTE(review): json.dumps() wraps a str value in literal double quotes;
# presumably it is here to stringify numeric input -- confirm against the
# Zip_Code mapping.
query_body = {
    "query": {
        "match": {"Zip_Code": json.dumps(query_zip_code)}
    }
}
res_zip = es.search(index="welnity_zipcode", doc_type="zipcode", body=query_body)

zip_hits = res_zip['hits']['hits']
if not zip_hits:
    # Without a matching zip code there is no origin point to search around.
    raise SystemExit("No location found for zip code %r" % query_zip_code)

# Origin point shared by the distance filter and the distance sort.
origin = {
    "lat": zip_hits[0]['_source']['location']['lat'],
    "lon": zip_hits[0]['_source']['location']['lon'],
}

# Step 2: weighted text match, restricted to 8km around the origin and
# sorted by ascending distance from it.
query_body = {
    "size": 10000,
    "query": {
        "bool": {
            "must": {
                "multi_match": {
                    "query": resource_query,
                    "fields": ["Resource^3", "Category^5", "Description^4"],
                    "type": "cross_fields",
                    "tie_breaker": 1.0
                }
            },
            "filter": {
                "geo_distance": {
                    "distance": "8km",
                    "location": origin
                }
            }
        }
    },
    "sort": [
        {
            "_geo_distance": {
                "location": origin,
                "order": "asc",
                "unit": "km",
                "distance_type": "plane"
            }
        }
    ]
}
res = es.search(index="welnity_production_2", doc_type="resource", body=query_body)

print("%d Resources Found!" % res['hits']['total'])
# Iterate over the hits actually returned: 'total' counts every match and can
# exceed the number of documents in this response (capped by "size").
for hit in res['hits']['hits']:
    source = hit['_source']
    print("Resource: ", source['Resource'])
    print("Category: ", source['Category'])
    # NOTE(review): this reads 'Zip_code' while the lookup above queries
    # 'Zip_Code' -- confirm the field name in each index.
    print("Zip Code: ", source['Zip_code'])
    print("-------------------------------------------")
import logging
import json
from elasticsearch_dsl.connections import connections
from elasticsearch_dsl import Index, DocType, String, Text, Date, Completion, GeoPoint, analyzer, Q, Search
from elasticsearch.helpers import bulk
from elasticsearch import Elasticsearch
from django.conf import settings
from connection import conn
# Logger used by the Elasticsearch helpers in this module.
logger = logging.getLogger("app.es")

# The 'resources' index: five primary shards and no replicas.
resources_index = Index('resources')
resources_index.settings(number_of_replicas=0, number_of_shards=5)
##eliminate the location of neighborhood
@resources_index.doc_type
class Resource(DocType):
    """Elasticsearch document for a resource, registered on ``resources_index``."""

    # descriptive fields
    name = String()
    description = Text()
    categories = String()

    # address and location
    street_address = String()
    city = String()
    state = String()
    geo_coords = GeoPoint()
    zip_code = String()
    neighborhoods = String()

    # timestamps
    created_at = Date()
    modified_at = Date()

    # autocomplete fields
    resource_suggest = Completion(analyzer=analyzer("standard"))
    location_suggest = Completion(analyzer=analyzer("standard"))
def resource_suggest(prefix):
    """Return completion suggestions for ``prefix`` from the ``resource_suggest`` field."""
    message = "Searching for resource suggestions for '{}'".format(prefix)
    logger.info(message)
    suggestions = suggest(prefix, 'resource_suggest')
    return suggestions
def location_suggest(prefix):
    """Return completion suggestions for ``prefix`` from the ``location_suggest`` field."""
    message = "Searching for location suggestions for '{}'".format(prefix)
    logger.info(message)
    suggestions = suggest(prefix, 'location_suggest')
    return suggestions
def suggest(prefix, field):
    """Return de-duplicated completion suggestions for ``prefix``.

    Runs a fuzzy completion suggester against ``field`` of the ``Resource``
    document and collects the text of every returned option.

    Args:
        prefix: the text typed so far.
        field: name of the completion field to query.

    Returns:
        A list of unique suggestion strings, in the order Elasticsearch
        returned them, truncated to ``settings.AUTOSUGGEST_LIMIT``.
    """
    s = Resource.search()
    s = s.suggest(
        'suggestions',
        text=prefix,
        completion={
            'field': field,
            # Over-fetch so that enough options remain after de-duplication.
            'size': 100,
            'fuzzy': {'fuzziness': settings.AUTOSUGGEST_FUZZINESS},
        },
    )
    logger.info("ES query: {}".format(json.dumps(s.to_dict())))
    results = s.execute_suggest()

    # TODO: for now we hack deduping because ES5 doesn't do it for us (see size in query!)
    # De-duplicate while keeping first-seen order; going through set() would
    # scramble the order and make the truncated result arbitrary.
    seen = set()
    payloads = []
    for r in results.suggestions:
        for option in r.options or ():
            text = option.text
            if text not in seen:
                seen.add(text)
                payloads.append(text)
    return payloads[:settings.AUTOSUGGEST_LIMIT]
def resource_query(resource_meta, locations, page):
    """Search resources by free-text terms and/or zip codes, one page at a time.

    Args:
        resource_meta: iterable of search-term strings; joined with spaces and
            matched against ``name``, ``categories`` and ``description``.
        locations: iterable of strings; joined with spaces and matched against
            ``zip_code``.
        page: 1-based page number. Values below 1 are treated as page 1.

    Returns:
        The response object produced by executing the search.
    """
    logger.info("Searching for resources with resource_meta like '{}'".format(", ".join(resource_meta)))

    # Clamp to the first page so the slice offsets can never be negative.
    page = max(page, 1)
    per_page = settings.RESULTS_PER_PAGE
    from_val = per_page * page - per_page
    to_val = per_page * page
    logger.info("From '{}' to '{}'".format(from_val, to_val))
    s = Resource.search()[from_val:to_val]

    resource_q = Q(
        'multi_match',
        query=" ".join(resource_meta),
        fields=["name^3", "categories^5", "description^4"],
        type="cross_fields",
        tie_breaker=1.0
    )
    location_q = Q(
        'multi_match',
        query=" ".join(locations),
        fields=["zip_code"],
        type="cross_fields",
        tie_breaker=1.0
    )

    # Both inputs -> require both matches; only locations -> location match;
    # otherwise fall back to the text query.
    if resource_meta and locations:
        q = resource_q & location_q
    elif locations:
        q = location_q
    else:
        q = resource_q
    s.query = q

    logger.info("ES query: {}".format(json.dumps(s.to_dict())))
    results = s.execute()
    logger.info("Got {} hits.".format(results.hits.total))
    logger.info(results)
    return results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment