Skip to content

Instantly share code, notes, and snippets.

@jacobian
Created November 4, 2018 14:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jacobian/4b65b3fbb7103829b941e23c506c822a to your computer and use it in GitHub Desktop.
Save jacobian/4b65b3fbb7103829b941e23c506c822a to your computer and use it in GitHub Desktop.
# https://stackoverflow.com/questions/53139643/django-postgres-array-field-count-number-of-overlaps
# !!! DOESN'T WORK but might with some more poking?
class Article(models.Model):
keywords = ArrayField(models.CharField(max_length=100))
def __str__(self):
return f"<Article {self.id} keywords={self.keywords}>"
def find_similar_articles(self, min_overlap=2):
# We need the field type to properly cast values below
kf = self.__class__._meta.get_field("keywords")
# Construct the inner subquery, which is of the form
# `SELECT UNNEST(a1) INTERSECT UNNEST(a2)`
# where a1 and a2 are arrays of keywords.`
#
# First step: the left- and right-hand queries, which UNNNEST
# the keyword field and provided values into rows
lhs = Article.objects.annotate(tag=Unnest(models.F("keywords")))
rhs = Article.objects.annotate(tag=Unnest(self.keywords, output_field=kf))
# Then, we need to make sure to use 'values' to ensure that the
# subquery returns only a single field.
lhs = lhs.values("tag")
rhs = rhs.values("tag")
# And here's that inner INTERSECT subquery
intersecting_tags_subquery = models.Subquery(
lhs.intersection(rhs), output_field=kf
)
# Finally, do the query:
# 1. exclude this row, because it will (by definition) match
qs = self.__class__.objects.exclude(id=self.id)
# 2. annotate with the number of matching rows
qs = qs.annotate(matching_keywords=Array(intersecting_tags_subquery))
# 3. and find only rows with more than min_overlap matches
qs = qs.filter(matching_keywords__len__gte=min_overlap)
return qs
class Unnest(models.Func):
function = "unnest"
class Array(models.Func):
function = "array"
arity = 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment