Created
November 4, 2018 14:30
-
-
Save jacobian/4b65b3fbb7103829b941e23c506c822a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://stackoverflow.com/questions/53139643/django-postgres-array-field-count-number-of-overlaps | |
# !!! DOESN'T WORK but might with some more poking? | |
class Article(models.Model): | |
keywords = ArrayField(models.CharField(max_length=100)) | |
def __str__(self): | |
return f"<Article {self.id} keywords={self.keywords}>" | |
def find_similar_articles(self, min_overlap=2): | |
# We need the field type to properly cast values below | |
kf = self.__class__._meta.get_field("keywords") | |
# Construct the inner subquery, which is of the form | |
# `SELECT UNNEST(a1) INTERSECT UNNEST(a2)` | |
# where a1 and a2 are arrays of keywords.` | |
# | |
# First step: the left- and right-hand queries, which UNNNEST | |
# the keyword field and provided values into rows | |
lhs = Article.objects.annotate(tag=Unnest(models.F("keywords"))) | |
rhs = Article.objects.annotate(tag=Unnest(self.keywords, output_field=kf)) | |
# Then, we need to make sure to use 'values' to ensure that the | |
# subquery returns only a single field. | |
lhs = lhs.values("tag") | |
rhs = rhs.values("tag") | |
# And here's that inner INTERSECT subquery | |
intersecting_tags_subquery = models.Subquery( | |
lhs.intersection(rhs), output_field=kf | |
) | |
# Finally, do the query: | |
# 1. exclude this row, because it will (by definition) match | |
qs = self.__class__.objects.exclude(id=self.id) | |
# 2. annotate with the number of matching rows | |
qs = qs.annotate(matching_keywords=Array(intersecting_tags_subquery)) | |
# 3. and find only rows with more than min_overlap matches | |
qs = qs.filter(matching_keywords__len__gte=min_overlap) | |
return qs | |
class Unnest(models.Func): | |
function = "unnest" | |
class Array(models.Func): | |
function = "array" | |
arity = 1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment