Skip to content

Instantly share code, notes, and snippets.

@daisycamber
Created August 13, 2023 21:41
Show Gist options
  • Save daisycamber/39ca63c8950ec9fbd9a647647aad8ba4 to your computer and use it in GitHub Desktop.
Save daisycamber/39ca63c8950ec9fbd9a647647aad8ba4 to your computer and use it in GitHub Desktop.
Full text search using Python, with autocorrection
from feed.models import Post
import regex, datetime
from autocorrect import Speller
# Regex template used to count occurrences of a search term. The `{}` slot is
# filled with the term via str.format and becomes capture group 1. A hit needs
# a whitespace character immediately before the term and, immediately after it,
# either whitespace or one of: . , ! ? : ) - & * " ' ;
SEARCH_REGEX = r'\s({})[\s.,!?:)\-&*\"\';]'
# Common words whose matches are excluded from a post's match count in
# get_posts_for_query.
ESCAPED_QUERIES = ['a', 'and', 'the', 'is', 'i']
def _parse_cutoff(request):
    """Return the search cutoff taken from the ``time`` GET parameter.

    The parameter is interpreted as milliseconds since the epoch and converted
    with ``datetime.fromtimestamp`` (naive, local time). A missing,
    non-numeric, or out-of-range value falls back to the current time instead
    of raising.
    """
    raw = request.GET.get('time')
    try:
        return datetime.datetime.fromtimestamp(int(raw) / 1000)
    except (TypeError, ValueError, OverflowError, OSError):
        return datetime.datetime.now()


def get_posts_for_query(request, qs):
    """Return the posts matching a free-text query, best match first.

    The query is spell-corrected with ``Speller`` and split on whitespace.
    Candidate posts are the non-private, published posts dated at or before
    the cutoff whose content contains the whole corrected query or any single
    non-stop-word term. Each candidate is scored by counting ``SEARCH_REGEX``
    hits for the whole query plus hits for every term that is not listed in
    ``ESCAPED_QUERIES``; candidates scoring zero are dropped.

    Args:
        request: The current request. ``request.GET['time']`` supplies the
            cutoff (see ``_parse_cutoff``) and ``request.user`` decides which
            non-public posts may be returned.
        qs: The raw query string typed by the user.

    Returns:
        A list of ``Post`` instances ordered by descending score; ties keep
        newest-first order. A post is included when it is public, or when the
        user is authenticated and is either a vendor or subscribed to the
        post's author. An empty or whitespace-only query yields ``[]``.
    """
    now = _parse_cutoff(request)
    qs = Speller()(qs)

    # split() without an argument discards the empty strings produced by
    # repeated spaces; an empty term would match every post.
    terms = qs.split()
    if not terms:
        return []

    # The regex search is case-insensitive, so the stop-word check has to be
    # too. Stop words never add to a score, so they are also left out of the
    # candidate query below: a post reachable only through a stop word would
    # be dropped for scoring zero anyway.
    stop_words = {word.lower() for word in ESCAPED_QUERIES}
    scored_terms = [term for term in terms if term.lower() not in stop_words]

    def candidates(fragment):
        return Post.objects.filter(
            content__icontains=fragment,
            private=False,
            published=True,
            date_posted__lte=now,
        )

    posts = candidates(qs)
    for term in scored_terms:
        posts = posts.union(candidates(term))
    posts = posts.order_by('-date_posted')

    # Escape the user's text so characters such as "(" or "*" are matched
    # literally instead of being parsed as regex syntax, and compile each
    # pattern once rather than once per post.
    flags = regex.IGNORECASE | regex.BESTMATCH
    phrase_pattern = regex.compile(
        SEARCH_REGEX.format(regex.escape(qs)), flags=flags)
    term_patterns = [
        regex.compile(SEARCH_REGEX.format(regex.escape(term)), flags=flags)
        for term in scored_terms
    ]

    scored = []
    for post in posts:
        # SEARCH_REGEX needs a delimiter on both sides of a term; padding lets
        # terms at the very start or end of the content be counted.
        text = ' ' + post.content + ' '
        score = len(phrase_pattern.findall(text))
        score += sum(len(pattern.findall(text)) for pattern in term_patterns)
        if score > 0:
            scored.append((post, score))

    # list.sort is stable, so equally scored posts stay newest-first.
    scored.sort(key=lambda item: item[1], reverse=True)

    user = request.user
    subscriptions = None  # loaded at most once, only if a non-public post needs it
    visible = []
    for post, _score in scored:
        if not post.public:
            if not user.is_authenticated:
                continue
            profile = user.profile
            if not profile.vendor:
                if subscriptions is None:
                    subscriptions = set(profile.subscriptions.all())
                if post.author not in subscriptions:
                    continue
        visible.append(post)
    return visible
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment