Skip to content

Instantly share code, notes, and snippets.

@Chandrak1907
Created July 2, 2020 19:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Chandrak1907/b5adf9f35d7e15d82e0c29e95d79703a to your computer and use it in GitHub Desktop.
Save Chandrak1907/b5adf9f35d7e15d82e0c29e95d79703a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Whoosh Filtering Issue"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import whoosh\n",
"import timeit\n",
"import pandas as pd\n",
"from whoosh.index import create_in\n",
"from whoosh.fields import *\n",
"from whoosh.qparser import QueryParser\n",
"from whoosh import qparser, query\n",
"from whoosh.query import Term"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Create schema\n",
"schema = Schema(title=TEXT(stored=True), ind_id=ID(stored=True), content=TEXT(stored = True))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Add trial cleaned documents to schema\n",
"iy = create_in(\"indexdir\", schema)\n",
"writer = iy.writer()\n",
"writer.add_document(title=u\"First document\", ind_id=u\"123\",content=u\"This is the first document we are using as an example.\")\n",
"writer.add_document(title=u\"Second document\", ind_id=u\"123\",content=u\"This is the second document we are using as an example: it is not the first.\")\n",
"writer.add_document(title=u\"Third document\", ind_id=u\"456\",content=u\"This is the third document we are using as an example.\")\n",
"writer.commit()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1.1 Working Example"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Top 2 Results for Term('content', 'example') runtime=0.0007069744169712067> 2\n"
]
}
],
"source": [
"with iy.searcher() as searcher:\n",
" q = QueryParser(\"content\", iy.schema).parse(\"example\")\n",
" allow_q = query.Term(\"ind_id\", \"123\")\n",
" results = searcher.search(q, filter=allow_q, limit=None)\n",
" print(results, len(results))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1.2 Not Working"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def filtered():\n",
" \n",
" #search for term in individual\n",
" with iy.searcher() as searcher:\n",
" query = QueryParser(\"content\", iy.schema).parse(\"example\")\n",
" allow_q = query.Term(\"ind_id\", \"123\")\n",
" results = searcher.search(query, filter=allow_q, limit=None)\n",
" print(results, len(results))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'Term' object has no attribute 'Term'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-6-1e07ac61d74e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mfiltered\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m<ipython-input-5-02f7f6bb4e02>\u001b[0m in \u001b[0;36mfiltered\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0miy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearcher\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msearcher\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0mquery\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mQueryParser\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"content\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"example\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mallow_q\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mquery\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTerm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"ind_id\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"123\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msearcher\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mallow_q\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlimit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'Term' object has no attribute 'Term'"
]
}
],
"source": [
"filtered()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:pyconnect_py37]",
"language": "python",
"name": "conda-env-pyconnect_py37-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment