Created
July 2, 2020 19:21
-
-
Save Chandrak1907/b5adf9f35d7e15d82e0c29e95d79703a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Whoosh Filtering Issue" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import whoosh\n", | |
"import timeit\n", | |
"import pandas as pd\n", | |
"from whoosh.index import create_in\n", | |
"from whoosh.fields import *\n", | |
"from whoosh.qparser import QueryParser\n", | |
"from whoosh import qparser, query\n", | |
"from whoosh.query import Term" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Create schema\n", | |
"schema = Schema(title=TEXT(stored=True), ind_id=ID(stored=True), content=TEXT(stored = True))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Add trial cleaned documents to schema\n", | |
"iy = create_in(\"indexdir\", schema)\n", | |
"writer = iy.writer()\n", | |
"writer.add_document(title=u\"First document\", ind_id=u\"123\",content=u\"This is the first document we are using as an example.\")\n", | |
"writer.add_document(title=u\"Second document\", ind_id=u\"123\",content=u\"This is the second document we are using as an example: it is not the first.\")\n", | |
"writer.add_document(title=u\"Third document\", ind_id=u\"456\",content=u\"This is the third document we are using as an example.\")\n", | |
"writer.commit()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 1.1 Working Example" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<Top 2 Results for Term('content', 'example') runtime=0.0007069744169712067> 2\n" | |
] | |
} | |
], | |
"source": [ | |
"with iy.searcher() as searcher:\n", | |
" q = QueryParser(\"content\", iy.schema).parse(\"example\")\n", | |
" allow_q = query.Term(\"ind_id\", \"123\")\n", | |
" results = searcher.search(q, filter=allow_q, limit=None)\n", | |
" print(results, len(results))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## 1.2 Not Working" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def filtered():\n", | |
" \n", | |
" #search for term in individual\n", | |
" with iy.searcher() as searcher:\n", | |
" query = QueryParser(\"content\", iy.schema).parse(\"example\")\n", | |
" allow_q = query.Term(\"ind_id\", \"123\")\n", | |
" results = searcher.search(query, filter=allow_q, limit=None)\n", | |
" print(results, len(results))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "AttributeError", | |
"evalue": "'Term' object has no attribute 'Term'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-6-1e07ac61d74e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mfiltered\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m<ipython-input-5-02f7f6bb4e02>\u001b[0m in \u001b[0;36mfiltered\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0miy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearcher\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msearcher\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0mquery\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mQueryParser\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"content\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"example\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mallow_q\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mquery\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTerm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"ind_id\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"123\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msearcher\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mallow_q\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlimit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mAttributeError\u001b[0m: 'Term' object has no attribute 'Term'" | |
] | |
} | |
], | |
"source": [ | |
"filtered()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [conda env:pyconnect_py37]", | |
"language": "python", | |
"name": "conda-env-pyconnect_py37-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment