Created
November 19, 2015 17:22
-
-
Save cmutel/1e02ff2f9ec53e06d902 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function, unicode_literals | |
from whoosh import index, query | |
from whoosh.fields import TEXT, ID, Schema | |
from whoosh.qparser import MultifieldParser | |
from whoosh.query import Term | |
import string | |
import os | |
import pprint | |
import psutil | |
import random | |
import gc | |
def open_files():
    """Return the number of files currently held open by this process.

    The count is the length of the list reported by
    ``psutil.Process().open_files()``.
    """
    return len(psutil.Process().open_files())
# Directory (inside the current working directory) that holds the index
# files; create it on first use.
path = os.path.join(os.getcwd(), "whoosh-index")
if not os.path.exists(path):
    os.mkdir(path)

# Layout of the documents stored in the index.
schema = Schema(
    txt=TEXT(stored=True, sortable=True),
    comment=TEXT(stored=True),
    database=ID(stored=True, sortable=True),
    key=ID(unique=True, stored=True),
)
def generate_random_corpus(n_documents=1000):
    """Fill the index stored at ``path`` with randomly generated documents.

    Every document gets a random ``txt`` value, an empty ``comment`` and the
    ``database`` value ``"example"``; the ``key`` field is left unset.

    Args:
        n_documents: Number of documents to add. Defaults to 1000.
    """

    def make_text():
        # Between 5 and 39 space-separated "words", each made of 3 to 14
        # distinct ASCII letters.
        return " ".join([
            "".join(random.sample(
                string.ascii_letters,
                random.randrange(3, 15)))
            for _ in range(random.randrange(5, 40))
        ])

    # Use a separate handle so the module-level ``index_`` is not shadowed.
    ix = index.open_dir(path)
    try:
        # The writer context manager commits when the block succeeds and
        # cancels when it raises, so the write lock is released either way.
        with ix.writer() as writer:
            for _ in range(n_documents):
                writer.add_document(
                    txt=make_text(), comment="", database="example"
                )
    finally:
        # Always release the index, even if writing failed.
        ix.close()
# Reuse the index if one already exists in ``path``; otherwise create it and
# populate it with random documents.
try:
    index_ = index.open_dir(path)
except index.EmptyIndexError:
    index_ = index.create_in(path, schema)
    generate_random_corpus()

# Default search fields must exist in the schema: the indexed text lives in
# ``txt`` (the schema has no ``name`` field).
qp = MultifieldParser(["txt", "comment"], schema)

# Baseline measurement before any searcher has been opened.
print("Without searching, {} open files".format(open_files()))
def search():
    """Run an unfiltered query for "foo" and report the open-file count.

    The count is printed after the searcher context has been exited.
    """
    parsed_query = qp.parse("foo")
    with index_.searcher() as active_searcher:
        active_searcher.search(parsed_query)
    print("After `search`, {} files are open".format(open_files()))
def search_with_filter():
    """Run a query for "foo" restricted by a filter and report open files.

    The filter keeps only documents whose ``database`` term is "example".
    The count is printed after the searcher context has been exited.
    """
    parsed_query = qp.parse("foo")
    only_example = Term("database", "example")
    with index_.searcher() as active_searcher:
        active_searcher.search(parsed_query, filter=only_example)
    print("After `search_with_filter`, {} files are open".format(open_files()))
# Run each scenario three times so any growth in open files is visible.
for _ in range(3):
    search()
for _ in range(3):
    search_with_filter()

# Report what remains open once the index itself has been closed ...
index_.close()
print("After closing index, {} files are open".format(open_files()))

# ... and again after forcing a garbage-collection pass.
gc.collect()
print("After garbage collection, {} files are open".format(open_files()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment