Skip to content

Instantly share code, notes, and snippets.

@cmutel
Created November 19, 2015 17:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cmutel/1e02ff2f9ec53e06d902 to your computer and use it in GitHub Desktop.
Save cmutel/1e02ff2f9ec53e06d902 to your computer and use it in GitHub Desktop.
from __future__ import print_function, unicode_literals
from whoosh import index, query
from whoosh.fields import TEXT, ID, Schema
from whoosh.qparser import MultifieldParser
from whoosh.query import Term
import string
import os
import pprint
import psutil
import random
import gc
def open_files():
    """Return how many regular files the current process has open.

    The count comes from ``psutil``; constructing ``psutil.Process`` without
    a pid refers to the running interpreter itself.
    """
    current_process = psutil.Process()
    handles = current_process.open_files()
    return len(handles)
# Layout of every document in the test index. All four fields are stored so
# their values can be read back from search results.
schema = Schema(
    # Identifier-style fields.
    key=ID(unique=True, stored=True),
    database=ID(stored=True, sortable=True),
    # Free-text fields.
    txt=TEXT(stored=True, sortable=True),
    comment=TEXT(stored=True),
)
# Directory (inside the current working directory) that holds the index files.
path = os.path.join(os.getcwd(), "whoosh-index")

# Create the directory EAFP-style: attempting the mkdir and inspecting the
# failure avoids the check-then-create race of ``exists`` + ``mkdir`` and
# reports a clear error when something other than a directory is in the way.
try:
    os.mkdir(path)
except OSError:
    # An already existing directory is the expected case on re-runs; any
    # other failure (permissions, a regular file at ``path``) is re-raised.
    if not os.path.isdir(path):
        raise
def generate_random_corpus(n_documents=1000):
    """Fill the index stored at ``path`` with randomly generated documents.

    Every document gets a random ``txt`` value, an empty ``comment`` and the
    ``database`` value ``"example"``. The index is opened from ``path``,
    written to in a single commit, and closed again before returning, even
    when adding or committing documents fails.

    Args:
        n_documents: Number of documents to add. Defaults to 1000.
    """
    def make_text():
        # 5-39 space-separated "words", each built from 3-14 distinct
        # ASCII letters (``random.sample`` draws without replacement).
        words = [
            "".join(random.sample(string.ascii_letters,
                                  random.randrange(3, 15)))
            for _ in range(random.randrange(5, 40))
        ]
        return " ".join(words)

    index_ = index.open_dir(path)
    try:
        # Used as a context manager, the writer commits when the block
        # finishes normally and cancels if an exception escapes, so the
        # write lock is not left behind on failure.
        with index_.writer() as writer:
            for _ in range(n_documents):
                writer.add_document(
                    txt=make_text(), comment="", database="example"
                )
    finally:
        # Always release the index handle; this script measures open files.
        index_.close()
# Reuse the index from a previous run when there is one; otherwise create it
# from the schema and populate it with random documents.
try:
    index_ = index.open_dir(path)
except index.EmptyIndexError:
    # The directory exists but holds no index yet.
    index_ = index.create_in(path, schema)
    generate_random_corpus()
# Parse queries against both free-text fields declared in the schema. The
# field names must match the schema ("txt" and "comment"); a name the schema
# does not define (such as "name") would never match any indexed text.
qp = MultifieldParser(["txt", "comment"], schema)

# Baseline number of open files before any searcher has been created.
print("Without searching, {} open files".format(open_files()))
def search():
    """Run a plain query for "foo" and report the open-file count.

    The searcher is used as a context manager, so it is closed before the
    count is printed.
    """
    with index_.searcher() as searcher:
        parsed_query = qp.parse("foo")
        searcher.search(parsed_query)

    n_open = open_files()
    print("After `search`, {} files are open".format(n_open))
def search_with_filter():
    """Run a query for "foo" restricted by a filter and report open files.

    Results are filtered to documents whose ``database`` field is
    ``"example"``. The searcher is used as a context manager, so it is
    closed before the count is printed.
    """
    with index_.searcher() as searcher:
        parsed_query = qp.parse("foo")
        only_example = Term("database", "example")
        searcher.search(parsed_query, filter=only_example)

    n_open = open_files()
    print("After `search_with_filter`, {} files are open".format(n_open))
# Run each kind of search three times so any per-search growth in the number
# of open files shows up in the printed counts.
for _ in range(3):
    search()
for _ in range(3):
    search_with_filter()

# Check whether closing the index releases the file handles ...
index_.close()
print("After closing index, {} files are open".format(open_files()))

# ... and whether a garbage collection pass releases any that remain.
gc.collect()
print("After garbage collection, {} files are open".format(open_files()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment