Skip to content

Instantly share code, notes, and snippets.

@isoboroff
Created August 27, 2021 13:42
Show Gist options
  • Save isoboroff/e0bfa827d73ed708d0267987e6c822d8 to your computer and use it in GitHub Desktop.
Save isoboroff/e0bfa827d73ed708d0267987e6c822d8 to your computer and use it in GitHub Desktop.
Do a TREC title-only run against an Elasticsearch index.
#!/usr/bin/env python3
from elasticsearch import Elasticsearch, TransportError
import argparse
import re
import sys
# Command-line interface: connection settings, the regexes used to pick the
# topic-ID and query lines out of the topic file, and output options.
ap = argparse.ArgumentParser(description='Do a baseline run against an Elasticsearch index')
ap.add_argument('--host', default='localhost', help='Elasticsearch host')
# type=int keeps a port supplied on the command line the same type as the
# integer default (argparse would otherwise hand back a string).
ap.add_argument('--port', type=int, default=9200, help='Elasticsearch port')
ap.add_argument('--index', help='Index to search against')
ap.add_argument('--field', default=r'<title>', help='Topic field (regex) to use as query')
ap.add_argument('--tid_field', default=r'<number>', help='Topic ID field')
# Same reasoning as --port: the hit count is passed on as a result size.
ap.add_argument('--num_hits', '-n', type=int, default=1000, help='Number of hits per query')
ap.add_argument('--runtag', default='foobar', help='Run tag for output file')
ap.add_argument('topics', help='Topic file')
args = ap.parse_args()
# Matches any markup tag such as <title> or </num>. The pattern contains no
# letters, so no case-insensitivity flag is needed.
_TAG_RE = re.compile(r'<[^>]+>')

# The topic-line patterns come from the command line and never change while
# the file is being read, so compile them once up front.
_tid_re = re.compile(args.tid_field)
_query_re = re.compile(args.field)

es = Elasticsearch(hosts=[{"host": args.host, "port": args.port}],
                   retry_on_timeout=True, max_retries=10)

with open(args.topics, 'r') as topicfile:
    # Topic ID used for any query line seen before the first ID line.
    tid = 'UNK'
    for line in topicfile:
        if _tid_re.search(line):
            # Remove every tag on the line. The previous code passed
            # re.IGNORECASE positionally, where re.sub expects `count`,
            # which silently capped the substitution at two tags.
            tid = _TAG_RE.sub('', line)
            # Drop a literal "Number:" label if the topic file carries one.
            tid = tid.replace(r'Number:', '')
            tid = tid.strip()
        if _query_re.search(line):
            query = line.rstrip()
            query = _TAG_RE.sub('', query)
            # Progress goes to stderr so stdout holds only the run output.
            print(tid, query, file=sys.stderr)
            results = es.search(index=args.index,
                                q=query,
                                size=args.num_hits,
                                _source=False)
            # One output line per hit: topic id, the literal "Q0", document
            # id, 1-based rank, score, and the run tag.
            for rank, hit in enumerate(results['hits']['hits'], start=1):
                print(tid, "Q0", hit['_id'], rank, hit['_score'], args.runtag)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment