Skip to content

Instantly share code, notes, and snippets.

@ashokc
Created December 23, 2019 02:03
Show Gist options
  • Save ashokc/c5eeddef34fac928786917e54efcdf59 to your computer and use it in GitHub Desktop.
Save ashokc/c5eeddef34fac928786917e54efcdf59 to your computer and use it in GitHub Desktop.
def findSignificantTerms(ids, label):
    """Return the significant terms of a document set and a boost for each.

    Runs an Elasticsearch ``significant_terms`` aggregation, scored with the
    chi-square heuristic, on the ``TOKENS`` field. The foreground set is the
    documents whose id is in *ids* and whose ``readmit`` field equals
    *label*; the background set is every document whose ``readmit`` is 0 or 1.

    Relies on the module-level names ``client``, ``index``, ``max_sig_terms``
    and ``boosting``.

    Args:
        ids: Ids of the documents that make up the foreground set.
        label: ``readmit`` value the foreground documents must carry.

    Returns:
        A ``(words, boosts)`` tuple of two equal-length lists. ``words`` holds
        the bucket keys in the order Elasticsearch returned them. ``boosts``
        holds each bucket's score divided by the first bucket's score when
        ``boosting == 'yes'``, and ``1.0`` for every word otherwise. Both
        lists are empty when the aggregation yields no buckets.
    """
    body = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"readmit": label}},
                    {"ids": {"values": ids}},
                ]
            }
        },
        "aggregations": {
            "driver_words": {
                "significant_terms": {
                    "field": "TOKENS",
                    "size": max_sig_terms,
                    # The heuristic name must be a string key; a bare
                    # identifier here is looked up as a Python variable.
                    "chi_square": {},
                    "background_filter": {"terms": {"readmit": [0, 1]}},
                }
            }
        },
        # Only the aggregation is needed, so request no search hits.
        "size": 0,
    }
    response = client.search(index=index, body=body, request_timeout=6000)
    buckets = response['aggregations']['driver_words']['buckets']

    # No matching documents (or no significant terms) means no buckets;
    # indexing buckets[0] below would raise IndexError.
    if not buckets:
        return [], []

    words = [bucket['key'] for bucket in buckets]

    # The first bucket is used as the normaliser, which assumes buckets come
    # back ordered by descending score -- NOTE(review): confirm against the
    # Elasticsearch version in use.
    max_score = buckets[0]['score']

    # A zero top score cannot be used as a divisor; fall back to uniform
    # boosts rather than raising ZeroDivisionError.
    if boosting == 'yes' and max_score:
        boosts = [bucket['score'] / max_score for bucket in buckets]
    else:
        boosts = [1.0] * len(words)
    return words, boosts
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment