[program:gunicorn]
command=.../virtualenvs/.../bin/gunicorn -c .../quoteserver/config.py -b 0.0.0.0:9999 quotes:app
directory=.../quoteserver
autostart=true
redirect_stderr=true
stdout_logfile=/var/log/supervisor/gunicorn.log
from flask import send_from_directory    # needed by these routes; 'app' is the Flask app created in the next snippet

@app.route('/css/<path:path>')
def css(path):
    # serve stylesheets out of the app's static folder
    return send_from_directory(app.static_folder + '/css/', path, mimetype='text/css')

@app.route('/images/<path:path>')
def image(path):
    # serve images; 'image/jpeg' is the registered MIME type ('image/jpg' is not)
    return send_from_directory(app.static_folder + '/images/', path, mimetype='image/jpeg')
from flask import Flask, request, render_template, logging
from elasticsearch import Elasticsearch

client = Elasticsearch([{'host': 'localhost', 'port': 9200}])
app = Flask(__name__)

@app.route('/quotes/byId', methods=['GET'])
def getById():
    # look up a single quote by its Elasticsearch document id;
    # 'index' (the index holding the quotes) is assumed to be defined elsewhere in this module
    docId = request.args.get('id')
    quote = client.get(index=index, id=docId)
    return render_template('quote.html', quote=quote)
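A quick way to exercise the route above, assuming the app is served by gunicorn on port 9999 as in the supervisor config at the top; the document id here is only a placeholder.

import requests

# hit the /quotes/byId endpoint; host, port and the id value are assumptions
resp = requests.get('http://localhost:9999/quotes/byId', params={'id': '1'})
print(resp.status_code)
print(resp.text[:200])    # start of the rendered quote.html page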
from sklearn.linear_model import LogisticRegression

def runModel(train_docs, train_labels, test_docs, test_labels, vectorizer):
    # vectorize with the supplied, already-fit vectorizer, train a balanced
    # logistic regression, and evaluate on the test split; findMetrics is
    # defined elsewhere in the gist
    model = LogisticRegression(tol=1.0e-6, random_state=0, max_iter=20000, class_weight='balanced')
    train_X = vectorizer.transform(train_docs)
    test_X = vectorizer.transform(test_docs)
    model.fit(train_X, train_labels)
    predicted_labels = model.predict(test_X)
    scores = model.predict_proba(test_X)
    return findMetrics(scores, predicted_labels, test_labels)
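A minimal, hypothetical call of runModel with toy, pre-tokenized documents. The analyzer=lambda x: x vectorizer mirrors the one used in scoreBySigTerms below, and findMetrics is assumed to be defined elsewhere in the gist.

from sklearn.feature_extraction.text import CountVectorizer

# toy, pre-tokenized notes and labels, purely for illustration
train_docs = [['chest', 'pain', 'admitted'], ['routine', 'followup', 'stable']]
train_labels = [1, 0]
test_docs = [['chest', 'pain'], ['stable', 'followup']]
test_labels = [1, 0]

vectorizer = CountVectorizer(analyzer=lambda x: x, min_df=1)
vectorizer.fit(train_docs)
metrics = runModel(train_docs, train_labels, test_docs, test_labels, vectorizer)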
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

def scoreBySigTerms(test_docs, sig_words, n_sig_terms):
    # score each test document by how many of the top n significant terms
    # for each label it contains
    scoresByLabel = {}
    for label in [0, 1]:
        useSigWords = sig_words[label][0:n_sig_terms]
        vectorizer = CountVectorizer(analyzer=lambda x: x, min_df=1, vocabulary=useSigWords)
        test_doc_vectors = vectorizer.transform(test_docs)
        a = np.sum(test_doc_vectors, axis=1)    # total significant-term count per document
        b = []
        for i in range(len(a)):
            b.append(a[i, 0])
        scoresByLabel[label] = b    # assumed completion: the original snippet is truncated here
    return scoresByLabel            # assumed completion
def findSignificantTerms(ids, label):
    # run a significant_terms aggregation (chi-square heuristic) over the TOKENS
    # of the notes with the given readmit label, restricted to the supplied ids;
    # max_sig_terms, boosting, client and index are assumed module-level settings
    body = {"query": {"bool": {"must": [{"term": {"readmit": label}}, {"ids": {"values": ids}}]}},
            "aggregations": {"driver_words": {"significant_terms": {
                "field": "TOKENS", "size": max_sig_terms, "chi_square": {},
                "background_filter": {"terms": {"readmit": [0, 1]}}}}},
            "size": 0}
    response = client.search(index=index, body=body, request_timeout=6000)
    max_score = response['aggregations']['driver_words']['buckets'][0]['score']
    words = [bucket['key'] for bucket in response['aggregations']['driver_words']['buckets']]
    if boosting == 'yes':
        boosts = [bucket['score'] / max_score for bucket in response['aggregations']['driver_words']['buckets']]
    else:
        boosts = [1.0] * len(words)
    return words, boosts    # assumed completion: the original snippet is truncated here
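A rough sketch of how the pieces above might fit together: pull the significant terms for each label and feed them to scoreBySigTerms. The ids-by-label mapping and the count of 100 terms are placeholders, and it assumes findSignificantTerms returns the words and boosts as completed above.

sig_words, sig_boosts = {}, {}
for label in [0, 1]:
    # train_ids_by_label is a hypothetical mapping from label to the ES ids of the training notes
    words, boosts = findSignificantTerms(train_ids_by_label[label], label)
    sig_words[label] = words
    sig_boosts[label] = boosts
scores = scoreBySigTerms(test_docs, sig_words, 100)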
{
    "query": {
        "term": {
            "readmit": 1
        }
    },
    "aggregations": {
        "driver_words": {
            "significant_terms": {
                "field": "TOKENS"
            }
        }
    }
}
def removeEmptyNotes():
    # flag notes with no TOKENS field so they drop out of later label-based queries
    body = {"bool": {"must_not": {"exists": {"field": "TOKENS"}}}}
    update_by_query = {"script": {"source": "ctx._source.readmit=-1", "lang": "painless"}, "query": body}
    client.update_by_query(index=index, body=update_by_query, timeout='2m')

def removeExpired():
    # flag notes whose CONTENT mentions 'expired'
    body = {"bool": {"must": {"term": {"CONTENT": "expired"}}}}
    update_by_query = {"script": {"source": "ctx._source.readmit=-1", "lang": "painless"}, "query": body}
    client.update_by_query(index=index, body=update_by_query, timeout='2m')

def removeDeaths():
    # flag notes for patients who died in hospital but are labeled as not readmitted
    body = {"bool": {"must": [{"match": {"HOSPITAL_EXPIRE_FLAG": 1}}, {"match": {"readmit": 0}}]}}
    update_by_query = {"script": {"source": "ctx._source.readmit=-1", "lang": "painless"}, "query": body}
    client.update_by_query(index=index, body=update_by_query, timeout='2m')
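A small, hypothetical driver for the three clean-up steps above; the host and index name are assumptions, and the functions rely on these module-level client and index variables.

from elasticsearch import Elasticsearch

client = Elasticsearch([{'host': 'localhost', 'port': 9200}])
index = 'mimic-notes'    # hypothetical index name, not given in the snippets

removeEmptyNotes()   # notes with no TOKENS field
removeExpired()      # notes whose CONTENT mentions 'expired'
removeDeaths()       # in-hospital deaths labeled as not readmitted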