Skip to content

Instantly share code, notes, and snippets.

View narendraprasath's full-sized avatar

Narendra Prasath narendraprasath

View GitHub Profile
@narendraprasath
narendraprasath / image_serve_flask.py
Last active June 17, 2020 16:17
flask image serving endpoint
from flask import Flask, request, send_from_directory, send_file
import os
from flask_cors import CORS, cross_origin
# set the project root directory as the static folder
app = Flask(__name__, static_url_path='')
CORS(app)
app.config["CORS_HEADERS"]= 'Content-Type'
## flask endpoint to serve image using filename
@narendraprasath
narendraprasath / COVID_Tracker.py
Created June 17, 2020 13:42
Visualize covid cases from the API
import os
import requests
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import json
from datetime import datetime, timedelta
import locale
import traceback
from matplotlib import dates
## helps to retrieve similar questions, based on input vectors/embeddings, for each test query
def retrieveSimilarFAQ(train_question_vectors, test_question_vectors, train_QA_df, train_column_name, test_QA_df, test_column_name):
similar_question_index = []
for test_index, test_vector in enumerate(test_question_vectors):
sim, sim_Q_index = -1, -1
for train_index, train_vector in enumerate(train_question_vectors):
sim_score = cosine_similarity(train_vector, test_vector)[0][0]
if sim < sim_score:
sim = sim_score
class Embeddings():
def __init__(self, model_path):
self.model_path = model_path
self.model = None
self.__load_model__()
def __load_model__(self):
#word_vectors = api.load("glove-wiki-gigaword-100")
model_name = 'glove-twitter-25' #'word2vec-google-news-50' #'glove-twitter-25'
if not os.path.exists(self.model_path+ model_name):
class TF_IDF():
def __init__(self):
self.dictionary = None
self.model = None
self.bow_corpus = None
def create_tf_idf_model(self, data_df, column_name):
## create sentence token list
sentence_token_list = [sentence.split(" ") for sentence in data_df[column_name]]
## Data Preprocessing
class TextPreprocessor():
    """Hold a data frame and the name of the text column to preprocess.

    Construction validates ``column_name``, stores both arguments, calls
    ``self.convert_lowercase()`` and initialises the ``applied_stopword``
    and ``processed_column_name`` attributes.
    """

    def __init__(self, data_df, column_name=None):
        """Validate the column name and set up the preprocessing state.

        Args:
            data_df: Object holding the text column (presumably a pandas
                DataFrame -- confirm against callers).
            column_name: Name of the column to process; must be a non-empty
                string.

        Raises:
            Exception: If ``column_name`` is missing, empty, or not a string.
        """
        self.data_df = data_df
        # The earlier check referenced a misspelled variable and combined the
        # two conditions with ``and``: a missing column raised NameError and a
        # non-string column was silently accepted. Reject both cases here.
        if not isinstance(column_name, str) or not column_name:
            raise Exception("column name is mandatory. Make sure type is string format")
        self.column = column_name
        # convert_lowercase() is expected to be provided by this class.
        self.convert_lowercase()
        self.applied_stopword = False
        self.processed_column_name = f"processed_{self.column}"
## QA will be stored as .csv file
def extract_QA_from_text_file(INPUT_DIR, text_file_name):
    """Read a FAQ text file and compile the pattern used to locate questions.

    Args:
        INPUT_DIR: Directory that contains the text file.
        text_file_name: Name of the text file inside ``INPUT_DIR``.
    """
    output_file_name = 'covid_19faq.csv'
    with open(os.path.join(INPUT_DIR, text_file_name), 'r', encoding='latin') as obj:
        text = obj.read()
    text = text.strip()
    ## extract the question by following pattern
    # Raw string: ``\s``, ``\d`` and ``\?`` are regex escapes, not string
    # escapes, and trigger an invalid-escape warning in a normal literal.
    # The pattern matches one or more newlines, optional whitespace, a number
    # followed by a literal dot, then lazily captures text up to the next '?'.
    pattern = r'\n+\s*\d+[.](.*?)\?'
    question_pattern = re.compile(pattern, re.MULTILINE | re.IGNORECASE | re.DOTALL)
## The data is taken from https://www.un.org/sites/un2.un.org/files/new_dhmosh_covid-19_faq.pdf
## It contains FAQ-style questions and answers about COVID-19
def download_pdf_url(dataset_url, file_name):
    """Download ``dataset_url`` and save the response body to ``file_name``.

    Args:
        dataset_url: URL fetched with ``requests.get``.
        file_name: Path of the file the downloaded bytes are written to.
    """
    response = requests.get(dataset_url)
    # NOTE(review): not read anywhere in this span -- confirm it is still needed.
    pdf_content_output = None
    with io.BytesIO(response.content) as open_pdf_file:
        # Write the raw payload in binary mode. Writing str(open_pdf_file)
        # in text mode stored only the BytesIO object's repr, not the PDF.
        # getvalue() leaves the stream position untouched for later readers.
        with open(file_name, 'wb') as obj:
            obj.write(open_pdf_file.getvalue())