Skip to content

Instantly share code, notes, and snippets.

View ramsrigouthamg's full-sized avatar

Ramsri Goutham Golla ramsrigouthamg

View GitHub Profile
from typing import List, Tuple
import itertools
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
def mmr(doc_embedding: np.ndarray,
word_embeddings: np.ndarray,
words: List[str],
top_n: int = 5,
diversity: float = 0.9) -> List[Tuple[str, float]]:
# --- Notebook setup (IPython shell magics): fetch sense2vec vectors ---
# sense2vec pinned to 1.0.3 to match the v1.0.0 vector release used below.
!pip install --quiet sense2vec==1.0.3
# Reddit-2015 "md" vector archive; presumably extracts to ./s2v_old
# (the `ls s2v_old` check below suggests so) — TODO confirm.
!wget https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz
!tar -xvf s2v_reddit_2015_md.tar.gz
!ls s2v_old
# load sense2vec vectors
from sense2vec import Sense2Vec
s2v = Sense2Vec().from_disk('s2v_old')
# Sentence-embedding model used to embed answers/distractors (pinned version).
!pip install --quiet sentence_transformers==2.2.0
from sentence_transformers import SentenceTransformer
# 'all-MiniLM-L12-v2' is a small general-purpose sentence-embedding model.
model= SentenceTransformer('all-MiniLM-L12-v2')
def get_answer_and_distractor_embeddings(answer, candidate_distractors):
    """Embed the correct answer and its candidate distractors.

    Relies on the module-level SentenceTransformer ``model``. The answer is
    wrapped in a one-element list so both calls return batched embeddings.
    Returns ``(answer_embedding, distractor_embeddings)``.
    """
    encoded_answer = model.encode([answer])
    encoded_distractors = model.encode(candidate_distractors)
    return encoded_answer, encoded_distractors
# Demo: query sense2vec neighbours for a multi-word entity.
originalword = "Barack Obama"
# sense2vec keys are lowercase with underscores in place of spaces.
word = originalword.lower().replace(" ", "_")
print("word ", word)
# Pick the highest-frequency sense entry for this key.
sense = s2v.get_best_sense(word)
print("Best sense ", sense)
# Top-20 most similar sense2vec entries for the chosen sense.
most_similar = s2v.most_similar(sense, n=20)
print(most_similar)
# FastAPI service scaffolding for serving the models over HTTP.
from fastapi import Request,FastAPI
from pydantic import BaseModel
import uvicorn
from transformers.pipelines import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
# Application instance; route handlers are presumably registered later in
# the original gist (not visible in this excerpt).
app = FastAPI()
@ramsrigouthamg
ramsrigouthamg / summarization_HF_Transformers.py
Created October 2, 2021 21:02
GPU Summarization using HuggingFace Transformers
# GPU summarization setup using HuggingFace Transformers.
from transformers.pipelines import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
# Prefer GPU when available; otherwise fall back to CPU.
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
print ("Device ", torch_device)
# Distilled BART checkpoint fine-tuned for summarization.
tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
# Move model weights to the selected device.
model = model.to(torch_device)
import gradio as gr
def greet(sen):
    """Gradio handler: build an MCQ from *sen* and format it for display.

    Returns (question, capitalized answer, comma-joined distractors, meaning).
    """
    q, ans, wrong_choices, meaning = getMCQs(sen)
    return q, ans.capitalize(), ", ".join(wrong_choices), meaning
# Output widgets for the Gradio interface.
# NOTE(review): `gr.outputs` is the pre-3.0 Gradio API — confirm the pinned
# gradio version before upgrading.
textbox1 = gr.outputs.Textbox( type="auto", label="Question")
textbox2 = gr.outputs.Textbox(type="auto", label="Correct Answer")
textbox3 = gr.outputs.Textbox( type="auto", label="Distractors (wrong choices)")
# NOTE(review): indentation was lost in this paste and the function body is
# truncated here (no return statement visible in this excerpt) — restore the
# original gist before running.
def getMCQs(sent):
# The target answer span is marked with ** in the input; presumably the
# WSD model expects it wrapped in [TGT] markers — TODO confirm.
sentence_for_bert = sent.replace("**"," [TGT] ")
# Collapse whitespace introduced by the replacement.
sentence_for_bert = " ".join(sentence_for_bert.split())
# try:
# Word-sense disambiguation: sense key, gloss/meaning, and the answer word.
sense,meaning,answer = get_sense(sentence_for_bert)
if sense is not None:
distractors = get_distractors_wordnet(sense,answer)
else:
# Fallback message surfaced to the UI when WordNet has no matching sense.
distractors = ["Word not found in Wordnet. So unable to extract distractors."]
# Question-generation input uses the raw sentence with the ** markers removed.
sentence_for_T5 = sent.replace("**"," ")
import csv
import os
from collections import namedtuple
import re
import torch
from tabulate import tabulate
from torch.nn.functional import softmax
from tqdm import tqdm
from transformers import BertTokenizer
import torch
import math
from transformers import BertModel, BertConfig, BertPreTrainedModel, BertTokenizer
class BertWSD(BertPreTrainedModel):
def __init__(self, config):
super().__init__(config)
self.bert = BertModel(config)
self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)