This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List, Tuple | |
import itertools | |
from sklearn.metrics.pairwise import cosine_similarity | |
import numpy as np | |
def mmr(doc_embedding: np.ndarray, | |
word_embeddings: np.ndarray, | |
words: List[str], | |
top_n: int = 5, | |
diversity: float = 0.9) -> List[Tuple[str, float]]: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Notebook cell: download and load pretrained sense2vec vectors ---
# Pin sense2vec to 1.0.3 for API compatibility with the v1.0.0 vectors below.
!pip install --quiet sense2vec==1.0.3
# Reddit-2015 medium vectors (~600 MB archive); extracts to the 's2v_old' directory.
!wget https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz
!tar -xvf s2v_reddit_2015_md.tar.gz
!ls s2v_old
# load sense2vec vectors
from sense2vec import Sense2Vec
# Module-level handle used by later cells (get_best_sense / most_similar).
s2v = Sense2Vec().from_disk('s2v_old')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Notebook cell: load the sentence-embedding model ---
!pip install --quiet sentence_transformers==2.2.0
from sentence_transformers import SentenceTransformer
# Compact MiniLM encoder; downloaded from the HF hub on first use.
# NOTE(review): this module-level name is later rebound to a BART model in the
# summarization cell — confirm the cells are run in separate sessions/scopes.
model= SentenceTransformer('all-MiniLM-L12-v2')
def get_answer_and_distractor_embeddings(answer, candidate_distractors):
    """Encode the correct answer and the candidate distractors.

    Uses the module-level SentenceTransformer ``model``. Returns a pair:
    the embedding of ``answer`` (as a 1-row batch) and the embeddings of
    each string in ``candidate_distractors``.
    """
    return model.encode([answer]), model.encode(candidate_distractors)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Notebook cell: look up sense2vec neighbours for a phrase ---
originalword = "Barack Obama"
# sense2vec keys are lowercase with underscores in place of spaces.
word = originalword.lower()
word = word.replace(" ", "_")
print ("word ",word)
# Highest-frequency sense for the key (e.g. "barack_obama|PERSON").
sense = s2v.get_best_sense(word)
print ("Best sense ",sense)
# Fix: get_best_sense returns None for out-of-vocabulary keys, and
# s2v.most_similar(None, ...) raises — guard before querying.
if sense is not None:
    most_similar = s2v.most_similar(sense, n=20)
else:
    most_similar = []
    print ("'%s' not found in sense2vec vocabulary" % word)
print (most_similar)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Notebook cell: web-service scaffolding ---
from fastapi import Request,FastAPI
from pydantic import BaseModel
import uvicorn
from transformers.pipelines import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
# FastAPI application instance; route handlers are registered on it elsewhere.
app = FastAPI()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Notebook cell: load the summarization model ---
from transformers.pipelines import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
# Prefer GPU when available; falls back to CPU.
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
print ("Device ", torch_device)
# DistilBART fine-tuned on CNN/DailyMail; downloaded from the HF hub on first use.
tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
# Move weights to the selected device before inference.
model = model.to(torch_device)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gradio as gr | |
def greet(sen):
    """Gradio callback: build MCQ display fields from an input sentence.

    Delegates to getMCQs() and flattens its distractor list into a
    comma-separated string for the output textbox.
    """
    q, ans, wrong_choices, meaning = getMCQs(sen)
    joined_choices = ', '.join(wrong_choices)
    return q, ans.capitalize(), joined_choices, meaning
# Output widgets for the Gradio interface.
# NOTE(review): gr.outputs.* is the legacy gradio 2.x API (removed in 3.x) —
# confirm the pinned gradio version before upgrading.
textbox1 = gr.outputs.Textbox( type="auto", label="Question")
textbox2 = gr.outputs.Textbox(type="auto", label="Correct Answer")
textbox3 = gr.outputs.Textbox( type="auto", label="Distractors (wrong choices)")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def getMCQs(sent): | |
sentence_for_bert = sent.replace("**"," [TGT] ") | |
sentence_for_bert = " ".join(sentence_for_bert.split()) | |
# try: | |
sense,meaning,answer = get_sense(sentence_for_bert) | |
if sense is not None: | |
distractors = get_distractors_wordnet(sense,answer) | |
else: | |
distractors = ["Word not found in Wordnet. So unable to extract distractors."] | |
sentence_for_T5 = sent.replace("**"," ") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import os | |
from collections import namedtuple | |
import re | |
import torch | |
from tabulate import tabulate | |
from torch.nn.functional import softmax | |
from tqdm import tqdm | |
from transformers import BertTokenizer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import math | |
from transformers import BertModel, BertConfig, BertPreTrainedModel, BertTokenizer | |
class BertWSD(BertPreTrainedModel): | |
def __init__(self, config): | |
super().__init__(config) | |
self.bert = BertModel(config) | |
self.dropout = torch.nn.Dropout(config.hidden_dropout_prob) |
NewerOlder