Skip to content

Instantly share code, notes, and snippets.

View napsternxg's full-sized avatar
🎯
Focusing

Shubhanshu Mishra napsternxg

🎯
Focusing
View GitHub Profile
@napsternxg
napsternxg / Colbertv2_Torch_Scratch.ipynb
Created January 24, 2024 18:04
Colbertv2_Torch_Scratch
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@napsternxg
napsternxg / onnx_edit.py
Last active January 24, 2024 15:58
Edit Onnx Model Ops
import onnx
# Location of the exported ONNX model to inspect.
model_path = "./model.onnx"
# Sibling path with ".onnx" swapped for ".fixed.onnx"; presumably where the
# edited model is saved later -- it is not used in the visible part of the script.
fixed_model_path = model_path.replace(".onnx", ".fixed.onnx")
# Load the ONNX model, whose last layer should be a Sigmoid.
# LGBM models may sometimes omit the Sigmoid op during export when using a regression loss.
onnx_model = onnx.load(model_path)
# Dump the loaded model so its graph and ops can be inspected.
print(onnx_model)
# Run the ONNX checker on the loaded model before making any edits.
onnx.checker.check_model(onnx_model)
@napsternxg
napsternxg / TasteAtlas.ipynb
Last active October 24, 2023 21:06
TasteAtlas
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@napsternxg
napsternxg / display_ner.py
Last active October 17, 2023 17:35
NER utilities
from IPython.display import display, HTML
class DisplayEntities:
    """Notebook helper that renders texts together with their entities as HTML."""

    @classmethod
    def display(cls, texts, grouped_entities):
        """Render every text with its entities as one HTML output.

        ``texts`` and ``grouped_entities`` are walked in lockstep with
        ``zip``, so rendering stops at the shorter of the two.

        Args:
            texts: Iterable of texts to render.
            grouped_entities: Iterable of entity collections, one per text.
        """
        # The fragment from get_style() goes first, followed by one fragment
        # per (text, entities) pair; all fragments are joined as strings.
        fragments = [cls.get_style()]
        fragments.extend(
            cls.show_entities(text, entities)
            for text, entities in zip(texts, grouped_entities)
        )
        markup = "".join(fragments)
        display(HTML(markup))
@napsternxg
napsternxg / generate.py
Created August 15, 2023 00:41
T5 CausalLM Constrained Generation Using Tries
import functools
import pandas as pd
import torch
import transformers
from accelerate import Accelerator
from datasets import Dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
@napsternxg
napsternxg / async_queue_runner.py
Last active August 28, 2023 21:26
asyncio_queue_event
import asyncio
import logging
import random
import time
from dataclasses import dataclass
from typing import Any
from tqdm.auto import tqdm
logger = logging.getLogger(__name__)
@napsternxg
napsternxg / setfit_sentence_transformer_fixed.py
Last active September 7, 2023 16:51
Sentence Transformer + Setfit classification head for inference without installing setfit
from datasets import load_dataset, Dataset, DatasetDict
from sentence_transformers.losses import CosineSimilarityLoss
from sentence_transformers import SentenceTransformer
from setfit import SetFitModel, SetFitTrainer, sample_dataset
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import json
"""Faster Implementation of Unsupervised Query Segmentation.
Uses vectorized operations
- author: @napsternxg
Unsupervised Query Segmentation Using only Query Logs [Mishra et al. 2011]
https://www.microsoft.com/en-us/research/wp-content/uploads/2011/01/pp0295-mishra.pdf
@napsternxg
napsternxg / app.py
Created June 15, 2023 07:27
Queued Map with retries
from flask import Flask, jsonify, request, render_template
from queued_map import example_items
app = Flask(__name__)
@app.get("/")
@app.get("/<int:n>")
def home(n: int=10):
output = example_items(n)
@napsternxg
napsternxg / async_decorator.py
Created June 15, 2023 07:22
Async Decorator
import asyncio
def async_decorator(acreate_fn):
    """Wrap an async callable so each call logs its arguments and pauses first.

    The returned coroutine function prints the positional and keyword
    arguments it received, awaits ``asyncio.sleep(0.1)``, and then awaits
    ``acreate_fn`` with the same arguments.

    Args:
        acreate_fn: The async callable to wrap.

    Returns:
        A coroutine function that returns whatever ``acreate_fn`` returns and
        carries its metadata (``__name__``, ``__doc__``, ``__wrapped__``).
        Exceptions raised by ``acreate_fn`` propagate unchanged.
    """
    # Imported locally so the block does not rely on a file-level import.
    import functools

    # Without functools.wraps the decorated function would be reported as
    # "_f" and lose its docstring, which hurts debugging and introspection.
    @functools.wraps(acreate_fn)
    async def _f(*args, **kwargs):
        print(f"Decorated fn: {args=}, {kwargs=}. Sleeping.")
        await asyncio.sleep(0.1)
        return await acreate_fn(*args, **kwargs)

    return _f