Skip to content

Instantly share code, notes, and snippets.

View napsternxg's full-sized avatar
🎯
Focusing

Shubhanshu Mishra napsternxg

🎯
Focusing
View GitHub Profile
# Download the data from: https://gpc-browser.gs1.org/ using Download GPC as JSON
import json
# Paths of the downloaded GPC JSON export and of the file this script writes to.
data_file = "./data/GPC_May2024.json"
output_file = "./data/GPC_May2024.flattened.json"

# Read the export explicitly as UTF-8: without an explicit encoding, open()
# falls back to the platform's locale encoding, which can mis-decode or
# reject non-ASCII text in the JSON on some systems (e.g. Windows).
with open(data_file, encoding="utf-8") as fp:
    data = json.load(fp)
from collections import defaultdict
from typing import Callable
class LazyVal:
_uncomputed_val = object()
def __init__(self, fn, *args, **kwargs) -> None:
self._val = self._uncomputed_val
self.args = args
@napsternxg
napsternxg / Walmart Info.md
Last active June 14, 2024 18:32
Walmart Data
@napsternxg
napsternxg / Colbertv2_Torch_Scratch.ipynb
Created January 24, 2024 18:04
Colbertv2_Torch_Scratch
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@napsternxg
napsternxg / onnx_edit.py
Last active January 24, 2024 15:58
Edit Onnx Model Ops
import onnx
# Source model file and the path for the patched copy (same name with a
# ".fixed.onnx" suffix in place of ".onnx").
model_path = "./model.onnx"
fixed_model_path = model_path.replace(".onnx", ".fixed.onnx")
# Load the ONNX model, which should have Sigmoid as its last layer.
# LGBM models may sometimes not add the Sigmoid op during export when using a regression loss.
onnx_model = onnx.load(model_path)
# Dump the loaded model so its ops can be inspected before editing.
print(onnx_model)
# Run the ONNX checker on the model as loaded, before any edits are made.
onnx.checker.check_model(onnx_model)
@napsternxg
napsternxg / TasteAtlas.ipynb
Last active October 24, 2023 21:06
TasteAtlas
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@napsternxg
napsternxg / display_ner.py
Last active October 17, 2023 17:35
NER utilities
from IPython.display import display, HTML
class DisplayEntities:
    """Helpers for rendering texts and their entities as HTML in a notebook."""

    @classmethod
    def display(cls, texts, grouped_entities):
        """Render every text with its entities as a single HTML output.

        The output starts with the fragment returned by ``cls.get_style()``,
        followed by one ``cls.show_entities(text, entities)`` fragment per
        (text, entities) pair; pairs are formed with ``zip``, so extra items
        in the longer input are ignored.
        """
        fragments = [
            cls.get_style(),
            *(
                cls.show_entities(text, entities)
                for text, entities in zip(texts, grouped_entities)
            ),
        ]
        markup = "".join(fragments)
        display(HTML(markup))
@napsternxg
napsternxg / generate.py
Created August 15, 2023 00:41
T5 CausalLM Constrained Generation Using Tries
import functools
import pandas as pd
import torch
import transformers
from accelerate import Accelerator
from datasets import Dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
@napsternxg
napsternxg / async_queue_runner.py
Last active August 28, 2023 21:26
asyncio_queue_event
import asyncio
import logging
import random
import time
from dataclasses import dataclass
from typing import Any
from tqdm.auto import tqdm
logger = logging.getLogger(__name__)
@napsternxg
napsternxg / setfit_sentence_transformer_fixed.py
Last active September 7, 2023 16:51
Sentence Transformer + Setfit classification head for inference without installing setfit
from datasets import load_dataset, Dataset, DatasetDict
from sentence_transformers.losses import CosineSimilarityLoss
from sentence_transformers import SentenceTransformer
from setfit import SetFitModel, SetFitTrainer, sample_dataset
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import json