Skip to content

Instantly share code, notes, and snippets.

@danlou
danlou / knn.py
Created September 28, 2023 15:18
Intent detection: k-nearest-neighbors (kNN) classifier experiment
from random import shuffle
from collections import Counter
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sentence_transformers import SentenceTransformer
@danlou
danlou / marvel_corpus.py
Created April 5, 2017 11:00
Code used to compile a Marvel corpus from Wikia, using a combination of the Wikia API and some scraping to fill in missing info. It could be tidier; I may improve it sometime.
import re
import json
import requests
from scrapy.selector import Selector
from click import progressbar
from nltk.tokenize import word_tokenize
from nltk.tokenize import MWETokenizer
from nltk.tokenize.punkt import PunktSentenceTokenizer, PunktParameters
punkt_param = PunktParameters()