Skip to content

Instantly share code, notes, and snippets.

@4OH4
4OH4 / nltk.py
Created February 2, 2019 15:20
Initialise NLTK
import nltk
from nltk.corpus import wordnet

# Fetch the WordNet corpus via the NLTK downloader.
nltk.download('wordnet')

# Print the corpus version as a quick check that WordNet can be loaded.
wordnet_version = wordnet.get_version()
print(wordnet_version)
@4OH4
4OH4 / database.py
Last active November 15, 2019 22:13
Doctest case embedded in the class documentation for a DAO
import os
import sqlite3
class DAO(object):
"""
SQLite3 Data Access Object
Usage:
>>> dao = DAO('example.db')
Database connection initialised
@4OH4
4OH4 / tfidf_basic.py
Created March 29, 2020 09:36
Basic TF-idf model using Scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
# Query text and the candidate documents it will be scored against.
search_terms = 'fruit and vegetables'
documents = [
    'cars drive on the road',
    'tomatoes are actually fruit',
]

# Fit one TF-idf model over the query plus the documents; the query is row 0.
doc_vectors = TfidfVectorizer().fit_transform([search_terms, *documents])

# Linear kernel (dot product) of the query row against every row, flattened
# to a 1-D array. With TfidfVectorizer's default L2 normalisation this is the
# cosine similarity.
cosine_similarities = linear_kernel(doc_vectors[:1], doc_vectors).flatten()

# Element 0 is the query compared with itself, so skip it; the remaining
# entries line up with `documents` and are converted to plain Python floats.
document_scores = cosine_similarities[1:].tolist()
@4OH4
4OH4 / tfidf_adv.py
Last active April 13, 2023 21:56
TF-idf model with stopwords and lemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer
import nltk
from nltk.corpus import stopwords
# Download the NLTK resources this snippet relies on:
#   - 'punkt': tokenizer models (presumably for word_tokenize, imported above)
#   - 'stopwords': the corpus read by stopwords.words(); without it the lookup
#     below raises LookupError on a fresh NLTK installation
nltk.download('punkt')
nltk.download('stopwords')

# English stopwords as a set, for fast membership tests.
stop_words = set(stopwords.words('english'))
@4OH4
4OH4 / pytesting_hypothesis2.py
Last active May 11, 2020 15:32
Hypothesis text generation - example output
# Draw ten sample values from Hypothesis' text() strategy.
# NOTE(review): the return value is not printed, so this looks intended for an
# interactive session where each expression result is echoed — confirm.
from hypothesis.strategies import text

for _ in range(10):
    text().example()

# Sample of the values produced (they vary from run to run):
# ''
# '\x17\x14'
# '\x1d\x08'
# '(\U000adacd\x0e\x02\x1e'
@4OH4
4OH4 / pytesting_hypothesis3.py
Created May 11, 2020 15:41
Using Hypothesis to generate date objects and strings
import datetime
from hypothesis.strategies import dates
from hypothesis import given
from truth.truth import AssertThat
# Module under test
from app.core.worker import Worker
# Generate dates within the four digit year range
@4OH4
4OH4 / output.py
Created January 24, 2021 14:40
Output class that defines the fields and data types of the API response
from pydantic import BaseModel, Field
class MessageOutput(BaseModel):
    # Fields and data types of the API response. Each field is declared with
    # Field(...), i.e. no default value is supplied; `title` is the label
    # attached to the field.
    # NOTE(review): documented with comments rather than a class docstring,
    # since a docstring may be picked up as the schema description — confirm.

    # Text messages returned to the caller.
    message1: str = Field(..., title="Greeting")
    message2: str = Field(..., title="Calculation result")

    # The calculation's input and its result.
    n: int = Field(..., title="n: a large integer")
    largest_prime_factor: int = Field(..., title="Largest prime factor of n")

    # Calculation time, in seconds per the field title.
    elapsed_time: float = Field(..., title="Calculation time (seconds)")
@4OH4
4OH4 / hello.py
Last active January 24, 2021 14:50
FastAPI endpoint that runs a calculation and returns result and metadata
from fastapi import APIRouter
from service.core.models.output import MessageOutput
from service.core.models.input import MessageInput
from service.core.logic.business_logic import run_prime_factor_calculation
router = APIRouter()
@router.post("/hello", response_model=MessageOutput, tags=["hello post"])
def hello_endpoint(inputs: MessageInput):
# Respond to requests on the hello endpoint
@4OH4
4OH4 / pycuda_exampple_pt1.py
Created November 13, 2021 20:29
Creating a CUDA function with PyCUDA (pt. 1)
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
# Define our function using CUDA code
cuda_func_def = """
__global__ void multiply(float *result, float *a, float *b)
{
const int i = threadIdx.x;
@4OH4
4OH4 / pycuda_exampple_pt2.py
Created November 13, 2021 20:39
Allocating memory on the GPU and copying data with PyCUDA (pt. 2)
# Create the host-side arrays: two float32 inputs of 100 random values each,
# plus a float32 array of the same length for the result (its initial
# contents are random values too).
a = np.random.randn(100).astype(np.float32)
b = np.random.randn(100).astype(np.float32)
result = np.random.randn(100).astype(np.float32)

# Allocate one GPU buffer per host array. Each buffer is sized from its own
# array (result_gpu previously borrowed b.nbytes), so the sizes stay correct
# if the arrays ever differ in length or dtype.
a_gpu = cuda.mem_alloc(a.nbytes)
b_gpu = cuda.mem_alloc(b.nbytes)
result_gpu = cuda.mem_alloc(result.nbytes)