Skip to content

Instantly share code, notes, and snippets.

@jacKlinc
jacKlinc / search_images_bing.py
Created December 5, 2020 10:57
Bing Image Search Function
def search_images_bing(key, term, max_images: int = 150, **kwargs):
    """Search Bing Images for *term* and return the result records.

    Parameters
    ----------
    key : str
        Azure "Ocp-Apim-Subscription-Key" for the Bing Image Search API.
    term : str
        Query string.
    max_images : int, default 150
        Maximum number of results to request (``count`` API parameter).
    **kwargs
        Extra query parameters (e.g. ``imageType``, ``minHeight``) forwarded
        to the API. Previously these were accepted but silently ignored.

    Returns
    -------
    L
        fastai list of the ``value`` records from the JSON response.

    Raises
    ------
    requests.HTTPError
        If the API responds with a non-2xx status (``raise_for_status``).
    """
    search_url = "https://api.bing.microsoft.com/v7.0/images/search"
    # Fix: merge **kwargs into the query so callers can actually pass
    # additional Bing search parameters.
    params = {'q': term, 'count': max_images, **kwargs}
    headers = {"Ocp-Apim-Subscription-Key": key}
    response = requests.get(search_url, headers=headers, params=params)
    response.raise_for_status()
    search_results = response.json()
    return L(search_results['value'])
@jacKlinc
jacKlinc / make_category.py
Last active December 14, 2020 16:05
Image Data Curation
def make_category(cat, path, label):
    """Download Bing image-search results for *cat* into ``path/label``.

    Ensures *path* exists, creates the *label* subdirectory if needed,
    queries Bing (via ``search_images_bing`` and the module-level ``key``)
    and downloads each result's ``contentUrl`` into that folder.
    """
    if not path.exists():
        path.mkdir()
    target_dir = path / label
    target_dir.mkdir(exist_ok=True)
    hits = search_images_bing(key, cat)
    urls = hits.attrgot('contentUrl')
    download_images(target_dir, urls=urls)
@jacKlinc
jacKlinc / data_block_example.py
Created December 8, 2020 17:00
FastAI Data Block Creation
masks = DataBlock(
    # Independent variable is an image, dependent is a single category.
    blocks=(ImageBlock, CategoryBlock),
    # Fix: pass the function itself, not its result. DataBlock calls
    # get_items(source) when building dataloaders, so a pre-computed list
    # from get_image_files(path) would break masks.dataloaders(...).
    get_items=get_image_files,
    splitter=RandomSplitter(valid_pct=0.2, seed=42),  # reproducible 80/20 split
    get_y=parent_label,          # label = name of the image's parent folder
    item_tfms=Resize(128))       # resize each item to 128x128 before batching
@jacKlinc
jacKlinc / kaggle_api_dataset.py
Last active January 9, 2021 14:54
Pull data from Kaggle dataset
from kaggle.api.kaggle_api_extended import KaggleApi
from zipfile import ZipFile
import pandas as pd
def get_kaggle_dataset(dataset, d_file, used_dtypes, usecols):
'''
Pass Kaggle dataset URL (user/dataset) and dataset file
Returns Pandas DataFrame for dataset
**your kaggle api key must be saved in .kaggle/kaggle.json
@jacKlinc
jacKlinc / contextual_image_search.py
Last active January 9, 2021 14:55
Search for images using contextual image search API
def search_contextual(key, term, max_images: int=100, **kwargs):
url = "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/Search/ImageSearchAPI"
querystring = {"q": term, "pageNumber": "1", "pageSize": max_images, "autoCorrect": "true"}
headers = {
'x-rapidapi-key': key,
'x-rapidapi-host': "contextualwebsearch-websearch-v1.p.rapidapi.com"
}
response = requests.request("GET", url, headers=headers, params=querystring).json()
images=[]
@jacKlinc
jacKlinc / dot_product_bias.py
Created January 22, 2021 07:53
Collaborative filtering model architecture for movie recommendation.
from fastai import *
from fastbook import *
def create_params(size):
    """Create a trainable parameter tensor of the given shape.

    Pass a tensor shape (tuple of ints); returns the values drawn from a
    normal distribution with mean 0 and std 0.01, wrapped in
    ``nn.Parameter`` so the optimiser tracks their gradients.
    """
    weights = torch.zeros(*size)
    weights.normal_(0, 0.01)
    return nn.Parameter(weights)
from sklearn.feature_extraction.text import CountVectorizer
def parse_txt(txt_file):
"""
Pass text file location and returns n list elements for each line in the file
"""
with open(txt_file, "r") as f:
# Reads files, removes new lines and appends to list
words = f.read().splitlines()
# Removes None elements
@jacKlinc
jacKlinc / JRE_Elon.txt
Created February 24, 2021 13:44
Analyse word count of a YouTube podcast video.
welcome back here we go again great to
see you and congratulations
thank you you will never forget what is
going on in the world when you think
about when your child is born you will
@jacKlinc
jacKlinc / basic_language_model.py
Last active March 12, 2021 07:29
The first language model explicitly declares each layer, while the second does the same with a loop.
class LanguageModel(Module):
"""
Takes three words as input and returns a probability for the next
The 1st layer will use the first word's embedding
The 2nd layer will use the 2nd word's embedding and the 1st word's output activations
The 3rd layer will use the 3rd word's embedding plus the 2nd word's output activations
"""
def __init__(self, vocab_sz, n_hidden):
self.i_h = nn.Embedding(vocab_sz, n_hidden) # Converts the indices to a vector
self.h_h = nn.Linear(n_hidden, n_hidden) # Creates the activations for the successive word
@jacKlinc
jacKlinc / improved_language_model.py
Created March 16, 2021 07:46
The first model resets the state, while the second improves on this by introducing more signal through increasing the sequence length.
class LanguageModelRecurrentState(Module):
"""
State is saved by moving the reset to the init method
Gradients are detached for all but 3 layers
"""
def __init__(self, vocab_sz, n_hidden):
self.i_h = nn.Embedding(vocab_sz, n_hidden)
self.h_h = nn.Linear(n_hidden, n_hidden)
self.h_o = nn.Linear(n_hidden, vocab_sz)