Skip to content

Instantly share code, notes, and snippets.

@jacKlinc
jacKlinc / search_images_bing.py
Created December 5, 2020 10:57
Bing Image Search Function
def search_images_bing(key, term, max_images: int = 150, **kwargs):
    """Search Bing Images for *term* and return the result records.

    Parameters
    ----------
    key : str
        Azure "Ocp-Apim-Subscription-Key" for the Bing Image Search API.
    term : str
        Query string.
    max_images : int, default 150
        Maximum number of results to request (``count`` API parameter).
    **kwargs
        Extra query parameters (e.g. ``imageType``, ``minHeight``) forwarded
        to the API. Previously these were accepted but silently ignored.

    Returns
    -------
    L
        fastai list of the ``value`` records from the JSON response.

    Raises
    ------
    requests.HTTPError
        If the API responds with a non-2xx status (``raise_for_status``).
    """
    search_url = "https://api.bing.microsoft.com/v7.0/images/search"
    # Fix: merge **kwargs into the query so callers can actually pass
    # additional Bing search parameters.
    params = {'q': term, 'count': max_images, **kwargs}
    headers = {"Ocp-Apim-Subscription-Key": key}
    response = requests.get(search_url, headers=headers, params=params)
    response.raise_for_status()
    search_results = response.json()
    return L(search_results['value'])
@jacKlinc
jacKlinc / make_category.py
Last active December 14, 2020 16:05
Image Data Curation
def make_category(cat, path, label):
    """Download Bing image-search results for *cat* into ``path/label``.

    Ensures *path* exists, creates the *label* subdirectory if needed,
    queries Bing (via ``search_images_bing`` and the module-level ``key``)
    and downloads each result's ``contentUrl`` into that folder.
    """
    if not path.exists():
        path.mkdir()
    target_dir = path / label
    target_dir.mkdir(exist_ok=True)
    hits = search_images_bing(key, cat)
    urls = hits.attrgot('contentUrl')
    download_images(target_dir, urls=urls)
@jacKlinc
jacKlinc / data_block_example.py
Created December 8, 2020 17:00
FastAI Data Block Creation
masks = DataBlock(
    # Independent variable is an image, dependent is a single category.
    blocks=(ImageBlock, CategoryBlock),
    # Fix: pass the function itself, not its result. DataBlock calls
    # get_items(source) when building dataloaders, so a pre-computed list
    # from get_image_files(path) would break masks.dataloaders(...).
    get_items=get_image_files,
    splitter=RandomSplitter(valid_pct=0.2, seed=42),  # reproducible 80/20 split
    get_y=parent_label,          # label = name of the image's parent folder
    item_tfms=Resize(128))       # resize each item to 128x128 before batching
@jacKlinc
jacKlinc / kaggle_api_dataset.py
Last active January 9, 2021 14:54
Pull data from Kaggle dataset
from kaggle.api.kaggle_api_extended import KaggleApi
from zipfile import ZipFile
import pandas as pd
def get_kaggle_dataset(dataset, d_file, used_dtypes, usecols):
'''
Pass Kaggle dataset URL (user/dataset) and dataset file
Returns Pandas DataFrame for dataset
**your kaggle api key must be saved in .kaggle/kaggle.json
@jacKlinc
jacKlinc / contextual_image_search.py
Last active January 9, 2021 14:55
Search for images using contextual image search API
def search_contextual(key, term, max_images: int=100, **kwargs):
url = "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/Search/ImageSearchAPI"
querystring = {"q": term, "pageNumber": "1", "pageSize": max_images, "autoCorrect": "true"}
headers = {
'x-rapidapi-key': key,
'x-rapidapi-host': "contextualwebsearch-websearch-v1.p.rapidapi.com"
}
response = requests.request("GET", url, headers=headers, params=querystring).json()
images=[]
@jacKlinc
jacKlinc / dot_product_bias.py
Created January 22, 2021 07:53
Collaborative filtering model architecture for movie recommendation.
from fastai import *
from fastbook import *
def create_params(size):
    """Create a trainable parameter tensor of the given shape.

    Pass a tensor shape (tuple of ints); returns the values drawn from a
    normal distribution with mean 0 and std 0.01, wrapped in
    ``nn.Parameter`` so the optimiser tracks their gradients.
    """
    weights = torch.zeros(*size)
    weights.normal_(0, 0.01)
    return nn.Parameter(weights)
from sklearn.feature_extraction.text import CountVectorizer
def parse_txt(txt_file):
"""
Pass text file location and returns n list elements for each line in the file
"""
with open(txt_file, "r") as f:
# Reads files, removes new lines and appends to list
words = f.read().splitlines()
# Removes None elements
@jacKlinc
jacKlinc / JRE_Elon.txt
Created February 24, 2021 13:44
Analyse word count of a YouTube podcast video.
welcome back here we go again great to
see you and congratulations
thank you you will never forget what is
going on in the world when you think
about when your child is born you will
@jacKlinc
jacKlinc / basic_language_model.py
Last active March 12, 2021 07:29
The first language model explicitly declares each layer, while the second does the same with a loop.
class LanguageModel(Module):
"""
Takes three words as input and returns a probability for the next
The 1st layer will use the first word's embedding
The 2nd layer will use the 2nd word's embedding and the 1st word's output activations
The 3rd layer will use the 3rd word's embedding plus the 2nd word's output activations
"""
def __init__(self, vocab_sz, n_hidden):
self.i_h = nn.Embedding(vocab_sz, n_hidden) # Converts the indices to a vector
self.h_h = nn.Linear(n_hidden, n_hidden) # Creates the activations for the successive word
@jacKlinc
jacKlinc / improved_language_model.py
Created March 16, 2021 07:46
The first model resets the state, while the second improves on this by introducing more signal through increasing the sequence length.
class LanguageModelRecurrentState(Module):
"""
State is saved by moving the reset to the init method
Gradients are detached for all but 3 layers
"""
def __init__(self, vocab_sz, n_hidden):
self.i_h = nn.Embedding(vocab_sz, n_hidden)
self.h_h = nn.Linear(n_hidden, n_hidden)
self.h_o = nn.Linear(n_hidden, vocab_sz)