- Create a Fast.ai machine from the public templates with a P4000 and a public IP
- $ source deactivate fastai
- $ pip install virtualenv
# Filling in NaN values of a particular feature variable
avg_height = 67  # Maybe this is a good number
data["height"] = data["height"].fillna(avg_height)

# Filling in NaN values with a calculated one
# NOTE: this is an alternative to the constant fill above. If both are run in
# sequence, the first fill leaves no NaN values for this one to replace.
avg_height = data["height"].median()  # This is probably more accurate
data["height"] = data["height"].fillna(avg_height)

# Dropping rows with missing values
# Here we check which rows of "height" aren't null
# Formatting data
data['state'] = data['state'].str.upper()  # Capitalize the whole thing
data['state'] = data['state'].replace(  # Changing the format of the string
    to_replace=["CA", "C.A", "CALI"],
    value=["CALIFORNIA", "CALIFORNIA", "CALIFORNIA"])

# Dates and times are quite common in large datasets
# Converting all strings to datetime objects is good standardisation practice
# Here, the data["time"] strings will look like "2019-01-15", which is exactly
# how we set the "format" variable below
import numpy as np
import multiprocessing as multi


def chunks(n: int, page_list) -> list:
    """Split ``page_list`` into ``n`` chunks.

    The split is delegated to ``np.array_split``, so the chunks are NumPy
    arrays and the input length does not have to be divisible by ``n``.

    Args:
        n: Number of chunks to produce.
        page_list: Sequence of items to distribute over the chunks.

    Returns:
        A list of ``n`` NumPy arrays holding the items in their original order.
    """
    return np.array_split(page_list, n)


# Number of CPUs reported by the multiprocessing module.
cpus = multi.cpu_count()
# Starts empty; nothing in this fragment appends to it.
workers = []
page_list = ['www.website.com/page1.html', 'www.website.com/page2.html' |
""" | |
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
""" | |
import numpy as np

# data I/O
# Read the whole corpus inside a context manager so the file handle is closed
# as soon as the text has been loaded.
with open('input.txt', 'r') as corpus_file:  # should be simple plain text file
    data = corpus_file.read()
# Sort the unique characters so the vocabulary order is the same on every run
# (plain set iteration order for strings can differ between interpreter runs).
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)