Andy Brock (ajbrock)

import math
from torch.optim.optimizer import Optimizer
# This version of Adam keeps an fp32 copy of the parameters and
# does all of the parameter updates in fp32, while still doing the
# forwards and backwards passes using fp16 (i.e. fp16 copies of the
# parameters and fp16 activations).
#
# Note that this calls .float().cuda() on the params such that it
# moves them to gpu 0--if you're using a different GPU or want to
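#
# A minimal sketch (not the gist's Optimizer subclass) of the master-weights
# pattern described above, assuming a hypothetical `model`, `criterion`, and
# `loader`; loss scaling is omitted for brevity.
import torch

model = model.half().cuda()  # fp16 copies of the parameters for forward/backward
fp32_params = [p.detach().clone().float() for p in model.parameters()]
optimizer = torch.optim.Adam(fp32_params, lr=1e-3)

for inputs, targets in loader:
    loss = criterion(model(inputs.half().cuda()), targets.cuda())
    model.zero_grad()
    loss.backward()
    # Copy the fp16 gradients into the fp32 master copies, then step in fp32
    for p16, p32 in zip(model.parameters(), fp32_params):
        p32.grad = p16.grad.detach().float()
    optimizer.step()
    # Write the updated fp32 master weights back into the fp16 model
    with torch.no_grad():
        for p16, p32 in zip(model.parameters(), fp32_params):
            p16.copy_(p32.half())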
# Resample.py
# Andrew Brock, 2017
# This code resamples a 3D grid using Catmull-Rom spline interpolation, and is GPU accelerated.
# Resample along the trailing dimension
# Assumes a more-than-1D array? Or does it just directly assume a 3D array? We'll find out.
#
# TODO: Some things could be shared (such as the mgrid call, which can presumably be done once? hmm)
# between resample1d calls.
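#
# Illustrative NumPy sketch of 1D Catmull-Rom resampling along the trailing
# axis (the gist's GPU-accelerated Theano version differs; function and
# argument names here are assumptions).
import numpy as np

def resample1d(x, new_len):
    # Resample the last axis of x to new_len samples with Catmull-Rom splines
    old_len = x.shape[-1]
    pos = np.linspace(0, old_len - 1, new_len)   # sample positions in the old frame
    i1 = np.clip(np.floor(pos).astype(int), 0, old_len - 1)
    i0 = np.clip(i1 - 1, 0, old_len - 1)
    i2 = np.clip(i1 + 1, 0, old_len - 1)
    i3 = np.clip(i1 + 2, 0, old_len - 1)
    t = pos - np.floor(pos)
    p0, p1, p2, p3 = x[..., i0], x[..., i1], x[..., i2], x[..., i3]
    # Standard Catmull-Rom basis evaluated at fractional offset t
    return 0.5 * (2 * p1
                  + (-p0 + p2) * t
                  + (2 * p0 - 5 * p1 + 4 * p2 - p3) * t ** 2
                  + (-p0 + 3 * p1 - 3 * p2 + p3) * t ** 3)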
# PyTorch code for implementing the mixture of softmaxes layer from
# "Breaking the Softmax Bottleneck: A High-Rank RNN Language Model"
# https://arxiv.org/abs/1711.03953
context = self.fc(out)
# Non-log version
priors = F.softmax(context[:, -self.n_components:], dim=1)
mixtures = torch.stack([priors[:, i].unsqueeze(1)
                        * F.softmax(context[:, i * self.nClasses:(i + 1) * self.nClasses], dim=1)
                        for i in range(self.n_components)], 1)
out = torch.log(mixtures.sum(1))
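#
# Self-contained sketch of such a mixture-of-softmaxes output layer, assuming a
# hidden state of size d_hidden; attribute names mirror the fragment above but
# the module itself is illustrative, not the gist's full code.
import torch
import torch.nn as nn
import torch.nn.functional as F

class MixtureOfSoftmaxes(nn.Module):
    def __init__(self, d_hidden, nClasses, n_components):
        super(MixtureOfSoftmaxes, self).__init__()
        self.nClasses, self.n_components = nClasses, n_components
        # One block of logits per component, plus one prior logit per component
        self.fc = nn.Linear(d_hidden, n_components * nClasses + n_components)

    def forward(self, out):
        context = self.fc(out)
        priors = F.softmax(context[:, -self.n_components:], dim=1)
        mixtures = torch.stack([priors[:, i].unsqueeze(1)
                                * F.softmax(context[:, i * self.nClasses:(i + 1) * self.nClasses], dim=1)
                                for i in range(self.n_components)], 1)
        # Log-probability of the weighted mixture of the per-component softmaxes
        return torch.log(mixtures.sum(1))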
import theano
import lasagne.layers
from lasagne.layers import Conv2DLayer as C2D
from lasagne.nonlinearities import rectify as relu
from lasagne.layers import NonlinearityLayer as NL
from lasagne.layers import ElemwiseSumLayer as ESL
from lasagne.layers import batch_norm as BN
l_in = lasagne.layers.InputLayer(shape=(None,3,64,64)) # Assume incoming shape is a batch x RGB x W x H image
encoder_stem = C2D(
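#
# Hedged sketch of how these aliases typically compose into a residual block
# (filter counts are illustrative, not the gist's; assumes `incoming` already
# has num_filters channels so the elementwise sum is shape-compatible).
def res_block(incoming, num_filters):
    # Two batch-normalized 3x3 convolutions; batch_norm applies the nonlinearity
    # after normalization, and the second conv is left linear before the sum.
    conv1 = BN(C2D(incoming, num_filters, 3, pad=1, nonlinearity=relu))
    conv2 = BN(C2D(conv1, num_filters, 3, pad=1, nonlinearity=None))
    # Identity shortcut summed with the residual branch, then a ReLU
    return NL(ESL([incoming, conv2]), relu)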
###
# Situationally faster dilated convolutions through subpixel reshapes
# A Brock, 2016
#
# Script adapted from https://github.com/soumith/convnet-benchmarks/blob/master/theano/pylearn2_benchmark.py by Jan Schlüter.
#
# Outputs of this script from my tests on a GTX 980 are available here: http://pastebin.com/JRBY4Qnf
#
# Outputs of this script from my tests on a Titan X are available here: http://pastebin.com/0zJ8Uvg0
#
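# The gist benchmarks this in Theano; below is an illustrative PyTorch sketch of
# the underlying reshape idea (names are assumptions, and border handling is
# simplified relative to a true dilated convolution).
import torch
import torch.nn.functional as F

def dilated_conv_via_reshape(x, weight, d):
    # x: (N, C, H, W) with H and W divisible by d; weight: (Cout, C, k, k)
    N, C, H, W = x.shape
    # Space-to-batch: split each spatial dim into (coarse, phase) and fold the
    # d*d phases into the batch dimension, so a dilation-d conv becomes dense.
    x = x.view(N, C, H // d, d, W // d, d).permute(0, 3, 5, 1, 2, 4)
    x = x.reshape(N * d * d, C, H // d, W // d)
    # Ordinary dense convolution, run independently on each phase
    y = F.conv2d(x, weight, padding=weight.shape[-1] // 2)
    # Batch-to-space: undo the reshape to recover the full-resolution output
    Cout, Hc, Wc = y.shape[1], y.shape[2], y.shape[3]
    y = y.view(N, d, d, Cout, Hc, Wc).permute(0, 3, 4, 1, 5, 2)
    return y.reshape(N, Cout, Hc * d, Wc * d)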
x=["SyBPtQfAZ","H1S8UE-Rb","S1sRrN-CW","Syt0r4bRZ","HkPCrEZ0Z","rJ5C67-C-","H1T2hmZAb","Hymt27b0Z",
"HJ5AUm-CZ","r1nzLmWAb","HkGJUXb0-","SkERSm-0-","BJlrSmbAZ","HJXyS7bRb","SyhRVm-Rb","SkwAEQbAb",
"B1mvVm-C-","S1TgE7WR-","H1DkN7ZCZ","SJ71VXZAZ","ryk77mbRZ","HJIhGXWCZ","BJInMmWC-","H1I3M7Z0b",
"Bk-ofQZRb","SJx9GQb0-","BJoBfQ-0b","SJyVzQ-C-","HJNGGmZ0Z","H1kMMmb0-","HkGbzX-AW","rJIgf7bAZ",
"SyCyMm-0W","r1ayG7WRZ","H1Nyf7W0Z","HkCvZXbC-","ByED-X-0W","ByuI-mW0W","H1BHbmWCZ","SkqV-XZRZ",
"rk07ZXZRb","HJCXZQbAZ","H1bbbXZC-","rkaqxm-0b","S1XolQbRW","B1TYxm-0-","Bkftl7ZCW","SyBBgXWAZ",
"SkrHeXbCW","S1ANxQW0b","ByOExmWAb","By4Nxm-CW","r1l4eQW0Z","B12QlQWRW","ry831QWAb","B1EGg7ZCb",
"HyMTkQZAb","rJ6iJmWCW","rkZB1XbRZ","HJnQJXbC-","Sy3fJXbA-","HJ8W1Q-0Z","HknbyQbC-","BkrsAzWAb",
"ryH20GbRW","r1HhRfWRZ","B1KFAGWAZ","Byht0GbRZ","B1hYRMbCW","S1q_Cz-Cb","BJ7d0fW0b","HyydRMZC-",
"SyZI0GWCZ","rJSr0GZR-","ryZERzWCZ","rkeZRGbRW","ryazCMbR-","Hyig0zb0Z","H11lAfbCW","HkXWCMbRW",
# Manual BN
# Calculate means and variances using mean-of-squares minus mean-squared
import torch

def manual_bn(x, gain=None, bias=None, return_mean_var=False, eps=1e-5):
    # Calculate expected value of x (m) and expected value of x**2 (m2)
    # Mean of x
    m = torch.mean(x, [0, 2, 3], keepdim=True)
    # Mean of x squared
    m2 = torch.mean(x ** 2, [0, 2, 3], keepdim=True)
    # Calculate variance as mean of squares minus mean squared.
    var = (m2 - m ** 2)
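    # (Hedged continuation, not the gist's exact code.) Normalize with the
    # computed statistics and apply the optional affine parameters.
    out = (x - m) * torch.rsqrt(var + eps)
    if gain is not None:
        out = out * gain
    if bias is not None:
        out = out + bias
    # return_mean_var handling omitted here; presumably (out, m, var) vs. out
    return out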
import numpy as np
# Corpus available here: https://pastebin.com/WqD6fAgu
# Corpus taken from https://dominionstrategy.com/all-cards/
# Read all cards into memory
with open('dominion_cards.html', 'r') as rfile:
    x = rfile.readlines()
# Convenience function to count words, used later
def count_words(text):
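    # (Hedged completion; the gist's body is truncated.) Lowercase, split on
    # whitespace, and tally occurrences of each word.
    counts = {}
    for word in text.lower().split():
        counts[word] = counts.get(word, 0) + 1
    return counts

# e.g. word frequencies over the whole corpus read above
word_counts = count_words(' '.join(x))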
# A simple Layer Norm implementation
# Andy Brock, March 2017
#
# Andy's Notes:
# -This is sort of hacky but it seems to work.
# -You may also want an affine transform in there.
# -Note the .cuda() call on the dummies!
import torch
import torch.nn as nn

class LayerNorm(nn.Module):
    def __init__(self):
        super(LayerNorm, self).__init__()
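    # Hedged sketch of a forward pass for this parameter-free LayerNorm:
    # normalize each sample over all non-batch dimensions (no affine transform,
    # per the note above; the gist's dummy-variable version may differ).
    def forward(self, x, eps=1e-6):
        dims = list(range(1, x.dim()))  # all non-batch dimensions
        mean = torch.mean(x, dims, keepdim=True)
        std = torch.std(x, dims, keepdim=True)
        return (x - mean) / (std + eps)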
## Wide ResNet with Shift and incorrect hyperparams.
# Based on code by xternalz: https://github.com/xternalz/WideResNet-pytorch
# WRN by Sergey Zagoruyko and Nikos Komodakis
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable as V
import torch.optim as optim
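# Hedged sketch of the channelwise spatial "shift" operation the title
# presumably refers to (as in ShiftNet); the group split and offsets are
# illustrative, not the gist's exact choices, and C >= 9 is assumed.
def shift(x):
    # x: (N, C, H, W). Each channel group is translated by one offset from the
    # 3x3 neighbourhood, with zero padding at the borders.
    N, C, H, W = x.size()
    out = torch.zeros_like(x)
    offsets = [(dh, dw) for dh in (-1, 0, 1) for dw in (-1, 0, 1)]
    g = C // len(offsets)
    for i, (dh, dw) in enumerate(offsets):
        cs = slice(i * g, (i + 1) * g if i < len(offsets) - 1 else C)
        # Copy the shifted window of this channel group into the output
        out[:, cs, max(dh, 0):H + min(dh, 0), max(dw, 0):W + min(dw, 0)] = \
            x[:, cs, max(-dh, 0):H + min(-dh, 0), max(-dw, 0):W + min(-dw, 0)]
    return out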