Soumith Chintala soumith

## visualize-filters.lua
require 'torch'
require 'nn'
require 'image'
require 'gfx.js'

-- Create a convolution module and display its weight tensor as a stack of
-- 2-D images. `mod` and `filters` stay global, as in the original script.
local n_a, n_b = 5, 10          -- first two constructor arguments
local k_a, k_b = 16, 16         -- last two constructor arguments (kernel extent)
mod = nn.SpatialConvolution(n_a, n_b, k_a, k_b)
filters = mod.weight

-- View the weights as (5*10) slices of 16x16 and render them at 5x zoom.
local stacked = filters:view(n_a * n_b, k_b, k_a)
gfx.image(stacked, { zoom = 5.0 })


## config.mk
# Build configuration: tool names and the flag sets handed to them.
# C compiler, archiver and CUDA compiler commands.
CC := clang
AR := ar
NVCC := nvcc
# Object file for the CUDA sources.
CUDA_OBJS := cuda/cwc_convnet.o
# HAVE_* feature macros; expanded into both CFLAGS and NVFLAGS below so the
# C and CUDA compilers see the same set of definitions.
DEFINE_MACROS := -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_CBLAS -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_CUDA
CFLAGS := -msse2 $(DEFINE_MACROS)
NVFLAGS := --use_fast_math -arch=sm_30 $(DEFINE_MACROS)
# Libraries to link against, plus the CUDA library search directory.
LDFLAGS := -lm -lpng -ljpeg -lfftw3 -lfftw3f -lpthread -lblas -lavcodec -lavformat -lswscale -lcuda -lcudart -lcublas -L"/usr/local/cuda/lib64"

## corrected.lua
require 'paths'

local current_dir = 'torch7'
checkFile = function( current_dir )
   local list = paths.dir( current_dir )
   table.sort(list, function (a,b) return a<b end)
    for i = 3, #list do
       list[i] = paths.concat(current_dir, list[i])
       if paths.filep( list[i] )then
          print( ' ==> found file '.. list[i] )

## rbm_out.log
Torch 7.0  Copyright (C) 2001-2011 Idiap, NEC Labs, NYU
<mnist> loading only 1000 examples
<mnist> reading 1000 examples with 784+1 dimensions...
<mnist> done
<mnist> loading only 1000 examples
<mnist> reading 1000 examples with 784+1 dimensions...
<mnist> done
1/10 - Recon err: 39.1
2/10 - Recon err: 25.0
3/10 - Recon err: 20.9

## gist:8c4f4e3c2a079c29207b
-- Store a fixed number of fixed-width strings in a single 2-D CharTensor.
numStrings = 10                    -- for example, let's use 10, but this number can be anything up to memory limits
maxStringLength = 100          -- this has to be predetermined: it is the second dimension of the tensor allocated below

-- allocate a numStrings x maxStringLength CharTensor
bigStringTensor = torch.CharTensor(numStrings, maxStringLength)
bst_data=torch.data(bigStringTensor)        -- raw C pointer using torchffi

-- load some strings into the stringTensor
str='hello world'

## argcheckbug.lua
do
   require 'torch'
   local ffi = require 'ffi'
   local argcheck = require 'argcheck'

   local dataset = torch.class('torch.dataset')

   local initcheck = argcheck{
      pack=true,
      {name="paths",             type="table",    help="Multiple paths of directories with images"},

## gist:1f7645f14738d39be2b5
require 'cudnn'
require 'cunn'

-- File-local state shared by the code that follows.
-- Both tables start out empty.
local cudnntest, times = {}, {}
-- Numeric thresholds; presumably comparison tolerances for the forward,
-- backward and Jacobian checks (confirm against the code that reads them).
local precision_forward, precision_backward, precision_jac = 1e-4, 1e-2, 1e-3
-- Loop count; presumably the number of repetitions per test (confirm).
local nloop = 1

## gist:0f95facad88cbea68c6d
local Linear, parent = torch.class('nn.NoBiasLinear', 'nn.Linear')

--- Construct the layer by delegating to the parent constructor, then zero
-- out the bias so it starts at 0.
-- @param inputSize  forwarded unchanged to the parent constructor
-- @param outputSize forwarded unchanged to the parent constructor
function Linear:__init(inputSize, outputSize)
   parent.__init(self, inputSize, outputSize)
   -- Overwrite whatever the parent initialised the bias to.
   self.bias:zero()
end


function Linear:accGradParameters(input, gradOutput, scale)

## benchmarks prelim
All timings are averaged over 10 runs and reported in milliseconds on a Tesla K40m with ECC off.
Time combines forward + backwardInputs + backwardGradients
###############################################################################
ModelType: AlexNet      Kernels: cudnn  Input shape: 128x3x224x224
cudnn                                   :updateOutput():     147.32
cudnn                                :updateGradInput():     167.79
cudnn                              :accGradParameters():     153.96
cudnn                                            :TOTAL:     469.07
ModelType: AlexNet      Kernels: nn     Input shape: 128x3x224x224
nn                                      :updateOutput():     201.53

## gist:4a1339a078b4c0f7fd2b
cudnnActivationBackward bug (sigmoid)
this file has printed states of the following:
src (output)
srcdiff (gradOutput)
dest (input)
destdiff (gradInput)

look at destdiff, and notice the numerous nans produced
--------------------------------------------------------------
src (output)
	require 'torch'
	require 'nn'
	require 'image'
	require 'gfx.js'

	mod = nn.SpatialConvolution(5,10, 16, 16)
	filters = mod.weight
	gfx.image(filters:view(5*10, 16, 16), {zoom=5.0})
	CC := clang
	AR := ar
	NVCC := nvcc
	CUDA_OBJS := cuda/cwc_convnet.o
	DEFINE_MACROS := -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_CBLAS -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_CUDA
	CFLAGS := -msse2 $(DEFINE_MACROS)
	NVFLAGS := --use_fast_math -arch=sm_30 $(DEFINE_MACROS)
	LDFLAGS := -lm -lpng -ljpeg -lfftw3 -lfftw3f -lpthread -lblas -lavcodec -lavformat -lswscale -lcuda -lcudart -lcublas -L"/usr/local/cuda/lib64"
	require 'paths'

	local current_dir = 'torch7'
	checkFile = function( current_dir )
	local list = paths.dir( current_dir )
	table.sort(list, function (a,b) return a<b end)
	for i = 3, #list do
	list[i] = paths.concat(current_dir, list[i])
	if paths.filep( list[i] )then
	print( ' ==> found file '.. list[i] )
	Torch 7.0 Copyright (C) 2001-2011 Idiap, NEC Labs, NYU
	<mnist> loading only 1000 examples
	<mnist> reading 1000 examples with 784+1 dimensions...
	<mnist> done
	<mnist> loading only 1000 examples
	<mnist> reading 1000 examples with 784+1 dimensions...
	<mnist> done
	1/10 - Recon err: 39.1
	2/10 - Recon err: 25.0
	3/10 - Recon err: 20.9
	numStrings = 10 -- for example, lets do 10, but this number can be anything upto memory limits
	maxStringLength = 100 -- this has to be predetermined

	-- allocate CharTensor
	bigStringTensor = torch.CharTensor(numStrings, maxStringLength)
	bst_data=torch.data(bigStringTensor) -- raw C pointer using torchffi

	-- load some strings into the stringTensor
	str='hello world'
	do
	require 'torch'
	local ffi = require 'ffi'
	local argcheck = require 'argcheck'

	local dataset = torch.class('torch.dataset')

	local initcheck = argcheck{
	pack=true,
	{name="paths", type="table", help="Multiple paths of directories with images"},
	require 'cudnn'
	require 'cunn'

	local cudnntest = {}
	local precision_forward = 1e-4
	local precision_backward = 1e-2
	local precision_jac = 1e-3
	local nloop = 1
	local times = {}
	local Linear, parent = torch.class('nn.NoBiasLinear', 'nn.Linear')

	function Linear:__init(inputSize, outputSize)
	parent.__init(self, inputSize, outputSize)

	self.bias:fill(0)
	end


	function Linear:accGradParameters(input, gradOutput, scale)
	All timings are averaged over 10 runs and reported in milliseconds on a Tesla K40m with ECC off.
	Time combines forward + backwardInputs + backwardGradients
	###############################################################################
	ModelType: AlexNet Kernels: cudnn Input shape: 128x3x224x224
	cudnn :updateOutput(): 147.32
	cudnn :updateGradInput(): 167.79
	cudnn :accGradParameters(): 153.96
	cudnn :TOTAL: 469.07
	ModelType: AlexNet Kernels: nn Input shape: 128x3x224x224
	nn :updateOutput(): 201.53
	cudnnActivationBackward bug (sigmoid)
	this file has printed states of the following:
	src (output)
	srcdiff (gradOutput)
	dest (input)
	destdiff (gradInput)

	look at destdiff, and notice the numerous nans produced
	--------------------------------------------------------------
	src (output)