Pavan Yalamanchili pavanky

## .clang_complete

-DDEBUG
-I/home/pyalamanchili/Workspace/source/foo/bar/include/
-I/usr/include/c++/7.3.0/

## test_fft.py
import arrayfire as af
import numpy as np
N = []; err_af = []; err_np = []
eps = 1E-16
for i in range(2, 64):
    in_array_np = np.random.rand(i)
    in_array_af = af.to_array(in_array_np)
    try:
        out_array_af_1 = af.ifft(af.fft(in_array_af), scale=1) / i
        out_array_af_2 = af.ifft(af.fft(in_array_af))

## difference_computation.cpp
```
/*******************************************************
 * Copyright (c) 2014, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

## memory.log.1
Python 3.6.1 (default, Mar 27 2017, 00:27:06)
[GCC 6.3.1 20170306] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import arrayfire as af
ERROR: GLFW wasn't able to initalize
>>> a = af.randu(5)
>>> af.print_mem_info()
Memory Info
---------------------------------------------------------
|     POINTER      |    SIZE    |  AF LOCK  | USER LOCK |

## Pytho_fft_times
2
single: 0.22531747817993164
100 af.time solves        [s]: 8.939504623413086e-05
4
single: 0.002744913101196289
100 af.time solves        [s]: 7.341623306274414e-05
6
single: 0.0005710124969482422
100 af.time solves        [s]: 5.37109375e-05
8

## Binary tests log
$ ./test/binary_cuda
Running main() from gtest_main.cc
[==========] Running 191 tests from 2 test cases.
[----------] Global test environment set-up.
[----------] 107 tests from BinaryTests
[ RUN      ] BinaryTests.Test_add_float_float
[       OK ] BinaryTests.Test_add_float_float (567 ms)
[ RUN      ] BinaryTests.Test_add_float_float_left
[       OK ] BinaryTests.Test_add_float_float_left (183 ms)
[ RUN      ] BinaryTests.Test_add_float_float_right

## sample.txt
>>> a = af.randu(5,5)
>>> af.print_mem_info()
Memory Info
---------------------------------------------------------
|     POINTER      |    SIZE    |  AF LOCK  | USER LOCK |
---------------------------------------------------------
|     0x706400000  |       1 KB |       Yes |        No |
---------------------------------------------------------
>>> b = af.randu(5,5)
>>> af.print_mem_info()

## IndexLinear_Bench.lua
local cudaAvailable, _ = pcall(require, 'cunn')

local function benchmark(opt)
   local isize = opt.inputSize or 100000
   local osize = opt.outputSize or 1
   local weightDecay = opt.weightDecay or 0
   local nnzMin = opt.featuresMinNumber or 1
   local nnzMax = opt.featuresMaxNumber or 10000
   local idxMin = 1
   local idxMax = isize

## test_call_once.cpp
#include <mutex>
#include <thread>
#include <cstdio>
#include <vector>
#include <iostream>

class Node
{
private:
    int m_a, m_b, m_c;

## inference_benchmark.txt
Params [Size: 10000 x 1, Batch: 1, nnz: (25,50), features: 385]
Speedup (SL vs IL): GPU: 1.4440615290605
----------------------------------------------------------------------------------

Params [Size: 10000 x 1, Batch: 4, nnz: (25,50), features: 1457]
Speedup (SL vs IL): GPU: 1.9608264852205
----------------------------------------------------------------------------------

Params [Size: 10000 x 1, Batch: 32, nnz: (25,50), features: 12087]
Speedup (SL vs IL): GPU: 5.6983979608533

	-DDEBUG
	-I/home/pyalamanchili/Workspace/source/foo/bar/include/
	-I/usr/include/c++/7.3.0/
	import arrayfire as af
	import numpy as np
	N = []; err_af = []; err_np = []
	eps = 1E-16
	for i in range(2, 64):
	in_array_np = np.random.rand(i)
	in_array_af = af.to_array(in_array_np)
	try:
	out_array_af_1 = af.ifft(af.fft(in_array_af), scale=1) / i
	out_array_af_2 = af.ifft(af.fft(in_array_af))
	```
	/*******************************************************
	* Copyright (c) 2014, ArrayFire
	* All rights reserved.
	*
	* This file is distributed under 3-clause BSD license.
	* The complete license agreement can be obtained at:
	* http://arrayfire.com/licenses/BSD-3-Clause
	********************************************************/
	Python 3.6.1 (default, Mar 27 2017, 00:27:06)
	[GCC 6.3.1 20170306] on linux
	Type "help", "copyright", "credits" or "license" for more information.
	>>> import arrayfire as af
	ERROR: GLFW wasn't able to initalize
	>>> a = af.randu(5)
	>>> af.print_mem_info()
	Memory Info
	---------------------------------------------------------
	\| POINTER \| SIZE \| AF LOCK \| USER LOCK \|
	2
	single: 0.22531747817993164
	100 af.time solves [s]: 8.939504623413086e-05
	4
	single: 0.002744913101196289
	100 af.time solves [s]: 7.341623306274414e-05
	6
	single: 0.0005710124969482422
	100 af.time solves [s]: 5.37109375e-05
	8
	$ ./test/binary_cuda
	Running main() from gtest_main.cc
	[==========] Running 191 tests from 2 test cases.
	[----------] Global test environment set-up.
	[----------] 107 tests from BinaryTests
	[ RUN ] BinaryTests.Test_add_float_float
	[ OK ] BinaryTests.Test_add_float_float (567 ms)
	[ RUN ] BinaryTests.Test_add_float_float_left
	[ OK ] BinaryTests.Test_add_float_float_left (183 ms)
	[ RUN ] BinaryTests.Test_add_float_float_right
	>>> a = af.randu(5,5)
	>>> af.print_mem_info()
	Memory Info
	---------------------------------------------------------
	\| POINTER \| SIZE \| AF LOCK \| USER LOCK \|
	---------------------------------------------------------
	\| 0x706400000 \| 1 KB \| Yes \| No \|
	---------------------------------------------------------
	>>> b = af.randu(5,5)
	>>> af.print_mem_info()
	local cudaAvailable, _ = pcall(require, 'cunn')

	local function benchmark(opt)
	local isize = opt.inputSize or 100000
	local osize = opt.outputSize or 1
	local weightDecay = opt.weightDecay or 0
	local nnzMin = opt.featuresMinNumber or 1
	local nnzMax = opt.featuresMaxNumber or 10000
	local idxMin = 1
	local idxMax = isize
	#include <mutex>
	#include <thread>
	#include <cstdio>
	#include <vector>
	#include <iostream>

	class Node
	{
	private:
	int m_a, m_b, m_c;
	Params [Size: 10000 x 1, Batch: 1, nnz: (25,50), features: 385]
	Speedup (SL vs IL): GPU: 1.4440615290605
	----------------------------------------------------------------------------------

	Params [Size: 10000 x 1, Batch: 4, nnz: (25,50), features: 1457]
	Speedup (SL vs IL): GPU: 1.9608264852205
	----------------------------------------------------------------------------------

	Params [Size: 10000 x 1, Batch: 32, nnz: (25,50), features: 12087]
	Speedup (SL vs IL): GPU: 5.6983979608533