# compare_paddle_addcmul (gist by @baiyfbupt)
# Checks that paddle.addcmul and torch.addcmul produce matching results
# and reports the average per-call runtime of each implementation.
# Run with: python compare_paddle_addcmul.py
import time
import unittest

import numpy as np
import paddle
import paddle.fluid as fluid
import torch
device_type = "cpu"
#device_type = "cuda:0"
cpu_epoch = 100
gpu_epoch = 10000
def get_paddle_result(input_np, tensor1_np, tensor2_np, value):
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        # Persistable global variables hold the inputs so the timed loop can
        # re-run the program without feeding data on every iteration.
        input = fluid.layers.create_global_var(
            shape=input_np.shape,
            value=0,
            dtype=input_np.dtype,
            name='input',
            persistable=True)
        tensor1 = fluid.layers.create_global_var(
            shape=tensor1_np.shape,
            value=0,
            dtype=tensor1_np.dtype,
            name='tensor1',
            persistable=True)
        tensor2 = fluid.layers.create_global_var(
            shape=tensor2_np.shape,
            value=0,
            dtype=tensor2_np.dtype,
            name='tensor2',
            persistable=True)
        out = paddle.addcmul(input, tensor1, tensor2, value)

        device = fluid.CPUPlace()
        epoch = cpu_epoch
        if device_type == "cuda:0":
            device = fluid.CUDAPlace(0)
            epoch = gpu_epoch

        paddle_time = 0.0
        result = None
        # Copy the numpy inputs into the global variables once, up front.
        input_var = fluid.global_scope().var(input.name).get_tensor()
        input_var.set(input_np, device)
        tensor1_var = fluid.global_scope().var(tensor1.name).get_tensor()
        tensor1_var.set(tensor1_np, device)
        tensor2_var = fluid.global_scope().var(tensor2.name).get_tensor()
        tensor2_var.set(tensor2_np, device)

        exe = fluid.Executor(device)
        # Time `epoch` runs of the program, then fetch the result once.
        for i in range(epoch):
            start_time = time.time()
            exe.run()
            paddle_time += (time.time() - start_time)
        result, = exe.run(
            feed={"input": input_np, "tensor1": tensor1_np, "tensor2": tensor2_np},
            fetch_list=[out])
        return result, paddle_time / epoch
def get_torch_result(input_np, tensor1_np, tensor2_np, value):
    device = torch.device(device_type)
    input = torch.tensor(input_np).to(device)
    tensor1 = torch.tensor(tensor1_np).to(device)
    tensor2 = torch.tensor(tensor2_np).to(device)
    epoch = cpu_epoch
    if device_type == "cuda:0":
        epoch = gpu_epoch
    torch_time = 0.0
    result = None
    for i in range(epoch):
        start_time = time.time()
        # Keyword form avoids the deprecated (input, value, tensor1, tensor2) order.
        result = torch.addcmul(input, tensor1, tensor2, value=value)
        if device_type != "cpu":
            # Block until the CUDA kernel finishes so the timing is accurate.
            torch.cuda.synchronize()
        torch_time += (time.time() - start_time)
    return result, torch_time / epoch
class TestPaddlePytorchAPIConsistency(unittest.TestCase):
    # case 1: float64 inputs, identical shapes.
    def test_addcmul(self):
        input = np.random.random((100, 100, 100)).astype("float64")
        tensor1 = np.random.random((100, 100, 100)).astype("float64")
        tensor2 = np.random.random((100, 100, 100)).astype("float64")
        value = 1.0
        torch_res, torch_time = get_torch_result(input, tensor1, tensor2, value)
        paddle_res, paddle_time = get_paddle_result(input, tensor1, tensor2, value)
        self.assertTrue(np.allclose(paddle_res, torch_res.to(torch.device("cpu")).numpy()))
        print("case1: paddle_time:{:.6f}, torch_time:{:.6f}".format(paddle_time, torch_time))


class TestPaddlePytorchAPIConsistency2(unittest.TestCase):
    # case 2: float32 inputs, identical shapes.
    def test_addcmul(self):
        input = np.random.random((100, 100, 100)).astype("float32")
        tensor1 = np.random.random((100, 100, 100)).astype("float32")
        tensor2 = np.random.random((100, 100, 100)).astype("float32")
        value = 1.0
        torch_res, torch_time = get_torch_result(input, tensor1, tensor2, value)
        paddle_res, paddle_time = get_paddle_result(input, tensor1, tensor2, value)
        self.assertTrue(np.allclose(paddle_res, torch_res.to(torch.device("cpu")).numpy()))
        print("case2: paddle_time:{:.6f}, torch_time:{:.6f}".format(paddle_time, torch_time))


class TestPaddlePytorchAPIConsistency3(unittest.TestCase):
    # case 3: int64 inputs (np.random.random values in [0, 1) truncate to 0).
    def test_addcmul(self):
        input = np.random.random((100, 100, 100)).astype("int64")
        tensor1 = np.random.random((100, 100, 100)).astype("int64")
        tensor2 = np.random.random((100, 100, 100)).astype("int64")
        value = 1
        torch_res, torch_time = get_torch_result(input, tensor1, tensor2, value)
        paddle_res, paddle_time = get_paddle_result(input, tensor1, tensor2, value)
        self.assertTrue(np.allclose(paddle_res, torch_res.to(torch.device("cpu")).numpy()))
        print("case3: paddle_time:{:.6f}, torch_time:{:.6f}".format(paddle_time, torch_time))


class TestPaddlePytorchAPIConsistency4(unittest.TestCase):
    # case 4: int32 inputs.
    def test_addcmul(self):
        input = np.random.random((100, 100, 100)).astype("int32")
        tensor1 = np.random.random((100, 100, 100)).astype("int32")
        tensor2 = np.random.random((100, 100, 100)).astype("int32")
        value = 1
        torch_res, torch_time = get_torch_result(input, tensor1, tensor2, value)
        paddle_res, paddle_time = get_paddle_result(input, tensor1, tensor2, value)
        self.assertTrue(np.allclose(paddle_res, torch_res.to(torch.device("cpu")).numpy()))
        print("case4: paddle_time:{:.6f}, torch_time:{:.6f}".format(paddle_time, torch_time))


class TestPaddlePytorchAPIConsistency5(unittest.TestCase):
    # case 5: `input` broadcast from (100, 100) against 3-D tensor1/tensor2.
    def test_addcmul(self):
        input = np.random.random((100, 100)).astype("float64")
        tensor1 = np.random.random((100, 100, 100)).astype("float64")
        tensor2 = np.random.random((100, 100, 100)).astype("float64")
        value = 2.0
        torch_res, torch_time = get_torch_result(input, tensor1, tensor2, value)
        paddle_res, paddle_time = get_paddle_result(input, tensor1, tensor2, value)
        self.assertTrue(np.allclose(paddle_res, torch_res.to(torch.device("cpu")).numpy()))
        print("case5: paddle_time:{:.6f}, torch_time:{:.6f}".format(paddle_time, torch_time))


class TestPaddlePytorchAPIConsistency6(unittest.TestCase):
    # case 6: `tensor1` broadcast from (100, 100), non-integer scale factor.
    def test_addcmul(self):
        input = np.random.random((100, 100, 100)).astype("float64")
        tensor1 = np.random.random((100, 100)).astype("float64")
        tensor2 = np.random.random((100, 100, 100)).astype("float64")
        value = 9.99
        torch_res, torch_time = get_torch_result(input, tensor1, tensor2, value)
        paddle_res, paddle_time = get_paddle_result(input, tensor1, tensor2, value)
        self.assertTrue(np.allclose(paddle_res, torch_res.to(torch.device("cpu")).numpy()))
        print("case6: paddle_time:{:.6f}, torch_time:{:.6f}".format(paddle_time, torch_time))


class TestPaddlePytorchAPIConsistency7(unittest.TestCase):
    # case 7: `tensor2` broadcast from (100, 100).
    def test_addcmul(self):
        input = np.random.random((100, 100, 100)).astype("float64")
        tensor1 = np.random.random((100, 100, 100)).astype("float64")
        tensor2 = np.random.random((100, 100)).astype("float64")
        value = 1.1
        torch_res, torch_time = get_torch_result(input, tensor1, tensor2, value)
        paddle_res, paddle_time = get_paddle_result(input, tensor1, tensor2, value)
        self.assertTrue(np.allclose(paddle_res, torch_res.to(torch.device("cpu")).numpy()))
        print("case7: paddle_time:{:.6f}, torch_time:{:.6f}".format(paddle_time, torch_time))


if __name__ == "__main__":
    unittest.main()