Ailing Zhang ailzhang

## gist:568991eb2a2f685c09562699b849d9fd
import torch
import torch.nn as nn
import time
import subprocess
import argparse
import numpy as np
from torch.autograd import Variable
import pdb
def linearforward(batchsize, dim_in, dim_out):
    data = np.random.random_sample([batchsize, dim_in])

## pytorch_api_level.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                ailzhang
                / pytorch_api_level.md
            
            
              Last active
              December 11, 2018 19:18
            
              
                Pytorch API Level.md
              
          
    Torch level 1


| function | Symbolic_implemented |
|---|---|
| gather | |
| equal | |
| and | |
| iand | |
| or | |
| ior | |
| xor | |


## pytorch_api_categorization.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                ailzhang
                / pytorch_api_categorization.md
            
            
              Last active
              December 13, 2018 17:01
            
              
                Pytorch API categorization
              
          
    Torch level 1: Uncategorized


| function | Symbolic_implemented |
|---|---|
| pin_memory | |
| size | |
| stride | |
| to | |
| storage_offset | |
| set_ | |
| is_contiguous | |


## RegistrationDeclarations.h
// This file contains all native_functions that can be registered to
// and the schema string that they should be registered with

Tensor __and__(const Tensor & self, Scalar other); // aten::__and__.Scalar(Tensor self, Scalar other) -> Tensor
Tensor __and__(const Tensor & self, const Tensor & other); // aten::__and__.Tensor(Tensor self, Tensor other) -> Tensor
Tensor & __iand__(Tensor & self, Scalar other); // aten::__iand__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
Tensor & __iand__(Tensor & self, const Tensor & other); // aten::__iand__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
Tensor & __ilshift__(Tensor & self, Scalar other); // aten::__ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
Tensor & __ilshift__(Tensor & self, const Tensor & other); // aten::__ilshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
Tensor & __ior__(Tensor & self, Scalar other); // aten::__ior__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)

## conv_sweep.py
import torch
import math
import torch.nn.functional as F
import torch_xla


def test_conv2d(batch, input_shape, kernel, Cin, Cout, stride, padding, dilation, groups, device):
    input = torch.ones(batch, Cin, input_shape, input_shape, dtype=torch.float, device=device).requires_grad_()
    weight = torch.ones(Cout, int(Cin / groups), kernel, kernel, dtype=torch.float, device=device).requires_grad_()
    out = F.conv2d(input, weight, stride=stride, padding=padding, dilation=dilation, groups=groups).to(device)

## gist:54b8d31894aad2883ef4cbc1fde24a31
2019-09-04 20:04:55.629680: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) CPU:0 -> /job:localservice/replica:0/task:0/device:XLA_CPU:0
2019-09-04 20:04:55.629791: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:200] Worker grpc://localhost:40934 for /job:localservice/replica:0/task:0
2019-09-04 20:04:55.629800: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:204] XRT default device: CPU:0
2019-09-04 20:04:55.629828: I tensorflow/compiler/xla/xla_client/xrt_local_service.cc:40] Peer localservice 1 {localhost:40934}
2019-09-04 20:04:55.629959: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2019-09-04 20:04:55.655453: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2300000000 Hz
2019-09-04 20:04:55.661129: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f63e4000b50 executing computation

## gist:5129517bd630579ef69536afe33255cb
c10::optional<std::function<at::Tensor(const at::Tensor&)>> func=c10::nullopt;
if (!self.unsafeGetTensorImpl()->support_as_strided()) {
  func = [=](const at::Tensor& input_base) {
    return at::alias(input_base);
  };
}

c10::optional<std::function<at::Tensor(const at::Tensor&)>> func=c10::nullopt;
if (!self.unsafeGetTensorImpl()->support_as_strided()) {
  auto size_vec = size.vec();

## gist:99f966d2ca5c6d985c783c2d6888e255

0.Program arguments: /usr/lib/llvm-9/bin/clang -cc1 -triple x86_64-pc-linux-gnu -emit-obj -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SobolEngineOps.cpp -mrelocation-model pic -pic-level 2 -mthread-mod
el posix -fno-trapping-math -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -coverage-notes-file /var/lib/jenkins/pytorch/bui
ld/caffe2/CMakeFiles/torch_cpu.dir/__/aten/src/ATen/native/SobolEngineOps.cpp.gcno -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -isystem /var/lib/jenkins/pytorch/build/third_party/gloo -isystem /var/lib/jenkins/pytorch/cmake/../th
ird_party/gloo -isystem /var/lib/jenkins/pytorch/cmake/../third_party/googletest/googlemock/include -isystem /var/lib/jenkins/pytorch/cmake/../third_party/googletest/googletest/include -isystem /var/lib/jenkins/pytorch/third_party/
protobuf/src -isystem /opt/conda/include -isystem /var/lib/jenkins/pytorch/third_party/gemmlowp

## gist:f69b11f338997d375d3f69ae22bd1aec

0.Program arguments: /usr/lib/llvm-9/bin/clang -cc1 -triple x86_64-pc-linux-gnu -emit-obj -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SobolEngineOps.cpp -mrelocation-model pic -pic-level 2 -mthread-mod
el posix -fno-trapping-math -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -coverage-notes-file /var/lib/jenkins/pytorch/bui
ld/caffe2/CMakeFiles/torch_cpu.dir/__/aten/src/ATen/native/SobolEngineOps.cpp.gcno -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -isystem /var/lib/jenkins/pytorch/build/third_party/gloo -isystem /var/lib/jenkins/pytorch/cmake/../th
ird_party/gloo -isystem /var/lib/jenkins/pytorch/cmake/../third_party/googletest/googlemock/include -isystem /var/lib/jenkins/pytorch/cmake/../third_party/googletest/googletest/include -isystem /var/lib/jenkins/pytorch/third_party/
protobuf/src -isystem /opt/conda/include -isystem /var/lib/jenkins/pytorch/third_party/gemmlowp

## gist:db44d0a1911cae62e0bb794bff33f40a

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                ailzhang
                / gist:db44d0a1911cae62e0bb794bff33f40a
            
            
              Last active
              June 4, 2020 15:41
            
          
| | jit_premul_bias + master | jit_premul_bias + Reduce time per guard |
|---|---|---|
| fuser=none executor=simple | 13.53 | 12.79 |
| fuser=none executor=profiling | 19.52 | 13.88 |
| fuser=te executor=profiling | 13.44 | 12.14 |
	import torch
	import torch.nn as nn
	import time
	import subprocess
	import argparse
	import numpy as np
	from torch.autograd import Variable
	import pdb
	def linearforward(batchsize, dim_in, dim_out):
	data = np.random.random_sample([batchsize, dim_in])
	// This file contains all native_functions that can be registered to
	// and the schema string that they should be registered with

	Tensor __and__(const Tensor & self, Scalar other); // aten::__and__.Scalar(Tensor self, Scalar other) -> Tensor
	Tensor __and__(const Tensor & self, const Tensor & other); // aten::__and__.Tensor(Tensor self, Tensor other) -> Tensor
	Tensor & __iand__(Tensor & self, Scalar other); // aten::__iand__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
	Tensor & __iand__(Tensor & self, const Tensor & other); // aten::__iand__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
	Tensor & __ilshift__(Tensor & self, Scalar other); // aten::__ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
	Tensor & __ilshift__(Tensor & self, const Tensor & other); // aten::__ilshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
	Tensor & __ior__(Tensor & self, Scalar other); // aten::__ior__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
	import torch
	import math
	import torch.nn.functional as F
	import torch_xla


	def test_conv2d(batch, input_shape, kernel, Cin, Cout, stride, padding, dilation, groups, device):
	input = torch.ones(batch, Cin, input_shape, input_shape, dtype=torch.float, device=device).requires_grad_()
	weight = torch.ones(Cout, int(Cin / groups), kernel, kernel, dtype=torch.float, device=device).requires_grad_()
	out = F.conv2d(input, weight, stride=stride, padding=padding, dilation=dilation, groups=groups).to(device)
	2019-09-04 20:04:55.629680: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:196] XRT device (LOCAL) CPU:0 -> /job:localservice/replica:0/task:0/device:XLA_CPU:0
	2019-09-04 20:04:55.629791: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:200] Worker grpc://localhost:40934 for /job:localservice/replica:0/task:0
	2019-09-04 20:04:55.629800: I tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:204] XRT default device: CPU:0
	2019-09-04 20:04:55.629828: I tensorflow/compiler/xla/xla_client/xrt_local_service.cc:40] Peer localservice 1 {localhost:40934}
	2019-09-04 20:04:55.629959: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
	2019-09-04 20:04:55.655453: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2300000000 Hz
	2019-09-04 20:04:55.661129: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f63e4000b50 executing computation
	c10::optional<std::function<at::Tensor(const at::Tensor&)>> func=c10::nullopt;
	if (!self.unsafeGetTensorImpl()->support_as_strided()) {
	func = [=](const at::Tensor& input_base) {
	return at::alias(input_base);
	};
	}

	c10::optional<std::function<at::Tensor(const at::Tensor&)>> func=c10::nullopt;
	if (!self.unsafeGetTensorImpl()->support_as_strided()) {
	auto size_vec = size.vec();

	0.Program arguments: /usr/lib/llvm-9/bin/clang -cc1 -triple x86_64-pc-linux-gnu -emit-obj -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SobolEngineOps.cpp -mrelocation-model pic -pic-level 2 -mthread-mod
	el posix -fno-trapping-math -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -coverage-notes-file /var/lib/jenkins/pytorch/bui
	ld/caffe2/CMakeFiles/torch_cpu.dir/__/aten/src/ATen/native/SobolEngineOps.cpp.gcno -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -isystem /var/lib/jenkins/pytorch/build/third_party/gloo -isystem /var/lib/jenkins/pytorch/cmake/../th
	ird_party/gloo -isystem /var/lib/jenkins/pytorch/cmake/../third_party/googletest/googlemock/include -isystem /var/lib/jenkins/pytorch/cmake/../third_party/googletest/googletest/include -isystem /var/lib/jenkins/pytorch/third_party/
	protobuf/src -isystem /opt/conda/include -isystem /var/lib/jenkins/pytorch/third_party/gemmlowp
	jit_premul_bias + master	jit_premul_bias + Reduce time per guard
fuser=none executor=simple	13.53	12.79
fuser=none executor=profiling	19.52	13.88
fuser=te executor=profiling	13.44	12.14