The running script is:
```
import torch
import torch.nn as nn
import torch._dynamo as torchdynamo
import copy

class Mod(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        # (the rest of the script is truncated in the gist)
```
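A minimal reconstruction of the truncated script that would produce guards like the ones below. The single nn.Linear layer is an assumption suggested by the self.linear guard; the layer and input sizes are placeholders:
```
import torch
import torch._dynamo as torchdynamo

class Mod(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(16, 16)  # layer size is an assumption

    def forward(self, x):
        return self.linear(x)

m = Mod()
# The "eager" backend runs dynamo's tracing and guard machinery without Inductor codegen.
compiled = torchdynamo.optimize("eager")(m)
compiled(torch.randn(2, 16))  # the first call traces the graph and installs guards
```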
Running it prints the guards that dynamo installed (the output is truncated in the gist):
```
guards is: {
Guard(name='self', source=<GuardSource.LOCAL: 0>, create_fn=<function GuardBuilder.NN_MODULE at 0x7f01161ab160>, is_volatile=False, guard_types=['ID_MATCH'], code_list=['___check_obj_id(self, 139643301645088)'], obj_weakref=<weakref at 0x7f00fa06a180; to 'Mod' at 0x7f013d63cb20>, guarded_class_weakref=<weakref at 0x7f01362b6360; to 'type' at 0x5572acee40e0 (Mod)>),
Guard(name='x', source=<GuardSource.LOCAL: 0>, create_fn=<function GuardBuilder.TENSOR_MATCH at 0x7f01161ab8b0>, is_volatile=False, guard_types=['TENSOR_MATCH'], code_list=None, obj_weakref=<weakref at 0x7f0116fac2c0; to 'Tensor' at 0x7f0116ed6810>, guarded_class_weakref=<weakref at 0x7f0119143900; to 'torch._C._TensorMeta' at 0x5572ab714830 (Tensor)>),
Guard(name='self.linear', source=<GuardSource.LOCAL_NN_MODULE: 2>, create_fn=<function GuardBuilder.NN_MODULE at 0x7f01161ab160>, is_volatile=False, guard_types=None, code_list=None, obj_weakref=None, guarded_class_weakref=None),
Guard(name='torch', source=<GuardSource.GLOBAL: 1>, cr
```
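These guards pin the compiled code to its tracing assumptions: the NN_MODULE guard on self is an ID_MATCH that checks object identity via ___check_obj_id, and the TENSOR_MATCH guard on x checks tensor properties such as dtype, device, shape, and strides. When any guard fails, dynamo retraces. Continuing the reconstruction above (shapes are still placeholders):
```
compiled(torch.randn(2, 16))  # same module instance, same tensor properties: guards pass, code is reused
compiled(torch.randn(8, 16))  # with static shapes (the default here), a new batch size fails TENSOR_MATCH and triggers a recompile
```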
```
-----------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------
Name                                             Self CPU %    Self CPU      CPU total %   CPU total     CPU time avg  # of Calls
-----------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------
quantized::conv_relu_int8_packed_weight          13.77%        69.356ms      20.19%        101.737ms     3.083ms       33
quantized::conv_int8_packed_weight               13.59%        68.488ms      19.10%        96.245ms      4.812ms       20
graph_1_kernel_cpp_1                             9.96%         50.175ms      9.96%         50.175ms      50.175ms      1
graph_1_kernel_cpp_2                             9.32%         46.958ms      9.32%         46.958ms      46.958ms      1
graph_1_kernel_cpp_3                             6.54%         32.936ms      6.54%         (truncated in the gist)
```
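Tables like this come from torch.profiler; the graph_1_kernel_cpp_* rows appear because Inductor's generated kernels register themselves via RECORD_FUNCTION, as the C++ below shows. A minimal sketch of producing such a table (the model here is a placeholder, not the quantized RN50):
```
import torch
from torch.profiler import profile, ProfilerActivity

model = torch.nn.Linear(16, 16).eval()  # placeholder model
x = torch.randn(2, 16)

with profile(activities=[ProfilerActivity.CPU]) as prof:
    with torch.no_grad():
        model(x)

# Sort by self CPU time to match the columns above.
print(prof.key_averages().table(sort_by="self_cpu_time_total", row_limit=10))
```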
The first generated C++ kernel (the loop body is truncated in the gist):
```
kernel_cpp_0 = async_compile.cpp('''
#include <ATen/record_function.h>
#include "/tmp/torchinductor_root/dm/cdmaihqxwe73zkb3he2zizktpq5uujetg2db26c3r4lgsmlx3b4c.h"
extern "C" void kernel(const float* __restrict__ in_ptr0,
                       const float* __restrict__ in_ptr1,
                       const long* __restrict__ in_ptr2,
                       unsigned char* __restrict__ out_ptr0)
{
    RECORD_FUNCTION("graph_1_kernel_cpp_0", c10::ArrayRef<c10::IValue>({}));
    #pragma omp parallel num_threads(28)
    // (the kernel body is truncated in the gist)
```
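The kernel body is cut off, but the signature (two float inputs, a long input, a uint8 output) is consistent with per-tensor affine quantization. A sketch of the equivalent computation in Python; mapping in_ptr1 and in_ptr2 to scale and zero point is an assumption based only on the pointer types:
```
import torch

def quantize_like_kernel_cpp_0(x: torch.Tensor, scale: torch.Tensor, zero_point: torch.Tensor) -> torch.Tensor:
    # Assumed semantics: clamp(round(x / scale) + zero_point, 0, 255) -> uint8,
    # i.e. standard per-tensor affine quantization to an unsigned 8-bit range.
    q = torch.round(x / scale) + zero_point
    return torch.clamp(q, 0, 255).to(torch.uint8)
```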
A separate snippet defines a ReLU-only module; the forward body is cut off, so the return is an assumed completion:
```
import torch
import torch._dynamo as torchdynamo
import copy

class Mod(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        return self.relu(x)  # assumed completion; the gist truncates here
```
Another fragment shows the prologue of Inductor's generated wrapper code:
```
from ctypes import c_void_p, c_long
import torch
import random
from torch import empty_strided, as_strided, device
from torch._inductor.codecache import AsyncCompile
from torch._inductor.select_algorithm import extern_kernels

aten = torch.ops.aten
assert_size_stride = torch._C._dynamo.guards.assert_size_stride
async_compile = AsyncCompile()
```
The quantization test script imports the PT2E APIs; the test body is truncated in the gist:
```
import torch
import torch._dynamo as torchdynamo
from torch.ao.quantization import (
    get_default_qconfig,
    QConfigMapping,
)
from torch.ao.quantization._quantize_pt2e import prepare_pt2e, convert_pt2e
from torch._inductor.compile_fx import compile_fx

def test_single_conv():
    # (the test body is truncated in the gist)
```
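A minimal sketch of how test_single_conv() plausibly continues, following the PT2E flow these imports suggest. The prepare_pt2e signature changed across early PyTorch 2.0 nightlies, so the exact arguments, the "x86" backend string, and the layer sizes are assumptions:
```
def test_single_conv():
    class Mod(torch.nn.Module):
        def __init__(self) -> None:
            super().__init__()
            self.conv = torch.nn.Conv2d(3, 16, kernel_size=3)  # sizes are assumptions

        def forward(self, x):
            return self.conv(x)

    m = Mod().eval()
    example_inputs = (torch.randn(1, 3, 16, 16),)

    # Export to an FX graph with dynamo, then run the prepare/convert flow.
    exported, _ = torchdynamo.export(m, *example_inputs)
    qconfig_mapping = QConfigMapping().set_global(get_default_qconfig("x86"))
    prepared = prepare_pt2e(exported, qconfig_mapping, example_inputs)  # some nightlies also took a backend_config
    prepared(*example_inputs)  # calibration pass
    converted = convert_pt2e(prepared)

    # Lower the quantized graph through Inductor.
    optimized = compile_fx(converted, list(example_inputs))
    optimized(*example_inputs)
```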
leslie-fang-intel / gist:f8b9df5aefdf72f2111d5237fb178ff0 (created March 2, 2023): RN50 Int8 Inductor Generated Code
The generated code opens with the usual Inductor prologue:
```
from ctypes import c_void_p, c_long
import torch
import math
import random
from torch import empty_strided, as_strided, device
from torch._inductor.codecache import AsyncCompile
from torch._inductor.select_algorithm import extern_kernels

aten = torch.ops.aten
assert_size_stride = torch._C._dynamo.guards.assert_size_stride
```
The driving script enables dynamic shapes and debug logging:
```
# Running CMD is: clear && TORCHDYNAMO_DYNAMIC_SHAPES=1 python test_rn50.py 2>&1 | tee test.log
import torch
import torch._dynamo as torchdynamo
import torchvision.models as models
import copy
from torch._inductor.compile_fx import compile_fx
import logging

torch._dynamo.config.log_level = logging.DEBUG
```
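A hedged sketch of how the script plausibly continues; routing through torchdynamo.optimize("inductor") (rather than calling compile_fx directly on an exported graph) and the input shape are assumptions:
```
model = models.resnet50().eval()
x = torch.randn(1, 3, 224, 224)  # batch size is an assumption

with torch.no_grad():
    compiled = torchdynamo.optimize("inductor")(model)
    compiled(x)  # the first call triggers compilation; generated C++ lands under /tmp/torchinductor_root
```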