@wconstab
wconstab / cifar10_allcnn.py
Last active January 20, 2016 01:40
cifar10_allcnn.py
#!/usr/bin/env python
# ----------------------------------------------------------------------------
# Copyright 2015 Nervana Systems Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
@wconstab
wconstab / load_data.py
Created March 10, 2016 08:20
Load image files in class-label subdirs into X, y pair of buffers for use with ArrayIterator
import cv2
import numpy as np
import os
import argparse
import cPickle
def decode_image(filename, dim=32):
    """
    Helper function to read an image file and return it as a flattened (CHW) array
    """
from multiprocessing import Process
import rpdb
def f():
    print("Hello")
    rpdb.set_trace()
    print("next")
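The load_data.py preview above cuts off at decode_image's docstring, so the actual body is not shown. Below is a minimal sketch of how such a helper might look, assuming OpenCV's default BGR loading, a square resize to dim x dim, and a flattened CHW float layout for neon's ArrayIterator; the real gist may handle channels and resizing differently.

import cv2
import numpy as np

def decode_image(filename, dim=32):
    """
    Hypothetical reconstruction: read an image file and return it as a
    flattened (CHW) float32 array. The original gist's body is not shown.
    """
    img = cv2.imread(filename)               # HWC, BGR, uint8
    img = cv2.resize(img, (dim, dim))        # force a dim x dim image
    chw = np.transpose(img, (2, 0, 1))       # HWC -> CHW
    return chw.astype(np.float32).ravel()    # flatten for ArrayIterator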
@wconstab
wconstab / repro-quant-gil.py
Last active August 1, 2020 00:11
Repro for PyTorch issue #41865
import torch
import torch.nn as nn
from concurrent.futures import ThreadPoolExecutor
import time
import argparse
from tabulate import tabulate
class Model(nn.Module):
    def __init__(self, in_f, out_f, hidden=1000, layers=10):
        super().__init__()
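The preview of repro-quant-gil.py stops at super().__init__(). A minimal, hypothetical sketch of a GIL-contention repro along these lines is below; the layer structure, use of dynamic quantization, thread count, and timing loop are assumptions based on the filename and imports, not the gist's actual contents.

import torch
import torch.nn as nn
from concurrent.futures import ThreadPoolExecutor
import time

class Model(nn.Module):
    def __init__(self, in_f, out_f, hidden=1000, layers=10):
        super().__init__()
        mods = []
        for _ in range(layers):
            mods += [nn.Linear(in_f, hidden), nn.ReLU()]
            in_f = hidden
        mods.append(nn.Linear(hidden, out_f))
        self.net = nn.Sequential(*mods)

    def forward(self, x):
        return self.net(x)

def run(model, x, iters=50):
    # time repeated forward passes on a single worker thread
    start = time.time()
    with torch.no_grad():
        for _ in range(iters):
            model(x)
    return time.time() - start

if __name__ == "__main__":
    model = Model(128, 10).eval()
    # dynamic quantization of the Linear layers (assumed, given the gist name)
    qmodel = torch.quantization.quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8)
    x = torch.randn(8, 128)
    with ThreadPoolExecutor(max_workers=4) as pool:
        times = list(pool.map(lambda _: run(qmodel, x), range(4)))
    print(times)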
@wconstab
wconstab / maskrcnn_failure
Created September 25, 2020 02:06
Error from maskrcnn in CI
=================================== FAILURES ===================================
__________ TestBenchNetwork.test_eval[maskrcnn_benchmark-cuda-eager] ___________
self = <test_bench.TestBenchNetwork object at 0x7fd908efa6d0>
hub_model = <torchbenchmark.models.maskrcnn_benchmark.Model object at 0x7fd8f231d3d0>
benchmark = <pytest_benchmark.fixture.BenchmarkFixture object at 0x7fd8e9d433d0>
    def test_eval(self, hub_model, benchmark):
        try:
>           benchmark(hub_model.eval)
@wconstab
wconstab / LazyNativeFunctions.cpp
Created October 14, 2021 16:02
A view of the generated LazyNativeFunctions.cpp
// @generated by tools/codegen/gen.py from DispatchKeyNativeFunctions.cpp
#include "ATen/MetaFunctions.h"
#include "lazy_tensor_core/csrc/aten_ltc_bridge.h"
#include "lazy_tensor_core/csrc/helpers.h"
#include "lazy_tensor_core/csrc/tensor.h"
#include "lazy_tensor_core/csrc/tensor_util.h"
#include "/home/whc/pytorch/lazy_tensor_core/scripts/../lazy_tensor_core/csrc/ts_backend/LazyNativeFunctions.h"
#include "/home/whc/pytorch/lazy_tensor_core/scripts/../lazy_tensor_core/csrc/ts_backend/LazyLazyIr.h"
#include "/home/whc/pytorch/lazy_tensor_core/scripts/../lazy_tensor_core/csrc/ts_backend/LazyShapeDtype.h"
@wconstab
wconstab / README.md
Created October 21, 2021 16:26
Repro for bias_dropout_add_layernorm

Steps to repro

  1. sync to the wconstab/dropout branch of pytorch, which I just rebased on lazy_tensor_staging (10/21); I see no change in behavior after the rebase
  2. run rm mytest; LTC_SAVE_TENSORS_FILE=mytest PYTORCH_JIT_LOG_LEVEL=">>>graph_fuser" LTC_TS_CUDA=1 python bias_dropout_add_layernorm.py
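The repro script itself is not shown in this listing. For reference, the name suggests the usual bias + dropout + residual add + layer norm pattern; the sketch below is only that standard pattern written out in plain PyTorch (function name, arguments, and shapes are assumptions, not the gist's code).

import torch
import torch.nn.functional as F

def bias_dropout_add_layernorm(x, bias, residual, ln_weight, ln_bias, p=0.1, training=True):
    # bias add -> dropout -> residual add -> layer norm
    out = F.dropout(x + bias, p=p, training=training)
    out = out + residual
    return F.layer_norm(out, out.shape[-1:], ln_weight, ln_bias)

# example invocation with made-up shapes
x = torch.randn(4, 16, 1024)
bias = torch.zeros(1024)
residual = torch.randn_like(x)
w = torch.ones(1024)
b = torch.zeros(1024)
y = bias_dropout_add_layernorm(x, bias, residual, w, b)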
@wconstab
wconstab / README.md
Created October 21, 2021 16:31
follow-up repro using Kevin's script:

Notes:

same runner.py as https://gist.github.com/wconstab/9802986a1353ee8eb14e12d1e6a23b79

run with rm mytest; LTC_SAVE_TENSORS_FILE=mytest PYTORCH_JIT_LOG_LEVEL=">>>graph_fuser" LTC_TS_CUDA=1 python bias_dropout_add_layernorm.py > console.log 2>&1

Observed this warning for some reason: [W manager.cpp:305] Warning: FALLBACK path has been taken. This is an indication that codegen failed for some reason. To debug, try disabling the codegen fallback path via setting the env variable export PYTORCH_NVFUSER_DISABLE_FALLBACK=1 (function runCudaFusionGroup)

But I don't see the fragmentation of backward that you mentioned. I'm also not sure whether backward is complete in this case; it only includes native_layer_norm and sum.

@wconstab
wconstab / bert_ir_baseline_with_lr_hack
Created December 8, 2021 18:24
bert_ir for wrapped scalars
[ScheduleSyncTensorsGraph]
TensorsGraphInfo:
to_device (/home/whc/pytorch/lazy_tensor_core/lazy_bench.py:294)
check_results (/home/whc/pytorch/lazy_tensor_core/lazy_bench.py:361)
<module> (/home/whc/pytorch/lazy_tensor_core/lazy_bench.py:440)
Hashes: (51812b4e6a763b887a0ee6c07ddc9e86)
## BEGIN_GRAPH
IR {
[ RUN ] LazyOpsTest.TestNllLoss
/var/lib/jenkins/workspace/aten/src/ATen/native/LossNLL.cpp:266:16: runtime error: division by zero
#0 0x7f9abe2167ce in void at::native::(anonymous namespace)::nll_loss_out_frame<float, long>(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, long, long) (/opt/conda/lib/python3.7/site-packages/torch/bin/libtorch_cpu.so+0xbbaa7ce)
#1 0x7f9abe204c3c in at::native::(anonymous namespace)::nll_loss_forward_out_cpu_template(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, long, long) (/opt/conda/lib/python3.7/site-packages/torch/bin/libtorch_cpu.so+0xbb98c3c)
#2 0x7f9abe20443c in at::native::structured_nll_loss_forward_out_cpu::impl(at::Tensor const&, at::Tensor const&, at::OptionalTensorRef, long, long, at::Tensor const&, at::Tensor const&) (/opt/conda/lib/python3.7/site-packages/torch/bin/libtorch_cpu.so+0xbb9843c)
#3 0x7f9abfd205c0 in at::(anonymous namespace)::wrapper_n