import torch
import torch.nn as nn
from torch.autograd import Function


class PassThrough(Function):
    @staticmethod
    def forward(ctx, *inputs):
        return inputs

    @staticmethod
    def backward(ctx, *grad_outputs):
        # Grads are materialized by default, so an output that did not
        # contribute to the loss arrives here as a zero tensor, not None.
        print(f"grad_outputs in PassThrough backward {grad_outputs}")
        return grad_outputs


class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.a = nn.Linear(1, 1, bias=False)
        self.b = nn.Linear(1, 1, bias=False)

    def forward(self, x):
        a, b = self.a(x), self.b(x)
        # Get tensors from tuple. This would be a more general call to
        # _find_tensors.
        ret = a, b
        new_a, new_b = PassThrough.apply(a, b)
        # Reconstruct tuple from output tensors. This would require a more
        # general function that repacks the tensor(s) into the data structure;
        # see the sketch at the bottom of the file.
        ret = new_a, new_b
        return ret

model = MyModel()


def print_grads():
    for param_name, param in model.named_parameters():
        print(f"{param_name} : {param.grad}")


inp = torch.ones(1)
print("-- before backward ---")
print_grads()
for _ in range(3):
    model.zero_grad()
    out = model(inp)
    loss = out[0].sum()
    print("Calling backward...")
    loss.backward()
    print("-- after bwd --")
    print_grads()
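

# The comments in MyModel.forward allude to a more general extraction and
# repacking step, akin to DDP's internal _find_tensors. The helpers below are
# a minimal sketch of that idea (find_tensors and repack are hypothetical
# names, not PyTorch APIs): walk a nested tuple/list output, collect the
# tensors to feed PassThrough.apply, then rebuild the structure with the
# returned tensors substituted in order.


def find_tensors(obj):
    # Collect tensors from arbitrarily nested tuples/lists, left to right.
    if isinstance(obj, torch.Tensor):
        return [obj]
    if isinstance(obj, (tuple, list)):
        return [t for item in obj for t in find_tensors(item)]
    return []


def repack(obj, tensor_iter):
    # Rebuild obj's structure, drawing replacement tensors from tensor_iter
    # in the same order that find_tensors produced them.
    if isinstance(obj, torch.Tensor):
        return next(tensor_iter)
    if isinstance(obj, (tuple, list)):
        return type(obj)(repack(item, tensor_iter) for item in obj)
    return obj


# Usage: with these helpers, the generalized forward body would read roughly
# as follows, for any nesting of tuples/lists around the output tensors.
ret = (model.a(inp), [model.b(inp)])
tensors = find_tensors(ret)
new_tensors = PassThrough.apply(*tensors)
ret = repack(ret, iter(new_tensors))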