CharlesJQuarra / grad_checkpoint_linear.py
Created September 21, 2018 16:59
Attempt at a linear unit that supports splitting the parameter space into a grid of gradient-checkpoint nodes. The issue right now is that when there is more than one segment, `backward()` only updates the gradient for the last parameter.
import torch
import torch.nn as nn
import torch.utils.checkpoint as checkpoint

def get_segments(total, max_length):
    if total > max_length:
        # Ceil division, so the last segment never exceeds max_length
        # (plain floor division would return e.g. [4, 6] for total=10, max_length=4).
        segments = -(-total // max_length)
    else:
        segments = 1
    return (segments - 1) * [max_length] + [total - (segments - 1) * max_length]
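
A minimal sketch of how get_segments could drive the checkpointed unit described above. The class name SegmentedLinear, the max_segment argument, and the column-block layout are illustrative assumptions, not the gist's actual code: the output features are split into blocks, and each block runs under torch.utils.checkpoint.checkpoint so its activations are recomputed during backward().

class SegmentedLinear(nn.Module):  # hypothetical name, for illustration only
    def __init__(self, in_features, out_features, max_segment=64):
        super().__init__()
        # One nn.Linear per segment of the output dimension,
        # e.g. out_features=10, max_segment=4 -> block widths [4, 4, 2]
        self.blocks = nn.ModuleList(
            nn.Linear(in_features, n)
            for n in get_segments(out_features, max_segment)
        )

    def forward(self, x):
        # use_reentrant=False (recent PyTorch) propagates gradients through
        # checkpointed segments more robustly than the reentrant variant
        outs = [checkpoint.checkpoint(blk, x, use_reentrant=False)
                for blk in self.blocks]
        return torch.cat(outs, dim=-1)
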
CharlesJQuarra / fat_cat.py
Created August 16, 2018 15:56
broadcastable version of `torch.cat`
import torch

"""
behavior:
fat_cat([torch.randn(1,7,20), torch.randn(5,1,13)], dim=-1).size() == torch.Size([5, 7, 33])
"""

def axis_repeat(t, dim, times):
    if t.size()[dim] != 1:
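        # Gist truncated at this point; what follows is a hedged completion
        # sketch consistent with the documented behavior above, not the
        # original code.
        return t  # only size-1 axes are broadcast; others pass through
    repeats = [1] * t.dim()
    repeats[dim] = times
    return t.repeat(*repeats)  # tile the size-1 axis `times` times

def fat_cat(tensors, dim=-1):
    # Assumed driver: expand every non-concatenation axis of each tensor to
    # the common broadcast size, then concatenate along `dim`, matching the
    # behavior note: (1,7,20) and (5,1,13) -> (5,7,33) for dim=-1.
    ndim = tensors[0].dim()
    dim = dim % ndim
    sizes = [max(t.size()[d] for t in tensors) for d in range(ndim)]
    expanded = []
    for t in tensors:
        for d in range(ndim):
            if d != dim:
                t = axis_repeat(t, d, sizes[d])
        expanded.append(t)
    return torch.cat(expanded, dim=dim)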