@torridgristle
torridgristle / Denoiser Slew Limiter.py
Created October 12, 2022 15:39
Stable Diffusion CFGDenoiser with slew limiting and optional frequency splitting for detail preservation.
import torch
import torch.nn as nn
import torchvision.transforms.functional as TF
class CFGDenoiserSlew(nn.Module):
    '''
    Clamps the maximum change each step can have.
    "limit" is the clamp bound. 0.4-0.8 seem good; 1.6 and 3.2 make very little difference and probably represent the upper end of useful values.
    "blur" is the radius of a gaussian blur used to frequency-split the limited output with the original output in an attempt to preserve detail and color.
    "last_step_is_blur", if true, will compare the model output to the blur-split output rather than just the limited output, which can look nicer.
@torridgristle
torridgristle / prompt_mass_encoding_randomization.py
Last active October 1, 2022 14:21
Generate every combination of prompt parts, encode all of the prompts in batches to avoid running out of memory. Alternatively, only keep the min/max channel values and min/max token norms and randomly generate prompts with randn noise. Intended for Stable Diffusion but can be used for anything with CLIP by just swapping out the model.get_learned…
import itertools
def prompt_combinations(prompt_parts):
    '''
    Provide a list of lists of prompt parts, like:
    [ ["A ","An "], ["anteater","feather duster"] ]
    '''
    opt_prompt = list(itertools.product(*prompt_parts, repeat=1))
    opt_prompt = [''.join(opt_prompt[b]) for b in range(len(opt_prompt))]
    return opt_prompt
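A quick usage example with the list from the docstring (output order follows itertools.product):

parts = [["A ", "An "], ["anteater", "feather duster"]]
print(prompt_combinations(parts))
# ['A anteater', 'A feather duster', 'An anteater', 'An feather duster']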
@torridgristle
torridgristle / CLIP ViT-L14 token embedding clusters.txt
Created August 29, 2022 18:22
I used CLIP ViT-L/14 token embeddings for tokens that were ASCII-only, ended in </w> (meaning the token isn't a prefix, though that doesn't guarantee it's a full word), started with a letter, had 3 or more letters, and weren't the end/start tokens.
Due to memory constraints I had to reduce the embeddings to a smaller number of channels, so the clustering uses a PCA of them, reducing 768 channels to 512. From there, k-means using https://github.com/subhadarship/kmeans_pytorch with 128 clusters and cosine distance, inside PyTorch's autocast wrapper in the hope of saving memory.
The cluster IDs from that were then used to average the non-PCA tokens, so the PCA only affected the clustering, not the actual values of the cluster centers. The averaging was done in .half() precision to save memory.
The words listed are the 64 best matching tokens (cosine similarity) for each cluster center.
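A minimal sketch of the pipeline described above, assuming the `kmeans` API shown in the linked kmeans_pytorch README; `token_emb` is a random placeholder standing in for the filtered CLIP token embeddings:

import torch
from kmeans_pytorch import kmeans  # https://github.com/subhadarship/kmeans_pytorch

# placeholder for the filtered CLIP ViT-L/14 token embeddings, shape [num_tokens, 768]
token_emb = torch.randn(20000, 768)

# reduce 768 channels to 512 with PCA so the clustering fits in memory
_, _, v = torch.pca_lowrank(token_emb, q=512)
token_pca = token_emb @ v  # [num_tokens, 512]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
with torch.autocast(device.type, enabled=(device.type == "cuda")):
    cluster_ids, _ = kmeans(X=token_pca, num_clusters=128, distance="cosine", device=device)

# average the original (non-PCA) embeddings per cluster ID
# (the gist does this averaging in .half() precision to save memory)
centers = torch.stack([token_emb[cluster_ids == c].mean(dim=0) for c in range(128)])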
Cluster 0 :
marker
markers
signaling
@torridgristle
torridgristle / depth_map_blur.py
Created August 24, 2022 18:58
Blur an image with a depth map in PyTorch. Splits the map into ranges of values, multiplies the image by those range masks, blurs them and the split map, sums all the blurred images and blurred maps together, and divides the blurred image sum by the blurred map sum.
# 1 is end and 0 is start in the map.
def map_blur(img, map, s_start=0.375, s_end=8, steps=8):
    img_slices = img * 0
    map_slices = map * 0
    for s in range(steps):
        sigma = (s/(steps-1)) * (s_end-s_start) + s_start
        slice_start = (s+0)/steps
        slice_end = (s+1)/steps
        map_slice = torch.logical_and(
            torch.greater_equal(map, slice_start),
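The preview cuts off mid-expression. Following the description above, the rest of the loop plausibly masks the image by each depth range, blurs both the masked image and the mask, and finally divides the summed blurred images by the summed blurred masks. A hedged sketch of that shape (the kernel size and the use of torchvision's gaussian_blur are assumptions, not the gist's code):

import torch
import torchvision.transforms.functional as TF

def map_blur_sketch(img, depth, s_start=0.375, s_end=8, steps=8):
    img_slices = img * 0
    map_slices = depth * 0
    for s in range(steps):
        sigma = (s / (steps - 1)) * (s_end - s_start) + s_start
        slice_start = (s + 0) / steps
        slice_end = (s + 1) / steps
        # mask of pixels whose depth value falls inside this slice
        map_slice = torch.logical_and(depth >= slice_start, depth < slice_end).float()
        kernel = int(sigma * 4) * 2 + 1  # assumed odd kernel size, roughly 4 sigma per side
        img_slices = img_slices + TF.gaussian_blur(img * map_slice, kernel, sigma)
        map_slices = map_slices + TF.gaussian_blur(map_slice, kernel, sigma)
    # divide the summed blurred images by the summed blurred masks to renormalize
    return img_slices / map_slices.clamp_min(1e-6)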
@torridgristle
torridgristle / Max Smooth Unpooling.py
Created August 3, 2022 13:43
Max Pool 2d Unpooling
import torch
import torch.nn.functional as F

# Perform max pool 2d with indices on a tensor
max_size = 8
max_output, max_indices = F.max_pool2d_with_indices(input_tensor, max_size)
# Unpool it to get a tensor of the original size with zeros in all non-max areas
max_unpool = F.max_unpool2d(max_output, max_indices, max_size, max_size)
# Unpool it using a tensor of ones with the same indices to get ones where the tensor was sampled
max_mask = F.max_unpool2d(torch.ones_like(max_output), max_indices, max_size, max_size)
# Makes a kernel that's round and the distance from the center
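The preview stops right after that comment. A hedged guess at what a "round, distance from the center" kernel might look like; the radius and the circular masking are assumptions, not the gist's code:

import torch

max_size = 8  # matches the pooling size above
k = max_size * 2 + 1
ys, xs = torch.meshgrid(torch.arange(k).float(), torch.arange(k).float(), indexing="ij")
dist = torch.sqrt((ys - max_size) ** 2 + (xs - max_size) ** 2)
# distance from the kernel center, zeroed outside a circular radius
round_dist_kernel = torch.where(dist <= max_size, dist, torch.zeros_like(dist))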
@torridgristle
torridgristle / vqgan_dec_skip_lores_attn.py
Last active July 26, 2022 20:24
VQGAN F8 Decoding with downscaled attention
def vqgan_dec_skip_lores_attn(h, temb=None):
    # middle
    h = vqgan.decoder.mid.block_1(h, temb)
    # run the attention block at half resolution and add only its residual back at full resolution
    h_half = F.interpolate(h, scale_factor=0.5, mode='bicubic', align_corners=False)
    h_half = vqgan.decoder.mid.attn_1(h_half) - h_half
    h_half = F.interpolate(h_half, scale_factor=2, mode='bicubic', align_corners=False)
    h = h + h_half
    h = vqgan.decoder.mid.block_2(h, temb)
    # upsampling
@torridgristle
torridgristle / sobel_scharr_farid_modules.py
Created February 28, 2022 14:25
Sobel and Farid edge detection modules for PyTorch. An option to use the Scharr kernel instead of Sobel is enabled by default; the Scharr kernel has better rotational symmetry.
import torch
import torch.nn as nn
import torch.nn.functional as F
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class Sobel(nn.Module):
    def __init__(self, structure=False, scharr=True, padding_mode='reflect'):
        super().__init__()
        self.structure = structure
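The preview stops in `__init__`. For reference, these are the standard Sobel and Scharr horizontal-gradient kernels the `scharr` flag presumably switches between, applied here with a plain conv2d as a sketch rather than the gist's own forward pass:

import torch
import torch.nn.functional as F

sobel_x = torch.tensor([[1., 0., -1.],
                        [2., 0., -2.],
                        [1., 0., -1.]])
scharr_x = torch.tensor([[ 3., 0.,  -3.],
                         [10., 0., -10.],
                         [ 3., 0.,  -3.]])

def grad_x(img, kernel):
    # img: [N, 1, H, W]; reflect-pad then convolve with the chosen gradient kernel
    img = F.pad(img, (1, 1, 1, 1), mode='reflect')
    return F.conv2d(img, kernel.view(1, 1, 3, 3))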
import torch
import torch.nn as nn
class Residual(nn.Module):
    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, *args, **kwargs):
        return self.fn(x, *args, **kwargs) + x
@torridgristle
torridgristle / kaiser_lowpass.py
Last active February 28, 2022 14:25
Kaiser Filter Lowpass Module for PyTorch. Torchvision's gaussian blur uses the "reflect" padding mode, but I'm not sure that makes sense, so I've set this to "replicate" by default.
import torch
import torch.nn as nn
import torch.nn.functional as F
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class KaiserLowpass(nn.Module):
    def __init__(self, width=7, beta=11, periodic=False, padding_mode='replicate'):
        super().__init__()
        self.padding_mode = padding_mode
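The preview ends early. A minimal sketch of how such a module's forward pass could be built from torch.kaiser_window; the 2D separable kernel, the normalization, and the depthwise grouping are assumptions, not the gist's code:

import torch
import torch.nn as nn
import torch.nn.functional as F

class KaiserLowpassSketch(nn.Module):
    def __init__(self, width=7, beta=11, periodic=False, padding_mode='replicate'):
        super().__init__()
        self.padding_mode = padding_mode
        win = torch.kaiser_window(width, periodic=periodic, beta=float(beta))
        kernel = torch.outer(win, win)      # separable 1D window -> 2D lowpass kernel
        kernel = kernel / kernel.sum()      # normalize so overall brightness is preserved
        self.register_buffer("kernel", kernel[None, None])  # [1, 1, width, width]
        self.pad = width // 2

    def forward(self, x):
        c = x.shape[1]
        x = F.pad(x, (self.pad,) * 4, mode=self.padding_mode)
        # depthwise convolution: the same lowpass kernel applied to every channel
        return F.conv2d(x, self.kernel.repeat(c, 1, 1, 1), groups=c)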