Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save AmericanPresidentJimmyCarter/790c9ae23ff0831a74d9a48977ee712d to your computer and use it in GitHub Desktop.
import os

import numpy as np
from einops import repeat
from PIL import Image

import stable_inference
'''
Interpolate between two images with a prompt of what you expect the midstate to be.
Alter the stuff below here to whatever you need it to be.
'''
# Get these from https://huggingface.co/spaces/pharma/CLIP-Interrogator
prompt_start = 'a close up of a pigeon on the ground, by John Wollaston, shutterstock contest winner, superflat, greta thunberg, gigapixel, cocky smirk, fibonacci sequences, pot-bellied, mike stoklasa, yearbook photo, puyallup berteronian'
prompt_end = 'a man with a tattoo on his arm, cobra, arney freytag!!!, gangrel, masculine jawline!, that resembles a bull\'s, blue eyes white dragon, realistic proportions sfw, awarding winning, menacing pose, half shaved haircut, zaun'
# Whatever you imagine your midstate looking like
prompt_midstate = 'anthropomorphic pigeon anthro furry hybrid man trending on artstation concept art covered in pigeon feathers beautiful muscles digital painting of a hybrid character large muscles bird'
START_IMAGE_FILE = 'start.jpg'
# BUG FIX: the original line was missing the closing quote (SyntaxError).
END_IMAGE_FILE = 'end.jpg'
FOLDER = 'test'  # Output folder for the rendered frames
MIX_PREV_FACTOR = 0. # Starts to corrupt a lot beyond 0.05
MAX_STRENGTH = 0.5 # Strength at maximum in the middle of the interpolation
SEED = 12345
SECONDS = 10
FRAMES_PER_SEC = 30
# Frames for each half of the interpolation (in to midstate, midstate to end).
STEPS_IN_OUT = int((SECONDS * FRAMES_PER_SEC) / 2)
# Make sure the output folder exists before any frame is saved,
# otherwise the later `Image.save` calls would raise FileNotFoundError.
import os
os.makedirs(FOLDER, exist_ok=True)
engine = stable_inference.StableDiffusionInference(
checkpoint_loc='./sd-v1-5-inpainting.ckpt',
)


def _image_to_latent(path):
    # Load an image from disk, move it to the GPU as fp16, and encode it
    # into the diffusion model's latent space.
    # NOTE: both images must be the same size — align them as best you can.
    pil = Image.open(path)
    tensor, (_, _) = stable_inference.util.load_img(img=pil)
    tensor = tensor.half().to('cuda')
    latent = engine.model.get_first_stage_encoding(
        engine.model.encode_first_stage(tensor))
    return pil, tensor, latent


start_pil, start_tensor, start = _image_to_latent(START_IMAGE_FILE)
end_pil, end_tensor, end = _image_to_latent(END_IMAGE_FILE)
# Text conditionings for the two endpoints and the imagined midstate.
# The unconditioning is identical for every prompt, so it is kept once;
# the per-prompt embedding managers are not needed and are discarded.
conditioning_start, unconditioning, weighted_subprompts_start, _ = (
    engine.compute_conditioning_and_weights(prompt_start, 1))
conditioning_end, _, weighted_subprompts_end, _ = (
    engine.compute_conditioning_and_weights(prompt_end, 1))
conditioning_bird, _, _, _ = (
    engine.compute_conditioning_and_weights(prompt_midstate, 1))
# A 50/50 blend of the start/end subprompt weights is used for every frame.
weighted_subprompts = stable_inference.util.combine_weighted_subprompts(
    0.5, weighted_subprompts_start, weighted_subprompts_end)
strength_schedule = []
last_samples = None
# First half: ease from the start image toward the midstate prompt.
# The sqrt schedule front-loads small steps so motion starts gently.
for frame_idx, t in enumerate(np.linspace(0., 1., STEPS_IN_OUT) ** 0.5):
    print('step', frame_idx, t)
    # Rotate the text conditioning from the start prompt toward the midstate.
    cond = stable_inference.util.slerp(
        t,
        conditioning_start,
        conditioning_bird,
    )
    # Latent travels only the first half of the start->end path here.
    latent = stable_inference.util.slerp(t / 2, start, end)
    if last_samples is not None:
        # Blend in a fraction of the previous frame for temporal coherence.
        latent = stable_inference.util.slerp(MIX_PREV_FACTOR, latent, last_samples)
    # Denoising strength ramps up toward the middle of the interpolation;
    # remember it so the second half can mirror the schedule.
    strength = MAX_STRENGTH * t + 0.01
    strength_schedule.append(strength)
    last_samples, extra_data = engine.sample(
        '',
        1,
        'dpmpp_2m',
        SEED,
        50,
        conditioning=cond,
        init_latent=repeat(latent, '1 ... -> b ...', b=1),
        scale=7.5,
        strength=strength,
        unconditioning=unconditioning,
        weighted_subprompts=weighted_subprompts,
    )
    extra_data['images'][0].save(f'{FOLDER}/{frame_idx}.png')
# Second half: ease from the midstate down to the end image, replaying the
# strength schedule in reverse so the ramp is symmetric about the middle.
strength_schedule.reverse()
for frame_idx, t in enumerate(np.flip(np.linspace(0., 1., STEPS_IN_OUT) ** 0.5)):
    print('step', frame_idx + STEPS_IN_OUT, t)
    # Rotate the conditioning from the midstate toward the end prompt.
    cond = stable_inference.util.slerp(
        1 - t,
        conditioning_bird,
        conditioning_end,
    )
    # Latent covers the second half of the start->end path.
    latent = stable_inference.util.slerp(0.5 + (1 - t) / 2, start, end)
    # Blend in the previous frame (last_samples is set by the first loop).
    latent = stable_inference.util.slerp(MIX_PREV_FACTOR, latent, last_samples)
    last_samples, extra_data = engine.sample(
        '',
        1,
        'dpmpp_2m',
        SEED,
        50,
        conditioning=cond,
        init_latent=repeat(latent, '1 ... -> b ...', b=1),
        scale=7.5,
        strength=strength_schedule[frame_idx],
        unconditioning=unconditioning,
        weighted_subprompts=weighted_subprompts,
    )
    extra_data['images'][0].save(f'{FOLDER}/{frame_idx + STEPS_IN_OUT}.png')
# Make a video @ 30 fps, 512x512 image size
# ffmpeg -r 30 -f image2 -s 512x512 -i test/%d.png -vcodec libx264 -crf 25 -pix_fmt yuv420p test.mp4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment