# zeikomi552/inpaint2.py

## inpaint2.py
from PIL import Image
import argparse, os, sys, glob
import torch
from torch import autocast
from diffusers import (
    DDIMScheduler,
    StableDiffusionInpaintPipeline
)

def _build_parser():
    """Build the argument parser describing the script's command-line options."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--init-img",
        type=str,
        nargs="?",
        help="path to the input image"
    )
    parser.add_argument(
        "--mask-img",
        type=str,
        nargs="?",
        help="path to the mask image"
    )
    parser.add_argument(
        "--out-img",
        type=str,
        nargs="?",
        help="path to the output image"
    )

    parser.add_argument(
        "--prompt",
        type=str,
        nargs="?",
        default="a painting of a virus monster playing guitar",
        help="the prompt to render"
    )

    # Default is a real None (not the string "None") so that "no token" is
    # never forwarded to the hub as a literal credential.
    parser.add_argument(
        "--accesstoken",
        type=str,
        nargs="?",
        default=None,
        help="Hugging Face Access Token"
    )

    parser.add_argument(
        "--seed",
        type=int,
        default=42,
        help="the seed (for reproducible sampling)",
    )

    parser.add_argument(
        "--ddim_steps",
        type=int,
        default=50,
        help="number of ddim sampling steps",
    )
    parser.add_argument(
        "--scale",
        type=float,
        default=7.5,
        help="unconditional guidance scale: eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))",
    )
    parser.add_argument(
        "--strength",
        type=float,
        default=0.8,
        help="strength for noising/unnoising. 1.0 corresponds to full destruction of information in init image",
    )
    parser.add_argument(
        "--H",
        type=int,
        default=512,
        help="image height, in pixel space",
    )
    parser.add_argument(
        "--W",
        type=int,
        default=512,
        help="image width, in pixel space",
    )
    parser.add_argument(
        "--model-id",
        type=str,
        nargs="?",
        help="Inpainting model id by Huggin Face",
    )
    return parser


def main():
    """Run one inpainting pass configured from the command line.

    Parses the options, loads the init and mask images (both resized to
    ``--W`` x ``--H``), builds a ``StableDiffusionInpaintPipeline`` with a
    DDIM scheduler on the CUDA device, generates a single image for the
    prompt and writes it to ``--out-img``.

    Exits with status 2 (via ``parser.error``) when ``--init-img``,
    ``--mask-img``, ``--out-img`` or ``--model-id`` is missing, instead of
    failing later with an unrelated error on a ``None`` value.
    """
    parser = _build_parser()
    opt = parser.parse_args()

    # These options have no usable default; report all missing ones at once.
    needed = (
        ("--init-img", opt.init_img),
        ("--mask-img", opt.mask_img),
        ("--out-img", opt.out_img),
        ("--model-id", opt.model_id),
    )
    missing = [flag for flag, value in needed if not value]
    if missing:
        parser.error("the following arguments are required: " + ", ".join(missing))

    MODEL_ID = opt.model_id
    DEVICE = "cuda"
    # Treat the legacy literal "None" (the former default) and an empty value
    # as "no token" so existing invocations keep working.
    YOUR_TOKEN = opt.accesstoken if opt.accesstoken not in (None, "", "None") else None
    INIT_IMG = opt.init_img
    MASK_IMG = opt.mask_img
    OUT_IMG = opt.out_img
    PROMPT = opt.prompt
    DDIM_SEED = opt.seed
    STEP = opt.ddim_steps
    SCALE = opt.scale
    STRENGTH = opt.strength
    WIDTH = opt.W
    HEIGHT = opt.H

    # Load both images before the (slow) model load so that a bad path fails
    # fast. ``resize``/``convert`` return detached copies, so the underlying
    # files can be closed right away.
    with Image.open(MASK_IMG) as mask_src:
        mask_img = mask_src.resize((WIDTH, HEIGHT))
    with Image.open(INIT_IMG) as init_src:
        # Normalise to 3-channel RGB so RGBA/palette/greyscale inputs work.
        init_img = init_src.convert("RGB").resize((WIDTH, HEIGHT))

    scheduler = DDIMScheduler(
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        clip_sample=False,
        set_alpha_to_one=False,
    )
    pipe = StableDiffusionInpaintPipeline.from_pretrained(
        MODEL_ID,
        scheduler=scheduler,
        revision="fp16",
        torch_dtype=torch.float16,
        use_auth_token=YOUR_TOKEN
    ).to(DEVICE)

    # A seeded generator makes the sampling reproducible for a given --seed.
    generator = torch.Generator(device=DEVICE).manual_seed(DDIM_SEED)
    # NOTE(review): the ``init_image`` keyword and the ``["sample"]`` result key
    # are kept exactly as the script was written; confirm they match the
    # installed diffusers version.
    with autocast(DEVICE):
        image = pipe(
            prompt=PROMPT,
            init_image=init_img,
            mask_image=mask_img,
            strength=STRENGTH,
            guidance_scale=SCALE,
            generator=generator,
            num_inference_steps=STEP,
        )["sample"][0]
    image.save(OUT_IMG)


if __name__ == "__main__":
    # Run the CLI only when executed as a script, not when imported.
    # The tab-flattened second copy of this whole script that used to follow
    # here duplicated the definitions above and made the module unparseable,
    # so it has been removed.
    main()