krgr/generate-image.py

## generate-image.py
#!/Users/tkroeger/.venv-raycast/bin/python

# Required parameters:
# @raycast.schemaVersion 1
# @raycast.title Generate Image
# @raycast.mode fullOutput

# Optional parameters:
# @raycast.icon 🌇
# @raycast.argument1 { "type": "text", "placeholder": "Filename" }
# @raycast.argument2 { "type": "text", "placeholder": "Prompt" }
# @raycast.needsConfirmation true

# Documentation:
# @raycast.description Generates an image via AI prompt
# @raycast.author Tim Kröger


# MIT License

# Copyright (c) 2024 Félix Sanz - https://www.felixsanz.dev
# Original at https://github.com/felixsanz/felixsanz_dev/blob/main/articles/pixart-a-with-less-than-8gb-vram/inference.py
# Modifications for Raycast and adaption from CUDA to MPS (c) 2024 Tim Kröger - https://krgr.dev

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import sys
import torch
from diffusers import PixArtAlphaPipeline
from transformers import T5EncoderModel
import gc

# Hugging Face model id used for both the text encoder and the image pipeline.
model = 'PixArt-alpha/PixArt-XL-2-1024-MS'

# Raycast passes the filename as argument 1 and the prompt as argument 2.
# Fail with a readable message instead of an IndexError traceback.
if len(sys.argv) < 3:
  sys.exit('Usage: generate-image.py <filename> <prompt>')

# Each queue entry is a dict with 'prompt' and 'filename'. The optional keys
# 'seed', 'width', 'height', 'cfg' and 'steps' override the defaults used below.
queue = [{'prompt': sys.argv[2], 'filename': sys.argv[1]}]

# Stage 1: load the pipeline without its transformer (transformer=None) so
# that only the text encoder is needed, then encode every queued prompt.
text_encoder = T5EncoderModel.from_pretrained(
  model,
  subfolder='text_encoder',
  torch_dtype=torch.float16,
  device_map='auto',
)

pipe = PixArtAlphaPipeline.from_pretrained(
  model,
  torch_dtype=torch.float16,
  text_encoder=text_encoder,
  transformer=None,
  device_map='auto',
)

with torch.no_grad():
  for generation in queue:
    generation['embeddings'] = pipe.encode_prompt(generation['prompt'])

# Drop the text encoder and release cached MPS memory before the second
# pipeline is loaded.
del text_encoder
del pipe
gc.collect()
torch.mps.empty_cache()

# Stage 2: reload the pipeline without a text encoder (text_encoder=None);
# the prompt embeddings computed above are passed in directly.
pipe = PixArtAlphaPipeline.from_pretrained(
  model,
  torch_dtype=torch.float16,
  text_encoder=None,
).to('mps')

# Render every queued entry. Iterating here (rather than reusing the variable
# left over from the encoding loop) keeps the script correct when the queue
# holds more than one entry.
for generation in queue:
  generator = torch.Generator(device='mps')

  # Use the entry's fixed seed when given, otherwise draw a fresh random one.
  if 'seed' in generation:
    generator.manual_seed(generation['seed'])
  else:
    generator.seed()

  # encode_prompt's result is consumed positionally: prompt embeddings,
  # prompt attention mask, negative embeddings, negative attention mask.
  embeddings = generation['embeddings']

  image = pipe(
    negative_prompt=None,
    width=generation.get('width', 1024),
    height=generation.get('height', 1024),
    guidance_scale=generation.get('cfg', 7),
    num_inference_steps=generation.get('steps', 20),
    generator=generator,
    prompt_embeds=embeddings[0],
    prompt_attention_mask=embeddings[1],
    negative_prompt_embeds=embeddings[2],
    negative_prompt_attention_mask=embeddings[3],
    num_images_per_prompt=1,
  ).images[0]

  # The '.png' suffix is always appended to the filename given by the user.
  image.save(f"{generation['filename']}.png")
	#!/Users/tkroeger/.venv-raycast/bin/python

	# Required parameters:
	# @raycast.schemaVersion 1
	# @raycast.title Generate Image
	# @raycast.mode fullOutput

	# Optional parameters:
	# @raycast.icon 🌇
	# @raycast.argument1 { "type": "text", "placeholder": "Filename" }
	# @raycast.argument2 { "type": "text", "placeholder": "Prompt" }
	# @raycast.needsConfirmation true

	# Documentation:
	# @raycast.description Generates an image via AI prompt
	# @raycast.author Tim Kröger


	# MIT License

	# Copyright (c) 2024 Félix Sanz - https://www.felixsanz.dev
	# Original at https://github.com/felixsanz/felixsanz_dev/blob/main/articles/pixart-a-with-less-than-8gb-vram/inference.py
	# Modifications for Raycast and adaption from CUDA to MPS (c) 2024 Tim Kröger - https://krgr.dev

	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:

	# The above copyright notice and this permission notice shall be included in all
	# copies or substantial portions of the Software.

	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	# SOFTWARE.

	import sys
	import torch
	from diffusers import PixArtAlphaPipeline
	from transformers import T5EncoderModel
	import gc

	# Hugging Face model id used for both the text encoder and the image pipeline.
	model = 'PixArt-alpha/PixArt-XL-2-1024-MS'

	# Raycast passes the filename as argument 1 and the prompt as argument 2.
	# Fail with a readable message instead of an IndexError traceback.
	if len(sys.argv) < 3:
		sys.exit('Usage: generate-image.py <filename> <prompt>')

	# Each queue entry is a dict with 'prompt' and 'filename'. The optional keys
	# 'seed', 'width', 'height', 'cfg' and 'steps' override the defaults used below.
	queue = [{'prompt': sys.argv[2], 'filename': sys.argv[1]}]

	# Stage 1: load the pipeline without its transformer (transformer=None) so
	# that only the text encoder is needed, then encode every queued prompt.
	text_encoder = T5EncoderModel.from_pretrained(
		model,
		subfolder='text_encoder',
		torch_dtype=torch.float16,
		device_map='auto',
	)

	pipe = PixArtAlphaPipeline.from_pretrained(
		model,
		torch_dtype=torch.float16,
		text_encoder=text_encoder,
		transformer=None,
		device_map='auto',
	)

	with torch.no_grad():
		for generation in queue:
			generation['embeddings'] = pipe.encode_prompt(generation['prompt'])

	# Drop the text encoder and release cached MPS memory before the second
	# pipeline is loaded.
	del text_encoder
	del pipe
	gc.collect()
	torch.mps.empty_cache()

	# Stage 2: reload the pipeline without a text encoder (text_encoder=None);
	# the prompt embeddings computed above are passed in directly.
	pipe = PixArtAlphaPipeline.from_pretrained(
		model,
		torch_dtype=torch.float16,
		text_encoder=None,
	).to('mps')

	# Render every queued entry. Iterating here (rather than reusing the variable
	# left over from the encoding loop) keeps the script correct when the queue
	# holds more than one entry.
	for generation in queue:
		generator = torch.Generator(device='mps')

		# Use the entry's fixed seed when given, otherwise draw a fresh random one.
		if 'seed' in generation:
			generator.manual_seed(generation['seed'])
		else:
			generator.seed()

		# encode_prompt's result is consumed positionally: prompt embeddings,
		# prompt attention mask, negative embeddings, negative attention mask.
		embeddings = generation['embeddings']

		image = pipe(
			negative_prompt=None,
			width=generation.get('width', 1024),
			height=generation.get('height', 1024),
			guidance_scale=generation.get('cfg', 7),
			num_inference_steps=generation.get('steps', 20),
			generator=generator,
			prompt_embeds=embeddings[0],
			prompt_attention_mask=embeddings[1],
			negative_prompt_embeds=embeddings[2],
			negative_prompt_attention_mask=embeddings[3],
			num_images_per_prompt=1,
		).images[0]

		# The '.png' suffix is always appended to the filename given by the user.
		image.save(f"{generation['filename']}.png")