Skip to content

Instantly share code, notes, and snippets.

@krgr
Last active September 2, 2024 14:19
Show Gist options
  • Save krgr/c47c140024f895afed389ac0f37603d5 to your computer and use it in GitHub Desktop.
Save krgr/c47c140024f895afed389ac0f37603d5 to your computer and use it in GitHub Desktop.
#!/Users/tkroeger/.venv-raycast/bin/python
# Required parameters:
# @raycast.schemaVersion 1
# @raycast.title Generate Image
# @raycast.mode fullOutput
# Optional parameters:
# @raycast.icon 🌇
# @raycast.argument1 { "type": "text", "placeholder": "Enter your prompt" }
# @raycast.argument2 { "type": "dropdown", "placeholder": "Style", "data": [{"title": "(No style)", "value": "(No style)"}, {"title": "Cinematic", "value": "Cinematic"}, {"title": "Photographic", "value": "Photographic"}, {"title": "Anime", "value": "Anime"}, {"title": "Manga", "value": "Manga"}, {"title": "Digital art", "value": "Digital art"}, {"title": "Pixel art", "value": "Pixel art"}, {"title": "Fantasy art", "value": "Fantasy art"}, {"title": "Neonpunk", "value": "Neonpunk"}, {"title": "3D Model", "value": "3D Model"}]}
# @raycast.argument3 { "type": "text", "placeholder": "Enter a negative prompt", "optional": true }
# @raycast.needsConfirmation true
# Documentation:
# @raycast.description Generates an image via AI prompt
# @raycast.author Tim Kroeger
# MIT License
# Copyright (c) 2024 Félix Sanz - https://www.felixsanz.dev
# Original at https://github.com/felixsanz/felixsanz_dev/blob/main/articles/pixart-a-with-less-than-8gb-vram/inference.py
# Modifications for Raycast and adaption from CUDA to MPS (c) 2024 Tim Kröger - https://krgr.dev
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import sys
import torch
from diffusers import PixArtAlphaPipeline
from transformers import T5EncoderModel
import gc
import time
from typing import List, Tuple, Union
# Model identifier passed to every `from_pretrained` call in this script.
model = 'PixArt-alpha/PixArt-XL-2-1024-MS'

# Style presets.
#   name            -- lookup key; the style argument is matched against it
#                      verbatim, so it must equal the corresponding "value" of
#                      the Raycast "Style" dropdown (argument2 in the header).
#   prompt          -- template; "{prompt}" is replaced by the user's prompt.
#   negative_prompt -- negative prompt terms contributed by the style.
style_list = [
    {
        "name": "(No style)",
        "prompt": "{prompt}",
        "negative_prompt": "",
    },
    {
        "name": "Cinematic",
        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
        "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
    },
    {
        "name": "Photographic",
        "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
        "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly",
    },
    {
        "name": "Anime",
        "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
        "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast",
    },
    {
        "name": "Manga",
        "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
        "negative_prompt": "ugly, deformed, noisy, blurry, low contrast, realism, photorealistic, Western comic style",
    },
    {
        # Spelled "Digital art" (lower-case "art") to match the dropdown value;
        # the former "Digital Art" never matched, so the style silently fell
        # back to "(No style)".
        "name": "Digital art",
        "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
        "negative_prompt": "photo, photorealistic, realism, ugly",
    },
    {
        "name": "Pixel art",
        "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
        "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic",
    },
    {
        "name": "Fantasy art",
        "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
        "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white",
    },
    {
        "name": "Neonpunk",
        "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
        "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",
    },
    {
        "name": "3D Model",
        "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
        "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
    },
]

# name -> (prompt template, negative prompt) for direct lookup by style name.
styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
# Style names in the order they are declared above.
STYLE_NAMES = list(styles.keys())
# Preset used when the requested style name is not found in `styles`.
DEFAULT_STYLE_NAME = "(No style)"
def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
    """Expand a style preset into the final (prompt, negative prompt) pair.

    Args:
        style_name: Key into ``styles``; a name that is not present falls back
            to the ``DEFAULT_STYLE_NAME`` preset.
        positive: User prompt substituted for ``{prompt}`` in the style's
            prompt template.
        negative: Optional additional negative prompt. ``None`` and ``""`` both
            mean "nothing to add".

    Returns:
        ``(prompt, negative_prompt)``. The negative prompt is the style's own
        negative prompt followed by ``negative``, separated by ``", "``; empty
        parts are omitted.
    """
    template, style_negative = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
    # Join only the non-empty parts. Plain concatenation glued the last style
    # term onto the first user term (e.g. "disfigured" + "blurry").
    negative_terms = [part for part in (style_negative, negative) if part]
    return template.replace("{prompt}", positive), ", ".join(negative_terms)
# Output file stem; the ".png" extension is appended when the image is saved.
filename = 'image-' + time.strftime("%Y%m%d-%H%M%S")

# Positional command-line arguments, in the order of the Raycast arguments
# declared in the header. zip() stops at the shorter sequence, so arguments
# that were not supplied are absent from the dict and read back as None.
arg_names = ['command', 'prompt', 'style', 'negative_prompt']
args = dict(zip(arg_names, sys.argv))
prompt = args.get('prompt')
style = args.get('style')
negative_prompt = args.get('negative_prompt')

if prompt is None:
    # Exit with a readable message instead of a TypeError inside apply_style.
    sys.exit('No prompt given. Usage: <script> PROMPT [STYLE] [NEGATIVE_PROMPT]')

prompt, negative_prompt = apply_style(style, prompt, negative_prompt)

# Work list; an entry may also carry 'seed', 'width', 'height', 'cfg' and
# 'steps' keys, which override the defaults used in the render stage.
queue = [
    {'prompt': prompt, 'negative_prompt': negative_prompt, 'filename': filename},
]

# --- Stage 1: text encoding -------------------------------------------------
# Build a pipeline that holds the text encoder but no transformer, and use it
# only to turn the prompts into embeddings.
text_encoder = T5EncoderModel.from_pretrained(
    model,
    subfolder='text_encoder',
    torch_dtype=torch.float16,
    device_map='auto',
)
pipe = PixArtAlphaPipeline.from_pretrained(
    model,
    torch_dtype=torch.float16,
    text_encoder=text_encoder,
    transformer=None,
    device_map='balanced',
)

with torch.no_grad():
    for generation in queue:
        # negative_prompt must be passed by keyword: the second positional
        # parameter of encode_prompt is do_classifier_free_guidance, so a
        # positional negative prompt was ignored (and an empty one switched
        # the negative embeddings off).
        generation['embeddings'] = pipe.encode_prompt(
            generation['prompt'],
            negative_prompt=generation['negative_prompt'],
        )

# Drop the encoding pipeline and release cached MPS memory before the second
# pipeline is loaded.
del text_encoder
del pipe
gc.collect()
torch.mps.empty_cache()

# --- Stage 2: image generation ----------------------------------------------
# Reload the pipeline without a text encoder; the embeddings computed above
# are fed in directly.
pipe = PixArtAlphaPipeline.from_pretrained(
    model,
    torch_dtype=torch.float16,
    text_encoder=None,
).to('mps')

# Iterate explicitly instead of relying on the loop variable left over from
# stage 1, so every queued entry is rendered.
for generation in queue:
    generator = torch.Generator(device='mps')
    if 'seed' in generation:
        generator.manual_seed(generation['seed'])
    else:
        generator.seed()

    # Order matches how the embeddings tuple is consumed below:
    # prompt embeds, prompt mask, negative embeds, negative mask.
    prompt_embeds, prompt_mask, negative_embeds, negative_mask = generation['embeddings']

    image = pipe(
        negative_prompt=None,
        width=generation.get('width', 1024),
        height=generation.get('height', 1024),
        guidance_scale=generation.get('cfg', 7),
        num_inference_steps=generation.get('steps', 20),
        generator=generator,
        prompt_embeds=prompt_embeds,
        prompt_attention_mask=prompt_mask,
        negative_prompt_embeds=negative_embeds,
        negative_prompt_attention_mask=negative_mask,
        num_images_per_prompt=1,
    ).images[0]
    image.save(f"{generation['filename']}.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment