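t5_base.safetensors (goes in ComfyUI/models/clip/; download model.safetensors from the link and rename it to t5_base.safetensors, the name the workflow's CLIPLoader node expects): https://huggingface.co/google-t5/t5-base/blob/main/model.safetensors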
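Stable Audio Open 1.0 checkpoint (goes in ComfyUI/models/checkpoints/; the workflow's CheckpointLoaderSimple node expects the file to be named stable_audio_open_1.0.safetensors): https://huggingface.co/stabilityai/stable-audio-open-1.0/tree/main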
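t5_base.safetensors (goes in ComfyUI/models/clip/; download model.safetensors from the link and rename it to t5_base.safetensors, the name the workflow's CLIPLoader node expects): https://huggingface.co/google-t5/t5-base/blob/main/model.safetensors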
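Stable Audio Open 1.0 checkpoint (goes in ComfyUI/models/checkpoints/; the workflow's CheckpointLoaderSimple node expects the file to be named stable_audio_open_1.0.safetensors): https://huggingface.co/stabilityai/stable-audio-open-1.0/tree/main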
{
  "last_node_id": 15,
  "last_link_id": 18,
  "nodes": [
    {
      "id": 11,
      "type": "EmptyLatentAudio",
      "pos": [
        610,
        639
      ],
      "size": {
        "0": 210,
        "1": 26
      },
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            12
          ],
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "EmptyLatentAudio"
      }
    },
    {
      "id": 12,
      "type": "VAEDecodeAudio",
      "pos": [
        1220,
        187
      ],
      "size": {
        "0": 210,
        "1": 46
      },
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "link": 13
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": 14,
          "slot_index": 1
        }
      ],
      "outputs": [
        {
          "name": "AUDIO",
          "type": "AUDIO",
          "links": [
            15
          ],
          "shape": 3,
          "slot_index": 0
        }
      ],
      "properties": {
        "Node name for S&R": "VAEDecodeAudio"
      }
    },
    {
      "id": 13,
      "type": "SaveAudio",
      "pos": [
        1487,
        190
      ],
      "size": {
        "0": 315,
        "1": 58
      },
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "audio",
          "type": "AUDIO",
          "link": 15
        }
      ],
      "properties": {
        "Node name for S&R": "SaveAudio"
      },
      "widgets_values": [
        "audio/ComfyUI"
      ]
    },
    {
      "id": 7,
      "type": "CLIPTextEncode",
      "pos": [
        413,
        389
      ],
      "size": {
        "0": 425.27801513671875,
        "1": 180.6060791015625
      },
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 11
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "links": [
            6
          ],
          "slot_index": 0
        }
      ],
      "properties": {
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        ""
      ]
    },
    {
      "id": 3,
      "type": "KSampler",
      "pos": [
        863,
        186
      ],
      "size": {
        "0": 315,
        "1": 262
      },
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 18
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 4
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 6
        },
        {
          "name": "latent_image",
          "type": "LATENT",
          "link": 12,
          "slot_index": 3
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            13
          ],
          "slot_index": 0
        }
      ],
      "properties": {
        "Node name for S&R": "KSampler"
      },
      "widgets_values": [
        460563241628080,
        "randomize",
        100,
        7,
        "dpmpp_3m_sde_gpu",
        "exponential",
        1
      ]
    },
    {
      "id": 10,
      "type": "CLIPLoader",
      "pos": [
        -23,
        241
      ],
      "size": {
        "0": 315,
        "1": 82
      },
      "flags": {},
      "order": 1,
      "mode": 0,
      "outputs": [
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [
            10,
            11
          ],
          "shape": 3,
          "slot_index": 0
        }
      ],
      "properties": {
        "Node name for S&R": "CLIPLoader"
      },
      "widgets_values": [
        "t5_base.safetensors",
        "stable_audio"
      ]
    },
    {
      "id": 4,
      "type": "CheckpointLoaderSimple",
      "pos": [
        -42,
        403
      ],
      "size": {
        "0": 315,
        "1": 98
      },
      "flags": {},
      "order": 2,
      "mode": 0,
      "outputs": [
        {
          "name": "MODEL",
          "type": "MODEL",
          "links": [
            18
          ],
          "slot_index": 0
        },
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [],
          "slot_index": 1
        },
        {
          "name": "VAE",
          "type": "VAE",
          "links": [
            14
          ],
          "slot_index": 2
        }
      ],
      "properties": {
        "Node name for S&R": "CheckpointLoaderSimple"
      },
      "widgets_values": [
        "stable_audio_open_1.0.safetensors"
      ]
    },
    {
      "id": 6,
      "type": "CLIPTextEncode",
      "pos": [
        415,
        186
      ],
      "size": {
        "0": 422.84503173828125,
        "1": 164.31304931640625
      },
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 10
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "links": [
            4
          ],
          "slot_index": 0
        }
      ],
      "properties": {
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "metal music"
      ]
    }
  ],
  "links": [
    [
      4,
      6,
      0,
      3,
      1,
      "CONDITIONING"
    ],
    [
      6,
      7,
      0,
      3,
      2,
      "CONDITIONING"
    ],
    [
      10,
      10,
      0,
      6,
      0,
      "CLIP"
    ],
    [
      11,
      10,
      0,
      7,
      0,
      "CLIP"
    ],
    [
      12,
      11,
      0,
      3,
      3,
      "LATENT"
    ],
    [
      13,
      3,
      0,
      12,
      0,
      "LATENT"
    ],
    [
      14,
      4,
      2,
      12,
      1,
      "VAE"
    ],
    [
      15,
      12,
      0,
      13,
      0,
      "AUDIO"
    ],
    [
      18,
      4,
      0,
      3,
      0,
      "MODEL"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
      "scale": 1.4641000000000006,
      "offset": [
        -86.97806793936128,
        82.00428107739486
      ]
    }
  },
  "version": 0.4
}
Where can I download the custom nodes?
Update ComfyUI; the nodes are part of the base package now.
Is there any means to vary the length of the clips? Stability AI states that the model is significantly better at sound effects than anything else, and I have to agree, but generating 47 seconds of a sound effect when we only need 2 seconds seems a bit rough. I'd imagine the "audio latent" we are feeding in is just a 47-second-long white noise clip, or something of the sort.
Where can I download the custom nodes?