t5_base.safetensors (download model.safetensors from the link, rename it to t5_base.safetensors, and put it in ComfyUI/models/clip/): https://huggingface.co/google-t5/t5-base/blob/main/model.safetensors
Stable Audio Open checkpoint (goes in ComfyUI/models/checkpoints/; the workflow loads it as stable_audio_open_1.0.safetensors, so save it under that name): https://huggingface.co/stabilityai/stable-audio-open-1.0/tree/main
t5_base.safetensors (download model.safetensors from the link, rename it to t5_base.safetensors, and put it in ComfyUI/models/clip/): https://huggingface.co/google-t5/t5-base/blob/main/model.safetensors
Stable Audio Open checkpoint (goes in ComfyUI/models/checkpoints/; the workflow loads it as stable_audio_open_1.0.safetensors, so save it under that name): https://huggingface.co/stabilityai/stable-audio-open-1.0/tree/main
{
  "last_node_id": 15,
  "last_link_id": 18,
  "nodes": [
    {
      "id": 11,
      "type": "EmptyLatentAudio",
      "pos": [
        610,
        639
      ],
      "size": {
        "0": 210,
        "1": 26
      },
      "flags": {},
      "order": 0,
      "mode": 0,
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            12
          ],
          "shape": 3
        }
      ],
      "properties": {
        "Node name for S&R": "EmptyLatentAudio"
      }
    },
    {
      "id": 12,
      "type": "VAEDecodeAudio",
      "pos": [
        1220,
        187
      ],
      "size": {
        "0": 210,
        "1": 46
      },
      "flags": {},
      "order": 6,
      "mode": 0,
      "inputs": [
        {
          "name": "samples",
          "type": "LATENT",
          "link": 13
        },
        {
          "name": "vae",
          "type": "VAE",
          "link": 14,
          "slot_index": 1
        }
      ],
      "outputs": [
        {
          "name": "AUDIO",
          "type": "AUDIO",
          "links": [
            15
          ],
          "shape": 3,
          "slot_index": 0
        }
      ],
      "properties": {
        "Node name for S&R": "VAEDecodeAudio"
      }
    },
    {
      "id": 13,
      "type": "SaveAudio",
      "pos": [
        1487,
        190
      ],
      "size": {
        "0": 315,
        "1": 58
      },
      "flags": {},
      "order": 7,
      "mode": 0,
      "inputs": [
        {
          "name": "audio",
          "type": "AUDIO",
          "link": 15
        }
      ],
      "properties": {
        "Node name for S&R": "SaveAudio"
      },
      "widgets_values": [
        "audio/ComfyUI"
      ]
    },
    {
      "id": 7,
      "type": "CLIPTextEncode",
      "pos": [
        413,
        389
      ],
      "size": {
        "0": 425.27801513671875,
        "1": 180.6060791015625
      },
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 11
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "links": [
            6
          ],
          "slot_index": 0
        }
      ],
      "properties": {
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        ""
      ]
    },
    {
      "id": 3,
      "type": "KSampler",
      "pos": [
        863,
        186
      ],
      "size": {
        "0": 315,
        "1": 262
      },
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {
          "name": "model",
          "type": "MODEL",
          "link": 18
        },
        {
          "name": "positive",
          "type": "CONDITIONING",
          "link": 4
        },
        {
          "name": "negative",
          "type": "CONDITIONING",
          "link": 6
        },
        {
          "name": "latent_image",
          "type": "LATENT",
          "link": 12,
          "slot_index": 3
        }
      ],
      "outputs": [
        {
          "name": "LATENT",
          "type": "LATENT",
          "links": [
            13
          ],
          "slot_index": 0
        }
      ],
      "properties": {
        "Node name for S&R": "KSampler"
      },
      "widgets_values": [
        460563241628080,
        "randomize",
        100,
        7,
        "dpmpp_3m_sde_gpu",
        "exponential",
        1
      ]
    },
    {
      "id": 10,
      "type": "CLIPLoader",
      "pos": [
        -23,
        241
      ],
      "size": {
        "0": 315,
        "1": 82
      },
      "flags": {},
      "order": 1,
      "mode": 0,
      "outputs": [
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [
            10,
            11
          ],
          "shape": 3,
          "slot_index": 0
        }
      ],
      "properties": {
        "Node name for S&R": "CLIPLoader"
      },
      "widgets_values": [
        "t5_base.safetensors",
        "stable_audio"
      ]
    },
    {
      "id": 4,
      "type": "CheckpointLoaderSimple",
      "pos": [
        -42,
        403
      ],
      "size": {
        "0": 315,
        "1": 98
      },
      "flags": {},
      "order": 2,
      "mode": 0,
      "outputs": [
        {
          "name": "MODEL",
          "type": "MODEL",
          "links": [
            18
          ],
          "slot_index": 0
        },
        {
          "name": "CLIP",
          "type": "CLIP",
          "links": [],
          "slot_index": 1
        },
        {
          "name": "VAE",
          "type": "VAE",
          "links": [
            14
          ],
          "slot_index": 2
        }
      ],
      "properties": {
        "Node name for S&R": "CheckpointLoaderSimple"
      },
      "widgets_values": [
        "stable_audio_open_1.0.safetensors"
      ]
    },
    {
      "id": 6,
      "type": "CLIPTextEncode",
      "pos": [
        415,
        186
      ],
      "size": {
        "0": 422.84503173828125,
        "1": 164.31304931640625
      },
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [
        {
          "name": "clip",
          "type": "CLIP",
          "link": 10
        }
      ],
      "outputs": [
        {
          "name": "CONDITIONING",
          "type": "CONDITIONING",
          "links": [
            4
          ],
          "slot_index": 0
        }
      ],
      "properties": {
        "Node name for S&R": "CLIPTextEncode"
      },
      "widgets_values": [
        "metal music"
      ]
    }
  ],
  "links": [
    [
      4,
      6,
      0,
      3,
      1,
      "CONDITIONING"
    ],
    [
      6,
      7,
      0,
      3,
      2,
      "CONDITIONING"
    ],
    [
      10,
      10,
      0,
      6,
      0,
      "CLIP"
    ],
    [
      11,
      10,
      0,
      7,
      0,
      "CLIP"
    ],
    [
      12,
      11,
      0,
      3,
      3,
      "LATENT"
    ],
    [
      13,
      3,
      0,
      12,
      0,
      "LATENT"
    ],
    [
      14,
      4,
      2,
      12,
      1,
      "VAE"
    ],
    [
      15,
      12,
      0,
      13,
      0,
      "AUDIO"
    ],
    [
      18,
      4,
      0,
      3,
      0,
      "MODEL"
    ]
  ],
  "groups": [],
  "config": {},
  "extra": {
    "ds": {
      "scale": 1.4641000000000006,
      "offset": [
        -86.97806793936128,
        82.00428107739486
      ]
    }
  },
  "version": 0.4
}
Where can I download the custom nodes?
Update ComfyUI; the nodes are part of the base package now.
Is there any way to vary the length of the clips? StabilityAI states that the model is significantly better at sound effects than at anything else, and I have to agree, but generating 47 seconds of audio for a sound effect that only needs 2 seconds seems a bit rough. I'd imagine the "audio latent" we are feeding in is just a 47-second-long white noise clip, or something of the sort.
Where can I download the custom nodes?