# Norod/text-to-video-modelscope.py

## text-to-video-modelscope.py
# Ran it with the following packages installed:
# accelerate                   0.18.0
# diffusers                    0.16.0.dev0
# torch                        2.0.0+cu118
# torchvision                  0.15.0+cu118
# transformers                 4.28.1
# xformers                     0.0.18

import os
import shutil

import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video

# Most common filesystems reject a single file name longer than this.
MAX_FILENAME_BYTES = 255

# Load the text-to-video pipeline in half precision and swap its scheduler for
# DPM-Solver multistep, built from the config of the scheduler it shipped with.
pipe = DiffusionPipeline.from_pretrained(
    "damo-vilab/text-to-video-ms-1.7b",
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# Optimize for GPU memory.
# NOTE(review): float16 weights plus model CPU offload presumably require a
# CUDA device; the CPU fallback further down only affects the random
# generator -- confirm whether CPU-only runs are actually supported.
pipe.enable_model_cpu_offload()
pipe.enable_vae_slicing()

# Generation settings.
prompt = "Alien rave party with dancing aliens and a DJ alien playing music. The aliens are wearing colorful clothes and are having a good time. The aliens are having a good time at the rave"
neg_prompt = "text, watermark, grafitti, blurry"
infer_steps = 50
num_frames = 40
seed = 42  # 0 means "do not seed": the pipeline is then called with generator=None
guidance = 16
width = 256
height = 256

# Derive the output file name from the prompt and the settings. The prompt is
# free text, so make it safe to use as a single file name: path separators
# would otherwise point into a directory that does not exist, and an overlong
# prompt would make the final move fail after the expensive generation step.
name_suffix = f'_seed{seed}_steps{infer_steps}_frames{num_frames}_guidance{guidance}_{width}x{height}.mp4'
name_stem = prompt.replace(" ", "_")
for separator in (os.sep, os.altsep):
    if separator:  # os.altsep is None on platforms with a single separator
        name_stem = name_stem.replace(separator, "_")
# Truncate on bytes (the unit most filesystems count in); errors="ignore"
# drops a multi-byte character that the cut may have split.
stem_budget = max(MAX_FILENAME_BYTES - len(name_suffix.encode("utf-8")), 0)
name_stem = name_stem.encode("utf-8")[:stem_budget].decode("utf-8", errors="ignore")
output_video_path = name_stem + name_suffix

# Seeded generator on CUDA when available, otherwise on the CPU.
generator_device = "cuda" if torch.cuda.is_available() else "cpu"
generator = torch.Generator(generator_device).manual_seed(seed) if seed != 0 else None

video_frames = pipe(
    prompt,
    num_inference_steps=infer_steps,
    num_frames=num_frames,
    negative_prompt=neg_prompt,
    guidance_scale=guidance,
    width=width,
    height=height,
    generator=generator,
).frames

# Convert the frames to a video; no target path is passed, so the helper
# picks the location and returns it.
video_path = export_to_video(video_frames)
print(f'video_path: {video_path}')
print(f'Move to output_video_path: {output_video_path}')

# Move (not copy) the exported file to the descriptive name in the current
# working directory.
shutil.move(video_path, output_video_path)
	# Ran it with the following packages installed:
	# accelerate 0.18.0
	# diffusers 0.16.0.dev0
	# torch 2.0.0+cu118
	# torchvision 0.15.0+cu118
	# transformers 4.28.1
	# xformers 0.0.18

	import os
	import shutil

	import torch
	from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
	from diffusers.utils import export_to_video

	# Most common filesystems reject a single file name longer than this.
	MAX_FILENAME_BYTES = 255

	# Load the text-to-video pipeline in half precision and swap its scheduler for
	# DPM-Solver multistep, built from the config of the scheduler it shipped with.
	pipe = DiffusionPipeline.from_pretrained(
		"damo-vilab/text-to-video-ms-1.7b",
		torch_dtype=torch.float16,
		variant="fp16",
	)
	pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

	# Optimize for GPU memory.
	# NOTE(review): float16 weights plus model CPU offload presumably require a
	# CUDA device; the CPU fallback further down only affects the random
	# generator -- confirm whether CPU-only runs are actually supported.
	pipe.enable_model_cpu_offload()
	pipe.enable_vae_slicing()

	# Generation settings.
	prompt = "Alien rave party with dancing aliens and a DJ alien playing music. The aliens are wearing colorful clothes and are having a good time. The aliens are having a good time at the rave"
	neg_prompt = "text, watermark, grafitti, blurry"
	infer_steps = 50
	num_frames = 40
	seed = 42  # 0 means "do not seed": the pipeline is then called with generator=None
	guidance = 16
	width = 256
	height = 256

	# Derive the output file name from the prompt and the settings. The prompt is
	# free text, so make it safe to use as a single file name: path separators
	# would otherwise point into a directory that does not exist, and an overlong
	# prompt would make the final move fail after the expensive generation step.
	name_suffix = f'_seed{seed}_steps{infer_steps}_frames{num_frames}_guidance{guidance}_{width}x{height}.mp4'
	name_stem = prompt.replace(" ", "_")
	for separator in (os.sep, os.altsep):
		if separator:  # os.altsep is None on platforms with a single separator
			name_stem = name_stem.replace(separator, "_")
	# Truncate on bytes (the unit most filesystems count in); errors="ignore"
	# drops a multi-byte character that the cut may have split.
	stem_budget = max(MAX_FILENAME_BYTES - len(name_suffix.encode("utf-8")), 0)
	name_stem = name_stem.encode("utf-8")[:stem_budget].decode("utf-8", errors="ignore")
	output_video_path = name_stem + name_suffix

	# Seeded generator on CUDA when available, otherwise on the CPU.
	generator_device = "cuda" if torch.cuda.is_available() else "cpu"
	generator = torch.Generator(generator_device).manual_seed(seed) if seed != 0 else None

	video_frames = pipe(
		prompt,
		num_inference_steps=infer_steps,
		num_frames=num_frames,
		negative_prompt=neg_prompt,
		guidance_scale=guidance,
		width=width,
		height=height,
		generator=generator,
	).frames

	# Convert the frames to a video; no target path is passed, so the helper
	# picks the location and returns it.
	video_path = export_to_video(video_frames)
	print(f'video_path: {video_path}')
	print(f'Move to output_video_path: {output_video_path}')

	# Move (not copy) the exported file to the descriptive name in the current
	# working directory.
	shutil.move(video_path, output_video_path)