Skip to content

Instantly share code, notes, and snippets.

Forked from rolux/
Created February 7, 2020 02:37
Show Gist options
  • Save asears/34f42c5b78e4e6be7b5db15e93a434e9 to your computer and use it in GitHub Desktop.
Save asears/34f42c5b78e4e6be7b5db15e93a434e9 to your computer and use it in GitHub Desktop.
# Prerequisite: git clone https://github.com/NVlabs/stylegan2 (provides dnnlib and pretrained_networks)
import os

import matplotlib.pyplot as plt
import moviepy.editor
import numpy as np
import PIL.Image
from scipy.interpolate import interp1d
from scipy.io import wavfile

import dnnlib
import dnnlib.tflib as tflib
import pretrained_networks
# Per-track loudness envelopes: audio[track_name] is a float array with one
# value per video frame, scaled so that the loudest frame of the track is 1.0.
audio = {}
fps = 60  # video frame rate; the envelopes are sampled at this rate

# sorted() makes the processing order deterministic (os.listdir order is
# arbitrary). This matters because `seed`, `duration` and `frames` of the
# LAST processed track stay bound at module level and are reused below.
for mp3_basename in sorted(f for f in os.listdir('data') if f.endswith('.mp3')):
    mp3_filename = f'data/{mp3_basename}'
    wav_filename = mp3_filename[:-4] + '.wav'
    if not os.path.exists(wav_filename):
        # Decode the MP3 once to 16-bit PCM so scipy can read it.
        audio_clip = moviepy.editor.AudioFileClip(mp3_filename)
        audio_clip.write_audiofile(wav_filename, fps=44100, nbytes=2, codec='pcm_s16le')

    # Strips a fixed 15-character prefix and the 5-character ').wav' suffix,
    # e.g. 'Culture Shock (Vocal).wav' -> 'Vocal'.
    track_name = os.path.basename(wav_filename)[15:-5]

    rate, signal = wavfile.read(wav_filename)
    signal = np.asarray(signal, dtype=np.float64)
    if signal.ndim > 1:
        signal = signal.mean(axis=1)  # to mono
    signal = np.abs(signal)
    if signal.shape[0] == 0:
        raise ValueError(f'{wav_filename} contains no audio samples')

    seed = signal.shape[0]  # the sample count doubles as the RNG seed later on
    duration = signal.shape[0] / rate
    frames = int(np.ceil(duration * fps))
    samples_per_frame = signal.shape[0] / frames

    # Mean absolute amplitude of the samples that fall into each video frame.
    envelope = np.zeros(frames, dtype=signal.dtype)
    for frame in range(frames):
        start = int(round(frame * samples_per_frame))
        stop = int(round((frame + 1) * samples_per_frame))
        envelope[frame] = signal[start:stop].mean()

    # Normalize to a peak of 1.0; a completely silent track stays all-zero
    # instead of turning into NaNs through a division by zero.
    peak = envelope.max()
    if peak > 0:
        envelope /= peak
    audio[track_name] = envelope
# Draw one figure per track showing its per-frame envelope, as a visual
# sanity check of the audio analysis.
# NOTE(review): the visible source only created empty figures; the title/plot
# calls below are a reconstruction of what was presumably lost — confirm.
for track in sorted(audio):
    plt.figure(figsize=(8, 3))
    plt.title(track)
    plt.plot(audio[track])
# Load the pretrained StyleGAN2 networks; only the Gs generator is used below.
network_pkl = 'gdrive:networks/stylegan2-ffhq-config-f.pkl'
_G, _D, Gs = pretrained_networks.load_networks(network_pkl)

# Keyword arguments for running the full generator: uint8 NHWC output and
# fixed (non-randomized) noise.
Gs_kwargs = dnnlib.EasyDict(
    output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True),
    randomize_noise=False,
)

# Same settings for running the synthesis network directly, plus a minibatch size.
Gs_syn_kwargs = dnnlib.EasyDict(
    output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True),
    randomize_noise=False,
    minibatch_size=4,
)

# Synthesis-network variables whose name starts with 'noise'.
noise_vars = [
    variable
    for var_name, variable in Gs.components.synthesis.vars.items()
    if var_name.startswith('noise')
]

# The generator's 'dlatent_avg' variable; used below as the centre point for
# truncation and as the reference for normalize_vector().
w_avg = Gs.get_var('dlatent_avg')
def get_ws(n, frames, seed):
    """Return a smooth, looping sequence of 512-d latent vectors, cached on disk.

    ``n`` key vectors are drawn from ``np.random.RandomState(seed).randn`` and
    spaced evenly over ``frames`` steps; the steps in between are filled in by
    quadratic interpolation, independently for each of the 512 components.

    Args:
        n: number of random key vectors (at least 1).
        frames: number of vectors in the returned sequence.
        seed: seed for the random key vectors.

    Returns:
        Array of shape ``(frames, 512)``. The result is stored in
        ``data/ws_{n}_{frames}_{seed}.npy`` and loaded from there on later
        calls with the same arguments.
    """
    filename = f'data/ws_{n}_{frames}_{seed}.npy'
    if not os.path.exists(filename):
        src_ws = np.random.RandomState(seed).randn(n, 512)
        # To make the sequence wrap around smoothly, the key vectors are laid
        # out three times in a row and only the middle copy is sampled, so the
        # interpolation at both ends "sees" the neighbouring key vectors.
        x = np.linspace(0, 3 * frames, 3 * len(src_ws), endpoint=False)
        y = np.tile(src_ws, (3, 1))
        # Only the middle third is kept, so only that part is evaluated.
        x_ = np.arange(frames, 2 * frames, dtype=float)
        # axis=0 interpolates all 512 components in one call.
        ws = interp1d(x, y, kind='quadratic', axis=0, fill_value='extrapolate')(x_)
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        np.save(filename, ws)
    ws = np.load(filename)
    return ws
def mix_styles(wa, wb, ivs):
    """Blend selected rows of ``wb`` into a copy of ``wa``.

    Args:
        wa: base array; it is copied, never modified.
        wb: array blended in; indexed with the same indices as ``wa``.
        ivs: iterable of ``(index, weight)`` pairs. For each pair the indexed
            rows become ``wa * (1 - weight) + wb * weight``, so weight 0 keeps
            ``wa`` and weight 1 takes ``wb``.

    Returns:
        The blended copy of ``wa``.
    """
    mixed = np.copy(wa)
    for rows, amount in ivs:
        # Always blend from the untouched ``wa``, not from ``mixed``.
        mixed[rows] = (1 - amount) * wa[rows] + amount * wb[rows]
    return mixed
def normalize_vector(v):
    """Rescale ``v`` to the standard deviation of the global ``w_avg`` and shift it.

    The result is ``v * std(w_avg) / std(v) + mean(w_avg) - mean(v)``.

    NOTE: the mean that is subtracted is that of the *unscaled* ``v``, so the
    result's mean equals ``mean(w_avg)`` only if ``mean(v)`` is 0 or the scale
    factor is 1.
    """
    target_std = np.std(w_avg)
    target_mean = np.mean(w_avg)
    rescaled = v * target_std / np.std(v)
    return rescaled + target_mean - np.mean(v)
def render_frame(t):
    """Render the video frame for time ``t`` (in seconds) as an RGB array.

    Reads the module-level envelopes in ``audio`` and the latent sequences
    ``base_ws`` / ``mix_ws`` to build a latent for this frame, runs the
    synthesis network on it and resizes the image to ``size`` x ``size``.

    NOTE: this function is stateful. Every call advances the global
    ``base_index``, so frames are expected to be rendered once each, in order.

    Args:
        t: time of the frame in seconds.

    Returns:
        The rendered image as a numpy array.
    """
    global base_index
    # Map the timestamp to an envelope index, clamped to the valid range.
    frame = np.clip(int(round(t * fps)), 0, frames - 1)

    # Walk through base_ws at a speed driven by the instrumental loudness.
    base_index += base_speed * audio['Instrumental'][frame]**2
    base_w = base_ws[int(round(base_index)) % len(base_ws)]
    # Repeat the vector for 18 rows — presumably one per synthesis layer of
    # the loaded network; confirm.
    base_w = np.tile(base_w, (18, 1))
    # Pull the latent towards w_avg; the 'FX' track widens psi from 0.5 to 1.0.
    psi = 0.5 + audio['FX'][frame] / 2
    base_w = w_avg + (base_w - w_avg) * psi

    mix_w = np.tile(mix_ws[frame], (18, 1))
    mix_w = w_avg + (mix_w - w_avg) * 0.75

    # Each of the three tracks controls how much of mix_w is blended into one
    # group of rows.
    ranges = [range(0, 4), range(4, 8), range(8, 18)]
    values = [audio[track][frame] for track in ['Drums', 'E Drums', 'Synth']]
    w = mix_styles(base_w, mix_w, zip(ranges, values))

    # Push along the mouth_open direction in proportion to the vocal loudness.
    w += mouth_open * audio['Vocal'][frame] * 1.5

    # The synthesis network takes a batch, so wrap w in a batch of one.
    image = Gs.components.synthesis.run(np.stack([w]), **Gs_syn_kwargs)[0]
    image = PIL.Image.fromarray(image).resize((size, size), PIL.Image.LANCZOS)
    return np.array(image)
# --- Render settings -------------------------------------------------------
# NOTE: `duration`, `frames` and `seed` are the values left over from the last
# track handled by the audio-analysis loop above.
size = 1080       # output frames are resized to size x size pixels
resolution = 10   # base_ws holds this many latent steps per video frame
seconds = int(np.ceil(duration))
base_frames = resolution * frames

# Latent sequences: one key vector per second of audio in both cases.
base_ws = get_ws(seconds, base_frames, seed)
mix_ws = get_ws(seconds, frames, seed + 1)

# Scaled so that the per-frame increments applied in render_frame add up to
# base_frames over the whole track, i.e. one pass through base_ws.
base_speed = base_frames / sum(audio['Instrumental']**2)
base_index = 0

# Latent direction loaded from disk (negated), rescaled with normalize_vector.
mouth_open = normalize_vector(-np.load('data/mouth_ratio.npy'))

# --- Soundtrack: instrumental and vocal stems mixed together ---------------
audio_clip_i = moviepy.editor.AudioFileClip('data/Culture Shock (Instrumental).wav')
audio_clip_v = moviepy.editor.AudioFileClip('data/Culture Shock (Vocal).wav')
audio_clip = moviepy.editor.CompositeAudioClip([audio_clip_i, audio_clip_v])

# --- Video: frames come from render_frame(t) --------------------------------
mp4_filename = 'data/Culture Shock.mp4'
video_clip = moviepy.editor.VideoClip(render_frame, duration=duration)
video_clip = video_clip.set_audio(audio_clip)
video_clip.write_videofile(
    mp4_filename,
    fps=fps,
    codec='libx264',
    audio_codec='aac',
    bitrate='8M',
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment