Skip to content

Instantly share code, notes, and snippets.

@kurtschelfthout
Last active May 27, 2023 13:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kurtschelfthout/d90006ab590686e71ce4092eb8a7f0c8 to your computer and use it in GitHub Desktop.
wgpu memory leak repro
use std::borrow::Cow;
use wgpu::util::DeviceExt;
/// Selects a GPU adapter and opens a device/queue pair on it.
///
/// Adapter selection goes through the wgpu util helpers, which honor the
/// `WGPU_POWER_PREF` (low|high), `WGPU_ADAPTER_NAME`, and `WGPU_BACKEND`
/// environment variables. Panics if no adapter is found or the device
/// request fails; otherwise always returns `Some`.
async fn get_device_async() -> Option<(wgpu::Device, wgpu::Queue)> {
    // Top-level WebGPU entry point.
    let instance = wgpu::Instance::default();

    // Backend set from the environment, defaulting to every backend.
    let backends = wgpu::util::backend_bits_from_env().unwrap_or_else(wgpu::Backends::all);

    // Env-var-driven adapter selection with a sensible default.
    let adapter = wgpu::util::initialize_adapter_from_env_or_default(&instance, backends, None)
        .await
        .expect("No suitable GPU adapters found on the system!");

    let info = adapter.get_info();
    println!(
        "Using {:#?} {} with {:#?} backend (Env vars: WGPU_POWER_PREF (low|high), WGPU_ADAPTER_NAME, WGPU_BACKEND).",
        info.device_type, info.name, info.backend
    );

    // Open the device with no optional features and downlevel-friendly limits.
    let descriptor = wgpu::DeviceDescriptor {
        label: None,
        features: wgpu::Features::empty(),
        limits: wgpu::Limits::downlevel_defaults(),
    };
    // Second argument is an optional API trace path (unused here).
    let pair = adapter.request_device(&descriptor, None).await.unwrap();
    Some(pair)
}
/// Synchronous wrapper: blocks the current thread until the async
/// adapter/device setup completes.
fn get_device() -> Option<(wgpu::Device, wgpu::Queue)> {
    let setup = get_device_async();
    pollster::block_on(setup)
}
/// Converts an element count into a byte size for element type `T`.
///
/// The original multiplied in `usize` before converting, so an oversized
/// `size` could silently wrap in release builds; this version uses checked
/// `u64` arithmetic so overflow panics loudly instead.
///
/// # Panics
/// Panics if `size * size_of::<T>()` does not fit in `u64`.
fn byte_size<T>(size: usize) -> u64 {
    let elem = u64::try_from(std::mem::size_of::<T>()).unwrap();
    u64::try_from(size)
        .ok()
        .and_then(|n| n.checked_mul(elem))
        .expect("byte size overflows u64")
}
/// Allocates a GPU storage buffer sized for `size` f32 elements.
///
/// The buffer is also flagged COPY_SRC so its contents could be copied out
/// for readback. It is created unmapped.
fn make_output_buffer(device: &wgpu::Device, size: usize) -> wgpu::Buffer {
    let byte_len = byte_size::<f32>(size);
    let descriptor = wgpu::BufferDescriptor {
        label: Some("Operation output"),
        size: byte_len,
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
        mapped_at_creation: false,
    };
    device.create_buffer(&descriptor)
}
/// Builds a bind group for a unary operation: the input buffer at binding 0
/// and the output buffer at binding 1, using the pipeline's auto-derived
/// layout for group 0.
fn get_bind_group_unary(
    device: &wgpu::Device,
    pipeline: &wgpu::ComputePipeline,
    input_buffer: &wgpu::Buffer,
    output_buffer: &wgpu::Buffer,
) -> wgpu::BindGroup {
    let layout = pipeline.get_bind_group_layout(0);
    let entries = [
        wgpu::BindGroupEntry {
            binding: 0,
            resource: input_buffer.as_entire_binding(),
        },
        wgpu::BindGroupEntry {
            binding: 1,
            resource: output_buffer.as_entire_binding(),
        },
    ];
    device.create_bind_group(&wgpu::BindGroupDescriptor {
        label: None,
        layout: &layout,
        entries: &entries,
    })
}
/// Records a single compute dispatch of `workgroup_count` x 1 x 1 workgroups
/// with the given pipeline and bind group, then submits the command buffer.
///
/// # Panics
/// Panics if `workgroup_count` does not fit in `u32`.
fn encode_and_submit(
    queue: &wgpu::Queue,
    device: &wgpu::Device,
    compute_pipeline: &wgpu::ComputePipeline,
    bind_group: &wgpu::BindGroup,
    workgroup_count: usize,
) {
    let groups = u32::try_from(workgroup_count).unwrap();
    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
    {
        // Scoped so the pass borrow ends before `encoder.finish()`.
        let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });
        pass.set_pipeline(compute_pipeline);
        pass.set_bind_group(0, bind_group, &[]);
        pass.dispatch_workgroups(groups, 1, 1);
    }
    queue.submit(std::iter::once(encoder.finish()));
}
/// Compiles the `repro.wgsl` shader (embedded at build time) and builds a
/// compute pipeline around its `call` entry point.
fn pipeline(device: &wgpu::Device) -> wgpu::ComputePipeline {
    let source = include_str!("repro.wgsl");
    let module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
        label: Some("repro.wgsl"),
        source: wgpu::ShaderSource::Wgsl(Cow::Borrowed(source)),
    });
    let descriptor = wgpu::ComputePipelineDescriptor {
        label: None,
        // `None` layout: wgpu derives the bind group layout from the shader.
        layout: None,
        module: &module,
        entry_point: "call",
    };
    device.create_compute_pipeline(&descriptor)
}
/// Repro driver: uploads one constant input buffer, then submits 10,000
/// tiny compute dispatches, allocating a fresh output buffer and bind group
/// on every iteration. Nothing is explicitly destroyed and the device is
/// never polled — this per-iteration churn is the memory-leak repro.
fn main() {
    let (device, queue) = get_device().unwrap();
    let compute_pipeline = pipeline(&device);
    let workgroup_count = 1;

    // 512*512 f32s of constant data, uploaded once and reused as the input.
    let cpu_data = &[0.010f32; 512 * 512];
    let input_buffer = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
        label: Some("data buffer"),
        contents: bytemuck::cast_slice(cpu_data),
        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
    });

    for _ in 0..10000 {
        // Intentionally leaked each iteration: buffer and bind group are
        // dropped without `destroy()` or a device poll.
        let output = make_output_buffer(&device, 1);
        let bindings =
            get_bind_group_unary(&device, &compute_pipeline, &input_buffer, &output);
        encode_and_submit(&queue, &device, &compute_pipeline, &bindings, workgroup_count);
    }
}
// repro.wgsl — shader for the leak repro. Bindings mirror the host-side
// bind group: binding 0 is the read-only input, binding 1 the read-write output.
@group(0) @binding(0)
var<storage, read> input_0: array<f32>;
@group(0) @binding(1)
var<storage, read_write> output_0: array<f32>;
// One invocation per output element. The host dispatches workgroup_count=1
// with workgroup_size(1), so in this repro only gidx == 0 does any work.
@compute
@workgroup_size(1)
fn call(@builtin(global_invocation_id) global_id: vec3<u32>) {
let gidx = global_id.x;
// Bounds guard: skip invocations past the end of the output array.
if(gidx >= arrayLength(&output_0)) {
return;
}
// Busy-work loop reading the whole input. NOTE(review): `acc` is
// overwritten, not accumulated, so it ends as the last element read —
// presumably fine for a leak repro, but confirm this is intended.
var acc = 0.0;
for (var rec_i = 0u; rec_i < 512u * 512u; rec_i += 1u) {
acc = input_0[rec_i];
}
output_0[gidx] = acc;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment