@brothermechanic
Created January 10, 2020 18:20
D4686_master.diff
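
This diff (a rebased copy of Blender differential D4686) adds adaptive sampling to Cycles: a use_adaptive_sampling toggle with adaptive_threshold and adaptive_min_samples scene properties, a PROGRESSIVE_MUTI_JITTER sampling pattern, and a per-pixel Debug Sample Count render pass. A minimal sketch of driving the new settings from Blender's Python console, assuming a build with this patch applied (property names are taken from the diff below):

    import bpy

    scene = bpy.context.scene
    scene.cycles.use_adaptive_sampling = True  # per-pixel sample count driven by variance estimation
    scene.cycles.adaptive_threshold = 0.0      # 0.0 = derive threshold automatically from AA samples
    scene.cycles.adaptive_min_samples = 0      # 0 = derive minimum automatically from AA samples

    # Expose the per-pixel sample count as a debug pass on the active view layer.
    bpy.context.view_layer.cycles.pass_debug_sample_count = True

Note how the ui.py hunk below greys out the Pattern selector while adaptive sampling is enabled, and conversely enables the two adaptive controls only when it is on.
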
diff -Naur a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
--- a/intern/cycles/blender/addon/engine.py 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/addon/engine.py 2020-01-10 21:01:30.634277071 +0300
@@ -258,6 +258,7 @@
     if crl.pass_debug_bvh_traversed_instances: yield ("Debug BVH Traversed Instances", "X", 'VALUE')
     if crl.pass_debug_bvh_intersections: yield ("Debug BVH Intersections", "X", 'VALUE')
     if crl.pass_debug_ray_bounces: yield ("Debug Ray Bounces", "X", 'VALUE')
+    if crl.pass_debug_sample_count: yield ("Debug Sample Count", "X", 'VALUE')
     if crl.use_pass_volume_direct: yield ("VolumeDir", "RGB", 'COLOR')
     if crl.use_pass_volume_indirect: yield ("VolumeInd", "RGB", 'COLOR')
diff -Naur a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
--- a/intern/cycles/blender/addon/properties.py 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/addon/properties.py 2020-01-10 21:06:42.827616043 +0300
@@ -112,6 +112,7 @@
 enum_sampling_pattern = (
     ('SOBOL', "Sobol", "Use Sobol random sampling pattern"),
     ('CORRELATED_MUTI_JITTER', "Correlated Multi-Jitter", "Use Correlated Multi-Jitter random sampling pattern"),
+    ('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern"),
 )
 
 enum_integrator = (
@@ -357,6 +358,26 @@
         default=0,
     )
 
+    adaptive_threshold: FloatProperty(
+        name="Adaptive Sampling Threshold",
+        description="Zero for automatic setting based on AA samples",
+        min=0.0, max=1.0,
+        default=0.0,
+    )
+
+    adaptive_min_samples: IntProperty(
+        name="Adaptive Min Samples",
+        description="Minimum AA samples for adaptive sampling. Zero for automatic setting based on AA samples",
+        min=0, max=4096,
+        default=0,
+    )
+
+    use_adaptive_sampling: BoolProperty(
+        name="Use adaptive sampling",
+        description="Automatically determine the number of samples per pixel based on a variance estimation",
+        default=False,
+    )
+
     caustics_reflective: BoolProperty(
         name="Reflective Caustics",
         description="Use reflective caustics, resulting in a brighter image (more noise but added realism)",
@@ -1285,6 +1306,12 @@
         default=False,
         update=update_render_passes,
     )
+    pass_debug_sample_count: BoolProperty(
+        name="Debug Sample Count",
+        description="Number of samples/camera rays per pixel",
+        default=False,
+        update=update_render_passes,
+    )
     use_pass_volume_direct: BoolProperty(
         name="Volume Direct",
diff -Naur a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
--- a/intern/cycles/blender/addon/ui.py 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/addon/ui.py 2020-01-10 20:42:43.454256722 +0300
@@ -188,6 +188,8 @@
col.prop(cscene, "aa_samples", text="Render")
col.prop(cscene, "preview_aa_samples", text="Viewport")
+ col.prop(cscene, "use_adaptive_sampling", text="Adaptive Sampling")
+
class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
bl_label = "Sub Samples"
@@ -239,7 +241,13 @@
row.prop(cscene, "seed")
row.prop(cscene, "use_animated_seed", text="", icon='TIME')
- layout.prop(cscene, "sampling_pattern", text="Pattern")
+ col = layout.column(align=True)
+ col.active = not(cscene.use_adaptive_sampling)
+ col.prop(cscene, "sampling_pattern", text="Pattern")
+ col = layout.column(align=True)
+ col.active = cscene.use_adaptive_sampling
+ col.prop(cscene, "adaptive_min_samples", text="Adaptive Min Samples")
+ col.prop(cscene, "adaptive_threshold", text="Adaptive Threshold")
layout.prop(cscene, "use_square_samples")
@@ -803,6 +811,8 @@
col.prop(cycles_view_layer, "denoising_store_passes", text="Denoising Data")
col = flow.column()
col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
+ col = flow.column()
+ col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")
layout.separator()
diff -Naur a/intern/cycles/blender/addon/ui.py.orig b/intern/cycles/blender/addon/ui.py.orig
--- a/intern/cycles/blender/addon/ui.py.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/blender/addon/ui.py.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,2356 @@
+#
+# Copyright 2011-2013 Blender Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# <pep8 compliant>
+
+import bpy
+from bpy_extras.node_utils import find_node_input
+from bl_ui.utils import PresetPanel
+
+from bpy.types import Panel
+
+from bl_ui.properties_grease_pencil_common import GreasePencilSimplifyPanel
+
+
+class CYCLES_PT_sampling_presets(PresetPanel, Panel):
+ bl_label = "Sampling Presets"
+ preset_subdir = "cycles/sampling"
+ preset_operator = "script.execute_preset"
+ preset_add_operator = "render.cycles_sampling_preset_add"
+ COMPAT_ENGINES = {'CYCLES'}
+
+
+class CYCLES_PT_integrator_presets(PresetPanel, Panel):
+ bl_label = "Integrator Presets"
+ preset_subdir = "cycles/integrator"
+ preset_operator = "script.execute_preset"
+ preset_add_operator = "render.cycles_integrator_preset_add"
+ COMPAT_ENGINES = {'CYCLES'}
+
+
+class CyclesButtonsPanel:
+ bl_space_type = "PROPERTIES"
+ bl_region_type = "WINDOW"
+ bl_context = "render"
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ return context.engine in cls.COMPAT_ENGINES
+
+
+# Adapt properties editor panel to display in node editor. We have to
+# copy the class rather than inherit due to the way bpy registration works.
+def node_panel(cls):
+ node_cls = type('NODE_' + cls.__name__, cls.__bases__, dict(cls.__dict__))
+
+ node_cls.bl_space_type = 'NODE_EDITOR'
+ node_cls.bl_region_type = 'UI'
+ node_cls.bl_category = "Options"
+ if hasattr(node_cls, 'bl_parent_id'):
+ node_cls.bl_parent_id = 'NODE_' + node_cls.bl_parent_id
+
+ return node_cls
+
+
+def get_device_type(context):
+ return context.preferences.addons[__package__].preferences.compute_device_type
+
+
+def use_cpu(context):
+ cscene = context.scene.cycles
+
+ return (get_device_type(context) == 'NONE' or cscene.device == 'CPU')
+
+
+def use_opencl(context):
+ cscene = context.scene.cycles
+
+ return (get_device_type(context) == 'OPENCL' and cscene.device == 'GPU')
+
+
+def use_cuda(context):
+ cscene = context.scene.cycles
+
+ return (get_device_type(context) == 'CUDA' and cscene.device == 'GPU')
+
+
+def use_optix(context):
+ cscene = context.scene.cycles
+
+ return (get_device_type(context) == 'OPTIX' and cscene.device == 'GPU')
+
+
+def use_branched_path(context):
+ cscene = context.scene.cycles
+
+ return (cscene.progressive == 'BRANCHED_PATH' and not use_optix(context))
+
+
+def use_sample_all_lights(context):
+ cscene = context.scene.cycles
+
+ return cscene.sample_all_lights_direct or cscene.sample_all_lights_indirect
+
+
+def show_device_active(context):
+ cscene = context.scene.cycles
+ if cscene.device != 'GPU':
+ return True
+ return context.preferences.addons[__package__].preferences.has_active_device()
+
+
+def draw_samples_info(layout, context):
+ cscene = context.scene.cycles
+ integrator = cscene.progressive
+
+ # Calculate sample values
+ if integrator == 'PATH':
+ aa = cscene.samples
+ if cscene.use_square_samples:
+ aa = aa * aa
+ else:
+ aa = cscene.aa_samples
+ d = cscene.diffuse_samples
+ g = cscene.glossy_samples
+ t = cscene.transmission_samples
+ ao = cscene.ao_samples
+ ml = cscene.mesh_light_samples
+ sss = cscene.subsurface_samples
+ vol = cscene.volume_samples
+
+ if cscene.use_square_samples:
+ aa = aa * aa
+ d = d * d
+ g = g * g
+ t = t * t
+ ao = ao * ao
+ ml = ml * ml
+ sss = sss * sss
+ vol = vol * vol
+
+ # Draw interface
+ # Do not draw for progressive, when Square Samples are disabled
+ if use_branched_path(context) or (cscene.use_square_samples and integrator == 'PATH'):
+ col = layout.column(align=True)
+ col.scale_y = 0.6
+ col.label(text="Total Samples:")
+ col.separator()
+ if integrator == 'PATH':
+ col.label(text="%s AA" % aa)
+ else:
+ col.label(text="%s AA, %s Diffuse, %s Glossy, %s Transmission" %
+ (aa, d * aa, g * aa, t * aa))
+ col.separator()
+ col.label(text="%s AO, %s Mesh Light, %s Subsurface, %s Volume" %
+ (ao * aa, ml * aa, sss * aa, vol * aa))
+
+
+class CYCLES_RENDER_PT_sampling(CyclesButtonsPanel, Panel):
+ bl_label = "Sampling"
+
+ def draw_header_preset(self, context):
+ CYCLES_PT_sampling_presets.draw_panel_header(self.layout)
+
+ def draw(self, context):
+ layout = self.layout
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ if not use_optix(context):
+ layout.prop(cscene, "progressive")
+
+ if cscene.progressive == 'PATH' or use_branched_path(context) is False:
+ col = layout.column(align=True)
+ col.prop(cscene, "samples", text="Render")
+ col.prop(cscene, "preview_samples", text="Viewport")
+
+ draw_samples_info(layout, context)
+ else:
+ col = layout.column(align=True)
+ col.prop(cscene, "aa_samples", text="Render")
+ col.prop(cscene, "preview_aa_samples", text="Viewport")
+
+
+class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
+ bl_label = "Sub Samples"
+ bl_parent_id = "CYCLES_RENDER_PT_sampling"
+
+ @classmethod
+ def poll(cls, context):
+ scene = context.scene
+ cscene = scene.cycles
+ return cscene.progressive != 'PATH' and use_branched_path(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column(align=True)
+ col.prop(cscene, "diffuse_samples", text="Diffuse")
+ col.prop(cscene, "glossy_samples", text="Glossy")
+ col.prop(cscene, "transmission_samples", text="Transmission")
+ col.prop(cscene, "ao_samples", text="AO")
+
+ sub = col.row(align=True)
+ sub.active = use_sample_all_lights(context)
+ sub.prop(cscene, "mesh_light_samples", text="Mesh Light")
+ col.prop(cscene, "subsurface_samples", text="Subsurface")
+ col.prop(cscene, "volume_samples", text="Volume")
+
+ draw_samples_info(layout, context)
+
+
+class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
+ bl_label = "Advanced"
+ bl_parent_id = "CYCLES_RENDER_PT_sampling"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ row = layout.row(align=True)
+ row.prop(cscene, "seed")
+ row.prop(cscene, "use_animated_seed", text="", icon='TIME')
+
+ layout.prop(cscene, "sampling_pattern", text="Pattern")
+
+ layout.prop(cscene, "use_square_samples")
+
+ layout.separator()
+
+ col = layout.column(align=True)
+ col.prop(cscene, "min_light_bounces")
+ col.prop(cscene, "min_transparent_bounces")
+ col.prop(cscene, "light_sampling_threshold", text="Light Threshold")
+
+ if cscene.progressive != 'PATH' and use_branched_path(context):
+ col = layout.column(align=True)
+ col.prop(cscene, "sample_all_lights_direct")
+ col.prop(cscene, "sample_all_lights_indirect")
+
+ for view_layer in scene.view_layers:
+ if view_layer.samples > 0:
+ layout.separator()
+ layout.row().prop(cscene, "use_layer_samples")
+ break
+
+
+class CYCLES_RENDER_PT_sampling_total(CyclesButtonsPanel, Panel):
+ bl_label = "Total Samples"
+ bl_parent_id = "CYCLES_RENDER_PT_sampling"
+
+ @classmethod
+ def poll(cls, context):
+ scene = context.scene
+ cscene = scene.cycles
+
+ if cscene.use_square_samples:
+ return True
+
+ return cscene.progressive != 'PATH' and use_branched_path(context)
+
+ def draw(self, context):
+ layout = self.layout
+ cscene = context.scene.cycles
+ integrator = cscene.progressive
+
+ # Calculate sample values
+ if integrator == 'PATH':
+ aa = cscene.samples
+ if cscene.use_square_samples:
+ aa = aa * aa
+ else:
+ aa = cscene.aa_samples
+ d = cscene.diffuse_samples
+ g = cscene.glossy_samples
+ t = cscene.transmission_samples
+ ao = cscene.ao_samples
+ ml = cscene.mesh_light_samples
+ sss = cscene.subsurface_samples
+ vol = cscene.volume_samples
+
+ if cscene.use_square_samples:
+ aa = aa * aa
+ d = d * d
+ g = g * g
+ t = t * t
+ ao = ao * ao
+ ml = ml * ml
+ sss = sss * sss
+ vol = vol * vol
+
+ col = layout.column(align=True)
+ col.scale_y = 0.6
+ if integrator == 'PATH':
+ col.label(text="%s AA" % aa)
+ else:
+ col.label(text="%s AA, %s Diffuse, %s Glossy, %s Transmission" %
+ (aa, d * aa, g * aa, t * aa))
+ col.separator()
+ col.label(text="%s AO, %s Mesh Light, %s Subsurface, %s Volume" %
+ (ao * aa, ml * aa, sss * aa, vol * aa))
+
+
+class CYCLES_RENDER_PT_subdivision(CyclesButtonsPanel, Panel):
+ bl_label = "Subdivision"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return (context.scene.render.engine == 'CYCLES') and (context.scene.cycles.feature_set == 'EXPERIMENTAL')
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+ sub = col.column(align=True)
+ sub.prop(cscene, "dicing_rate", text="Dicing Rate Render")
+ sub.prop(cscene, "preview_dicing_rate", text="Preview")
+
+ col.separator()
+
+ col.prop(cscene, "offscreen_dicing_scale", text="Offscreen Scale")
+ col.prop(cscene, "max_subdivisions")
+
+ col.prop(cscene, "dicing_camera")
+
+
+class CYCLES_RENDER_PT_hair(CyclesButtonsPanel, Panel):
+ bl_label = "Hair"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw_header(self, context):
+ layout = self.layout
+ scene = context.scene
+ ccscene = scene.cycles_curves
+
+ layout.prop(ccscene, "use_curves", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ ccscene = scene.cycles_curves
+
+ layout.active = ccscene.use_curves
+
+ col = layout.column()
+ col.prop(ccscene, "shape", text="Shape")
+ if not (ccscene.primitive in {'CURVE_SEGMENTS', 'LINE_SEGMENTS'} and ccscene.shape == 'RIBBONS'):
+ col.prop(ccscene, "cull_backfacing", text="Cull back-faces")
+ col.prop(ccscene, "primitive", text="Primitive")
+
+ if ccscene.primitive == 'TRIANGLES' and ccscene.shape == 'THICK':
+ col.prop(ccscene, "resolution", text="Resolution")
+ elif ccscene.primitive == 'CURVE_SEGMENTS':
+ col.prop(ccscene, "subdivisions", text="Curve subdivisions")
+
+
+class CYCLES_RENDER_PT_volumes(CyclesButtonsPanel, Panel):
+ bl_label = "Volumes"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+ col.prop(cscene, "volume_step_size", text="Step Size")
+ col.prop(cscene, "volume_max_steps", text="Max Steps")
+
+
+class CYCLES_RENDER_PT_light_paths(CyclesButtonsPanel, Panel):
+ bl_label = "Light Paths"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw_header_preset(self, context):
+ CYCLES_PT_integrator_presets.draw_panel_header(self.layout)
+
+ def draw(self, context):
+ pass
+
+
+class CYCLES_RENDER_PT_light_paths_max_bounces(CyclesButtonsPanel, Panel):
+ bl_label = "Max Bounces"
+ bl_parent_id = "CYCLES_RENDER_PT_light_paths"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column(align=True)
+ col.prop(cscene, "max_bounces", text="Total")
+
+ col = layout.column(align=True)
+ col.prop(cscene, "diffuse_bounces", text="Diffuse")
+ col.prop(cscene, "glossy_bounces", text="Glossy")
+ col.prop(cscene, "transparent_max_bounces", text="Transparency")
+ col.prop(cscene, "transmission_bounces", text="Transmission")
+ col.prop(cscene, "volume_bounces", text="Volume")
+
+
+class CYCLES_RENDER_PT_light_paths_clamping(CyclesButtonsPanel, Panel):
+ bl_label = "Clamping"
+ bl_parent_id = "CYCLES_RENDER_PT_light_paths"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column(align=True)
+ col.prop(cscene, "sample_clamp_direct", text="Direct Light")
+ col.prop(cscene, "sample_clamp_indirect", text="Indirect Light")
+
+
+class CYCLES_RENDER_PT_light_paths_caustics(CyclesButtonsPanel, Panel):
+ bl_label = "Caustics"
+ bl_parent_id = "CYCLES_RENDER_PT_light_paths"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+ col.prop(cscene, "blur_glossy")
+ col.prop(cscene, "caustics_reflective")
+ col.prop(cscene, "caustics_refractive")
+
+
+class CYCLES_RENDER_PT_motion_blur(CyclesButtonsPanel, Panel):
+ bl_label = "Motion Blur"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw_header(self, context):
+ rd = context.scene.render
+
+ self.layout.prop(rd, "use_motion_blur", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+ rd = scene.render
+ layout.active = rd.use_motion_blur
+
+ col = layout.column()
+ col.prop(cscene, "motion_blur_position", text="Position")
+ col.prop(rd, "motion_blur_shutter")
+ col.separator()
+ col.prop(cscene, "rolling_shutter_type", text="Rolling Shutter")
+ sub = col.column()
+ sub.active = cscene.rolling_shutter_type != 'NONE'
+ sub.prop(cscene, "rolling_shutter_duration")
+
+
+class CYCLES_RENDER_PT_motion_blur_curve(CyclesButtonsPanel, Panel):
+ bl_label = "Shutter Curve"
+ bl_parent_id = "CYCLES_RENDER_PT_motion_blur"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ layout.active = rd.use_motion_blur
+
+ col = layout.column()
+
+ col.template_curve_mapping(rd, "motion_blur_shutter_curve")
+
+ col = layout.column(align=True)
+ row = col.row(align=True)
+ row.operator("render.shutter_curve_preset", icon='SMOOTHCURVE', text="").shape = 'SMOOTH'
+ row.operator("render.shutter_curve_preset", icon='SPHERECURVE', text="").shape = 'ROUND'
+ row.operator("render.shutter_curve_preset", icon='ROOTCURVE', text="").shape = 'ROOT'
+ row.operator("render.shutter_curve_preset", icon='SHARPCURVE', text="").shape = 'SHARP'
+ row.operator("render.shutter_curve_preset", icon='LINCURVE', text="").shape = 'LINE'
+ row.operator("render.shutter_curve_preset", icon='NOCURVE', text="").shape = 'MAX'
+
+
+class CYCLES_RENDER_PT_film(CyclesButtonsPanel, Panel):
+ bl_label = "Film"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+ col.prop(cscene, "film_exposure")
+
+
+class CYCLES_RENDER_PT_film_transparency(CyclesButtonsPanel, Panel):
+ bl_label = "Transparent"
+ bl_parent_id = "CYCLES_RENDER_PT_film"
+
+ def draw_header(self, context):
+ layout = self.layout
+
+ scene = context.scene
+ rd = scene.render
+
+ layout.prop(rd, "film_transparent", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+ scene = context.scene
+ rd = scene.render
+ cscene = scene.cycles
+
+ layout.active = rd.film_transparent
+
+ col = layout.column()
+ col.prop(cscene, "film_transparent_glass", text="Transparent Glass")
+
+ sub = col.column()
+ sub.active = rd.film_transparent and cscene.film_transparent_glass
+ sub.prop(cscene, "film_transparent_roughness", text="Roughness Threshold")
+
+
+class CYCLES_RENDER_PT_film_pixel_filter(CyclesButtonsPanel, Panel):
+ bl_label = "Pixel Filter"
+ bl_parent_id = "CYCLES_RENDER_PT_film"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+ col.prop(cscene, "pixel_filter_type", text="Type")
+ if cscene.pixel_filter_type != 'BOX':
+ col.prop(cscene, "filter_width", text="Width")
+
+
+class CYCLES_RENDER_PT_performance(CyclesButtonsPanel, Panel):
+ bl_label = "Performance"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw(self, context):
+ pass
+
+
+class CYCLES_RENDER_PT_performance_threads(CyclesButtonsPanel, Panel):
+ bl_label = "Threads"
+ bl_parent_id = "CYCLES_RENDER_PT_performance"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+
+ col = layout.column()
+
+ col.prop(rd, "threads_mode")
+ sub = col.column(align=True)
+ sub.enabled = rd.threads_mode == 'FIXED'
+ sub.prop(rd, "threads")
+
+
+class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel):
+ bl_label = "Tiles"
+ bl_parent_id = "CYCLES_RENDER_PT_performance"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ cscene = scene.cycles
+
+ col = layout.column()
+
+ sub = col.column(align=True)
+ sub.prop(rd, "tile_x", text="Tiles X")
+ sub.prop(rd, "tile_y", text="Y")
+ col.prop(cscene, "tile_order", text="Order")
+
+ sub = col.column()
+ sub.active = not rd.use_save_buffers
+ for view_layer in scene.view_layers:
+ if view_layer.cycles.use_denoising:
+ sub.active = False
+ sub.prop(cscene, "use_progressive_refine")
+
+
+class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Panel):
+ bl_label = "Acceleration Structure"
+ bl_parent_id = "CYCLES_RENDER_PT_performance"
+
+ def draw(self, context):
+ import _cycles
+
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+
+ if _cycles.with_embree:
+ row = col.row()
+ row.active = use_cpu(context)
+ row.prop(cscene, "use_bvh_embree")
+ col.prop(cscene, "debug_use_spatial_splits")
+ sub = col.column()
+ sub.active = not cscene.use_bvh_embree or not _cycles.with_embree
+ sub.prop(cscene, "debug_use_hair_bvh")
+ sub = col.column()
+ sub.active = not cscene.debug_use_spatial_splits and not cscene.use_bvh_embree
+ sub.prop(cscene, "debug_bvh_time_steps")
+
+
+class CYCLES_RENDER_PT_performance_final_render(CyclesButtonsPanel, Panel):
+ bl_label = "Final Render"
+ bl_parent_id = "CYCLES_RENDER_PT_performance"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+
+ col = layout.column()
+
+ col.prop(rd, "use_save_buffers")
+ col.prop(rd, "use_persistent_data", text="Persistent Images")
+
+
+class CYCLES_RENDER_PT_performance_viewport(CyclesButtonsPanel, Panel):
+ bl_label = "Viewport"
+ bl_parent_id = "CYCLES_RENDER_PT_performance"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ cscene = scene.cycles
+
+ col = layout.column()
+ col.prop(rd, "preview_pixel_size", text="Pixel Size")
+ col.prop(cscene, "preview_start_resolution", text="Start Pixels")
+
+
+class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel):
+ bl_label = "Filter"
+ bl_options = {'DEFAULT_CLOSED'}
+ bl_context = "view_layer"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ with_freestyle = bpy.app.build_options.freestyle
+
+ scene = context.scene
+ rd = scene.render
+ view_layer = context.view_layer
+
+ flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
+
+ col = flow.column()
+ col.prop(view_layer, "use_sky", text="Environment")
+ col = flow.column()
+ col.prop(view_layer, "use_ao", text="Ambient Occlusion")
+ col = flow.column()
+ col.prop(view_layer, "use_solid", text="Surfaces")
+ col = flow.column()
+ col.prop(view_layer, "use_strand", text="Hair")
+ if with_freestyle:
+ col = flow.column()
+ col.prop(view_layer, "use_freestyle", text="Freestyle")
+ col.active = rd.use_freestyle
+
+
+class CYCLES_RENDER_PT_override(CyclesButtonsPanel, Panel):
+ bl_label = "Override"
+ bl_options = {'DEFAULT_CLOSED'}
+ bl_context = "view_layer"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ view_layer = context.view_layer
+
+ layout.prop(view_layer, "material_override")
+ layout.prop(view_layer, "samples")
+
+
+class CYCLES_RENDER_PT_passes(CyclesButtonsPanel, Panel):
+ bl_label = "Passes"
+ bl_context = "view_layer"
+
+ def draw(self, context):
+ pass
+
+
+class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
+ bl_label = "Data"
+ bl_context = "view_layer"
+ bl_parent_id = "CYCLES_RENDER_PT_passes"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ view_layer = context.view_layer
+ cycles_view_layer = view_layer.cycles
+
+ flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
+ col = flow.column()
+ col.prop(view_layer, "use_pass_combined")
+ col = flow.column()
+ col.prop(view_layer, "use_pass_z")
+ col = flow.column()
+ col.prop(view_layer, "use_pass_mist")
+ col = flow.column()
+ col.prop(view_layer, "use_pass_normal")
+ col = flow.column()
+ col.prop(view_layer, "use_pass_vector")
+ col.active = not rd.use_motion_blur
+ col = flow.column()
+ col.prop(view_layer, "use_pass_uv")
+ col = flow.column()
+ col.prop(view_layer, "use_pass_object_index")
+ col = flow.column()
+ col.prop(view_layer, "use_pass_material_index")
+
+ layout.separator()
+
+ flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
+ col = flow.column()
+ col.prop(cycles_view_layer, "denoising_store_passes", text="Denoising Data")
+ col = flow.column()
+ col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
+
+ layout.separator()
+
+ layout.prop(view_layer, "pass_alpha_threshold")
+
+
+class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel):
+ bl_label = "Light"
+ bl_context = "view_layer"
+ bl_parent_id = "CYCLES_RENDER_PT_passes"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ view_layer = context.view_layer
+ cycles_view_layer = view_layer.cycles
+
+ split = layout.split(factor=0.35)
+ split.use_property_split = False
+ split.label(text="Diffuse")
+ row = split.row(align=True)
+ row.prop(view_layer, "use_pass_diffuse_direct", text="Direct", toggle=True)
+ row.prop(view_layer, "use_pass_diffuse_indirect", text="Indirect", toggle=True)
+ row.prop(view_layer, "use_pass_diffuse_color", text="Color", toggle=True)
+
+ split = layout.split(factor=0.35)
+ split.use_property_split = False
+ split.label(text="Glossy")
+ row = split.row(align=True)
+ row.prop(view_layer, "use_pass_glossy_direct", text="Direct", toggle=True)
+ row.prop(view_layer, "use_pass_glossy_indirect", text="Indirect", toggle=True)
+ row.prop(view_layer, "use_pass_glossy_color", text="Color", toggle=True)
+
+ split = layout.split(factor=0.35)
+ split.use_property_split = False
+ split.label(text="Transmission")
+ row = split.row(align=True)
+ row.prop(view_layer, "use_pass_transmission_direct", text="Direct", toggle=True)
+ row.prop(view_layer, "use_pass_transmission_indirect", text="Indirect", toggle=True)
+ row.prop(view_layer, "use_pass_transmission_color", text="Color", toggle=True)
+
+ split = layout.split(factor=0.35)
+ split.use_property_split = False
+ split.label(text="Subsurface")
+ row = split.row(align=True)
+ row.prop(view_layer, "use_pass_subsurface_direct", text="Direct", toggle=True)
+ row.prop(view_layer, "use_pass_subsurface_indirect", text="Indirect", toggle=True)
+ row.prop(view_layer, "use_pass_subsurface_color", text="Color", toggle=True)
+
+ split = layout.split(factor=0.35)
+ split.use_property_split = False
+ split.label(text="Volume")
+ row = split.row(align=True)
+ row.prop(cycles_view_layer, "use_pass_volume_direct", text="Direct", toggle=True)
+ row.prop(cycles_view_layer, "use_pass_volume_indirect", text="Indirect", toggle=True)
+
+ col = layout.column(align=True)
+ col.prop(view_layer, "use_pass_emit", text="Emission")
+ col.prop(view_layer, "use_pass_environment")
+ col.prop(view_layer, "use_pass_shadow")
+ col.prop(view_layer, "use_pass_ambient_occlusion", text="Ambient Occlusion")
+
+
+class CYCLES_RENDER_PT_passes_crypto(CyclesButtonsPanel, Panel):
+ bl_label = "Cryptomatte"
+ bl_context = "view_layer"
+ bl_parent_id = "CYCLES_RENDER_PT_passes"
+
+ def draw(self, context):
+ import _cycles
+
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ cycles_view_layer = context.view_layer.cycles
+
+ row = layout.row(align=True)
+ row.use_property_split = False
+ row.prop(cycles_view_layer, "use_pass_crypto_object", text="Object", toggle=True)
+ row.prop(cycles_view_layer, "use_pass_crypto_material", text="Material", toggle=True)
+ row.prop(cycles_view_layer, "use_pass_crypto_asset", text="Asset", toggle=True)
+
+ layout.prop(cycles_view_layer, "pass_crypto_depth", text="Levels")
+
+ row = layout.row(align=True)
+ row.active = use_cpu(context)
+ row.prop(cycles_view_layer, "pass_crypto_accurate", text="Accurate Mode")
+
+
+class CYCLES_RENDER_PT_passes_debug(CyclesButtonsPanel, Panel):
+ bl_label = "Debug"
+ bl_context = "view_layer"
+ bl_parent_id = "CYCLES_RENDER_PT_passes"
+
+ @classmethod
+ def poll(cls, context):
+ import _cycles
+ return _cycles.with_cycles_debug
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ cycles_view_layer = context.view_layer.cycles
+
+ layout.prop(cycles_view_layer, "pass_debug_bvh_traversed_nodes")
+ layout.prop(cycles_view_layer, "pass_debug_bvh_traversed_instances")
+ layout.prop(cycles_view_layer, "pass_debug_bvh_intersections")
+ layout.prop(cycles_view_layer, "pass_debug_ray_bounces")
+
+
+class CYCLES_RENDER_UL_aov(bpy.types.UIList):
+ def draw_item(self, context, layout, data, item, icon, active_data, active_propname):
+ row = layout.row()
+ split = row.split(factor=0.65)
+ icon = 'ERROR' if item.conflict else 'NONE'
+ split.row().prop(item, "name", text="", icon=icon, emboss=False)
+ split.row().prop(item, "type", text="", emboss=False)
+
+
+class CYCLES_RENDER_PT_passes_aov(CyclesButtonsPanel, Panel):
+ bl_label = "Shader AOV"
+ bl_context = "view_layer"
+ bl_parent_id = "CYCLES_RENDER_PT_passes"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ cycles_view_layer = context.view_layer.cycles
+
+ row = layout.row()
+ col = row.column()
+ col.template_list("CYCLES_RENDER_UL_aov", "aovs", cycles_view_layer, "aovs", cycles_view_layer, "active_aov", rows=2)
+
+ col = row.column()
+ sub = col.column(align=True)
+ sub.operator("cycles.add_aov", icon='ADD', text="")
+ sub.operator("cycles.remove_aov", icon='REMOVE', text="")
+
+ if cycles_view_layer.active_aov < len(cycles_view_layer.aovs):
+ active_aov = cycles_view_layer.aovs[cycles_view_layer.active_aov]
+ if active_aov.conflict:
+ layout.label(text=active_aov.conflict, icon='ERROR')
+
+
+class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
+ bl_label = "Denoising"
+ bl_context = "view_layer"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw_header(self, context):
+ scene = context.scene
+ view_layer = context.view_layer
+ cycles_view_layer = view_layer.cycles
+ layout = self.layout
+
+ layout.prop(cycles_view_layer, "use_denoising", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ view_layer = context.view_layer
+ cycles_view_layer = view_layer.cycles
+
+ split = layout.split()
+ split.active = cycles_view_layer.use_denoising
+
+ col = split.column(align=True)
+
+ if use_optix(context):
+ col.prop(cycles_view_layer, "use_optix_denoising", text="OptiX AI Denoising")
+
+ if cycles_view_layer.use_optix_denoising:
+ col.prop(cycles_view_layer, "denoising_optix_input_passes")
+ return
+
+ col.separator(factor=2.0)
+
+ col.prop(cycles_view_layer, "denoising_radius", text="Radius")
+ col.prop(cycles_view_layer, "denoising_strength", slider=True, text="Strength")
+ col.prop(cycles_view_layer, "denoising_feature_strength", slider=True, text="Feature Strength")
+ col.prop(cycles_view_layer, "denoising_relative_pca")
+
+ layout.separator()
+
+ split = layout.split(factor=0.5)
+ split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
+
+ col = split.column()
+ col.alignment = 'RIGHT'
+ col.label(text="Diffuse")
+
+ row = split.row(align=True)
+ row.use_property_split = False
+ row.prop(cycles_view_layer, "denoising_diffuse_direct", text="Direct", toggle=True)
+ row.prop(cycles_view_layer, "denoising_diffuse_indirect", text="Indirect", toggle=True)
+
+ split = layout.split(factor=0.5)
+ split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
+
+ col = split.column()
+ col.alignment = 'RIGHT'
+ col.label(text="Glossy")
+
+ row = split.row(align=True)
+ row.use_property_split = False
+ row.prop(cycles_view_layer, "denoising_glossy_direct", text="Direct", toggle=True)
+ row.prop(cycles_view_layer, "denoising_glossy_indirect", text="Indirect", toggle=True)
+
+ split = layout.split(factor=0.5)
+ split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
+
+ col = split.column()
+ col.alignment = 'RIGHT'
+ col.label(text="Transmission")
+
+ row = split.row(align=True)
+ row.use_property_split = False
+ row.prop(cycles_view_layer, "denoising_transmission_direct", text="Direct", toggle=True)
+ row.prop(cycles_view_layer, "denoising_transmission_indirect", text="Indirect", toggle=True)
+
+ split = layout.split(factor=0.5)
+ split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
+
+ col = split.column()
+ col.alignment = 'RIGHT'
+ col.label(text="Subsurface")
+
+ row = split.row(align=True)
+ row.use_property_split = False
+ row.prop(cycles_view_layer, "denoising_subsurface_direct", text="Direct", toggle=True)
+ row.prop(cycles_view_layer, "denoising_subsurface_indirect", text="Indirect", toggle=True)
+
+
+class CYCLES_PT_post_processing(CyclesButtonsPanel, Panel):
+ bl_label = "Post Processing"
+ bl_options = {'DEFAULT_CLOSED'}
+ bl_context = "output"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ rd = context.scene.render
+
+ col = layout.column(align=True)
+ col.prop(rd, "use_compositing")
+ col.prop(rd, "use_sequencer")
+
+ layout.prop(rd, "dither_intensity", text="Dither", slider=True)
+
+
+class CYCLES_CAMERA_PT_dof(CyclesButtonsPanel, Panel):
+ bl_label = "Depth of Field"
+ bl_context = "data"
+
+ @classmethod
+ def poll(cls, context):
+ return context.camera and CyclesButtonsPanel.poll(context)
+
+ def draw_header(self, context):
+ cam = context.camera
+ dof = cam.dof
+ self.layout.prop(dof, "use_dof", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+
+ cam = context.camera
+ dof = cam.dof
+ layout.active = dof.use_dof
+
+ split = layout.split()
+
+ col = split.column()
+ col.prop(dof, "focus_object", text="Focus Object")
+
+ sub = col.row()
+ sub.active = dof.focus_object is None
+ sub.prop(dof, "focus_distance", text="Distance")
+
+
+class CYCLES_CAMERA_PT_dof_aperture(CyclesButtonsPanel, Panel):
+ bl_label = "Aperture"
+ bl_parent_id = "CYCLES_CAMERA_PT_dof"
+
+ @classmethod
+ def poll(cls, context):
+ return context.camera and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+
+ cam = context.camera
+ dof = cam.dof
+ layout.active = dof.use_dof
+ flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
+
+ col = flow.column()
+ col.prop(dof, "aperture_fstop")
+ col.prop(dof, "aperture_blades")
+ col.prop(dof, "aperture_rotation")
+ col.prop(dof, "aperture_ratio")
+
+
+class CYCLES_PT_context_material(CyclesButtonsPanel, Panel):
+ bl_label = ""
+ bl_context = "material"
+ bl_options = {'HIDE_HEADER'}
+
+ @classmethod
+ def poll(cls, context):
+ if context.active_object and context.active_object.type == 'GPENCIL':
+ return False
+ else:
+ return (context.material or context.object) and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ mat = context.material
+ ob = context.object
+ slot = context.material_slot
+ space = context.space_data
+
+ if ob:
+ is_sortable = len(ob.material_slots) > 1
+ rows = 1
+ if (is_sortable):
+ rows = 4
+
+ row = layout.row()
+
+ row.template_list("MATERIAL_UL_matslots", "", ob, "material_slots", ob, "active_material_index", rows=rows)
+
+ col = row.column(align=True)
+ col.operator("object.material_slot_add", icon='ADD', text="")
+ col.operator("object.material_slot_remove", icon='REMOVE', text="")
+
+ col.menu("MATERIAL_MT_context_menu", icon='DOWNARROW_HLT', text="")
+
+ if is_sortable:
+ col.separator()
+
+ col.operator("object.material_slot_move", icon='TRIA_UP', text="").direction = 'UP'
+ col.operator("object.material_slot_move", icon='TRIA_DOWN', text="").direction = 'DOWN'
+
+ if ob.mode == 'EDIT':
+ row = layout.row(align=True)
+ row.operator("object.material_slot_assign", text="Assign")
+ row.operator("object.material_slot_select", text="Select")
+ row.operator("object.material_slot_deselect", text="Deselect")
+
+ split = layout.split(factor=0.65)
+
+ if ob:
+ split.template_ID(ob, "active_material", new="material.new")
+ row = split.row()
+
+ if slot:
+ row.prop(slot, "link", text="")
+ else:
+ row.label()
+ elif mat:
+ split.template_ID(space, "pin_id")
+ split.separator()
+
+
+class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
+ bl_label = "Motion Blur"
+ bl_context = "object"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ ob = context.object
+ if CyclesButtonsPanel.poll(context) and ob:
+ if ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META', 'CAMERA'}:
+ return True
+ if ob.instance_type == 'COLLECTION' and ob.instance_collection:
+ return True
+ # TODO(sergey): More duplicator types here?
+ return False
+
+ def draw_header(self, context):
+ layout = self.layout
+
+ rd = context.scene.render
+ # scene = context.scene
+
+ layout.active = rd.use_motion_blur
+
+ ob = context.object
+ cob = ob.cycles
+
+ layout.prop(cob, "use_motion_blur", text="")
+
+ def draw(self, context):
+ layout = self.layout
+
+ rd = context.scene.render
+ # scene = context.scene
+
+ ob = context.object
+ cob = ob.cycles
+
+ layout.active = (rd.use_motion_blur and cob.use_motion_blur)
+
+ row = layout.row()
+ if ob.type != 'CAMERA':
+ row.prop(cob, "use_deform_motion", text="Deformation")
+ row.prop(cob, "motion_steps", text="Steps")
+
+
+def has_geometry_visibility(ob):
+ return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT'}) or
+ (ob.instance_type == 'COLLECTION' and ob.instance_collection))
+
+
+class CYCLES_OBJECT_PT_visibility(CyclesButtonsPanel, Panel):
+ bl_label = "Visibility"
+ bl_context = "object"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return CyclesButtonsPanel.poll(context) and (context.object)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+
+ flow = layout.grid_flow(row_major=False, columns=0, even_columns=True, even_rows=False, align=False)
+ layout = self.layout
+ ob = context.object
+
+ col = flow.column()
+ col.prop(ob, "hide_viewport", text="Show in Viewports", invert_checkbox=True, toggle=False)
+ col = flow.column()
+ col.prop(ob, "hide_render", text="Show in Renders", invert_checkbox=True, toggle=False)
+ col = flow.column()
+ col.prop(ob, "hide_select", text="Selectable", invert_checkbox=True, toggle=False)
+
+ if has_geometry_visibility(ob):
+ cob = ob.cycles
+ col = flow.column()
+ col.prop(cob, "is_shadow_catcher")
+ col = flow.column()
+ col.prop(cob, "is_holdout")
+
+
+class CYCLES_OBJECT_PT_visibility_ray_visibility(CyclesButtonsPanel, Panel):
+ bl_label = "Ray Visibility"
+ bl_parent_id = "CYCLES_OBJECT_PT_visibility"
+ bl_context = "object"
+
+ @classmethod
+ def poll(cls, context):
+ ob = context.object
+ return CyclesButtonsPanel.poll(context) and has_geometry_visibility(ob)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ ob = context.object
+ cob = ob.cycles
+ visibility = ob.cycles_visibility
+
+ flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
+
+ col = flow.column()
+ col.prop(visibility, "camera")
+ col = flow.column()
+ col.prop(visibility, "diffuse")
+ col = flow.column()
+ col.prop(visibility, "glossy")
+ col = flow.column()
+ col.prop(visibility, "transmission")
+ col = flow.column()
+ col.prop(visibility, "scatter")
+
+ if ob.type != 'LIGHT':
+ col = flow.column()
+ col.prop(visibility, "shadow")
+
+ layout.separator()
+
+
+class CYCLES_OBJECT_PT_visibility_culling(CyclesButtonsPanel, Panel):
+ bl_label = "Culling"
+ bl_parent_id = "CYCLES_OBJECT_PT_visibility"
+ bl_context = "object"
+
+ @classmethod
+ def poll(cls, context):
+ ob = context.object
+ return CyclesButtonsPanel.poll(context) and has_geometry_visibility(ob)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+ ob = context.object
+ cob = ob.cycles
+
+ flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
+
+ col = flow.column()
+ col.active = scene.render.use_simplify and cscene.use_camera_cull
+ col.prop(cob, "use_camera_cull")
+
+ col = flow.column()
+ col.active = scene.render.use_simplify and cscene.use_distance_cull
+ col.prop(cob, "use_distance_cull")
+
+
+def panel_node_draw(layout, id_data, output_type, input_name):
+ if not id_data.use_nodes:
+ layout.operator("cycles.use_shading_nodes", icon='NODETREE')
+ return False
+
+ ntree = id_data.node_tree
+
+ node = ntree.get_output_node('CYCLES')
+ if node:
+ input = find_node_input(node, input_name)
+ if input:
+ layout.template_node_view(ntree, node, input)
+ else:
+ layout.label(text="Incompatible output node")
+ else:
+ layout.label(text="No output node")
+
+ return True
+
+
+class CYCLES_LIGHT_PT_preview(CyclesButtonsPanel, Panel):
+ bl_label = "Preview"
+ bl_context = "data"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return (
+ context.light and
+ not (
+ context.light.type == 'AREA' and
+ context.light.cycles.is_portal
+ ) and
+ CyclesButtonsPanel.poll(context)
+ )
+
+ def draw(self, context):
+ self.layout.template_preview(context.light)
+
+
+class CYCLES_LIGHT_PT_light(CyclesButtonsPanel, Panel):
+ bl_label = "Light"
+ bl_context = "data"
+
+ @classmethod
+ def poll(cls, context):
+ return context.light and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ light = context.light
+ clamp = light.cycles
+
+ layout.use_property_decorate = False
+
+ if self.bl_space_type == 'PROPERTIES':
+ layout.row().prop(light, "type", expand=True)
+ layout.use_property_split = True
+ else:
+ layout.use_property_split = True
+ layout.row().prop(light, "type")
+
+ col = layout.column()
+
+ col.prop(light, "color")
+ col.prop(light, "energy")
+ col.separator()
+
+ if light.type in {'POINT', 'SPOT'}:
+ col.prop(light, "shadow_soft_size", text="Size")
+ elif light.type == 'SUN':
+ col.prop(light, "angle")
+ elif light.type == 'AREA':
+ col.prop(light, "shape", text="Shape")
+ sub = col.column(align=True)
+
+ if light.shape in {'SQUARE', 'DISK'}:
+ sub.prop(light, "size")
+ elif light.shape in {'RECTANGLE', 'ELLIPSE'}:
+ sub.prop(light, "size", text="Size X")
+ sub.prop(light, "size_y", text="Y")
+
+ if not (light.type == 'AREA' and clamp.is_portal):
+ sub = col.column()
+ if use_branched_path(context):
+ subsub = sub.row(align=True)
+ subsub.active = use_sample_all_lights(context)
+ subsub.prop(clamp, "samples")
+ sub.prop(clamp, "max_bounces")
+
+ sub = col.column(align=True)
+ sub.active = not (light.type == 'AREA' and clamp.is_portal)
+ sub.prop(clamp, "cast_shadow")
+ sub.prop(clamp, "use_multiple_importance_sampling", text="Multiple Importance")
+
+ if light.type == 'AREA':
+ col.prop(clamp, "is_portal", text="Portal")
+
+
+class CYCLES_LIGHT_PT_nodes(CyclesButtonsPanel, Panel):
+ bl_label = "Nodes"
+ bl_context = "data"
+
+ @classmethod
+ def poll(cls, context):
+ return context.light and not (context.light.type == 'AREA' and
+ context.light.cycles.is_portal) and \
+ CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ light = context.light
+ panel_node_draw(layout, light, 'OUTPUT_LIGHT', 'Surface')
+
+
+class CYCLES_LIGHT_PT_spot(CyclesButtonsPanel, Panel):
+ bl_label = "Spot Shape"
+ bl_context = "data"
+
+ @classmethod
+ def poll(cls, context):
+ light = context.light
+ return (light and light.type == 'SPOT') and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+ light = context.light
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ col = layout.column()
+ col.prop(light, "spot_size", text="Size")
+ col.prop(light, "spot_blend", text="Blend", slider=True)
+ col.prop(light, "show_cone")
+
+
+class CYCLES_WORLD_PT_preview(CyclesButtonsPanel, Panel):
+ bl_label = "Preview"
+ bl_context = "world"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return context.world and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ self.layout.template_preview(context.world)
+
+
+class CYCLES_WORLD_PT_surface(CyclesButtonsPanel, Panel):
+ bl_label = "Surface"
+ bl_context = "world"
+
+ @classmethod
+ def poll(cls, context):
+ return context.world and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ world = context.world
+
+ if not panel_node_draw(layout, world, 'OUTPUT_WORLD', 'Surface'):
+ layout.prop(world, "color")
+
+
+class CYCLES_WORLD_PT_volume(CyclesButtonsPanel, Panel):
+ bl_label = "Volume"
+ bl_context = "world"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ world = context.world
+ return world and world.node_tree and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ world = context.world
+ panel_node_draw(layout, world, 'OUTPUT_WORLD', 'Volume')
+
+
+class CYCLES_WORLD_PT_ambient_occlusion(CyclesButtonsPanel, Panel):
+ bl_label = "Ambient Occlusion"
+ bl_context = "world"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return context.world and CyclesButtonsPanel.poll(context)
+
+ def draw_header(self, context):
+ light = context.world.light_settings
+ self.layout.prop(light, "use_ambient_occlusion", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ light = context.world.light_settings
+ scene = context.scene
+
+ col = layout.column()
+ sub = col.column()
+ sub.active = light.use_ambient_occlusion or scene.render.use_simplify
+ sub.prop(light, "ao_factor", text="Factor")
+ col.prop(light, "distance", text="Distance")
+
+
+class CYCLES_WORLD_PT_mist(CyclesButtonsPanel, Panel):
+ bl_label = "Mist Pass"
+ bl_context = "world"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ if CyclesButtonsPanel.poll(context):
+ if context.world:
+ for view_layer in context.scene.view_layers:
+ if view_layer.use_pass_mist:
+ return True
+
+ return False
+
+ def draw(self, context):
+ layout = self.layout
+
+ world = context.world
+
+ split = layout.split(align=True)
+ split.prop(world.mist_settings, "start")
+ split.prop(world.mist_settings, "depth")
+
+ layout.prop(world.mist_settings, "falloff")
+
+
+class CYCLES_WORLD_PT_ray_visibility(CyclesButtonsPanel, Panel):
+ bl_label = "Ray Visibility"
+ bl_context = "world"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return CyclesButtonsPanel.poll(context) and context.world
+
+ def draw(self, context):
+ layout = self.layout
+
+ world = context.world
+ visibility = world.cycles_visibility
+
+ flow = layout.column_flow()
+
+ flow.prop(visibility, "camera")
+ flow.prop(visibility, "diffuse")
+ flow.prop(visibility, "glossy")
+ flow.prop(visibility, "transmission")
+ flow.prop(visibility, "scatter")
+
+
+class CYCLES_WORLD_PT_settings(CyclesButtonsPanel, Panel):
+ bl_label = "Settings"
+ bl_context = "world"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return context.world and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ layout.column()
+
+
+class CYCLES_WORLD_PT_settings_surface(CyclesButtonsPanel, Panel):
+ bl_label = "Surface"
+ bl_parent_id = "CYCLES_WORLD_PT_settings"
+ bl_context = "world"
+
+ @classmethod
+ def poll(cls, context):
+ return context.world and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ world = context.world
+ cworld = world.cycles
+
+ col = layout.column()
+ col.prop(cworld, "sampling_method", text="Sampling")
+
+ sub = col.column()
+ sub.active = cworld.sampling_method != 'NONE'
+ subsub = sub.row(align=True)
+ subsub.active = cworld.sampling_method == 'MANUAL'
+ subsub.prop(cworld, "sample_map_resolution")
+ if use_branched_path(context):
+ subsub = sub.column(align=True)
+ subsub.active = use_sample_all_lights(context)
+ subsub.prop(cworld, "samples")
+ sub.prop(cworld, "max_bounces")
+
+
+class CYCLES_WORLD_PT_settings_volume(CyclesButtonsPanel, Panel):
+ bl_label = "Volume"
+ bl_parent_id = "CYCLES_WORLD_PT_settings"
+ bl_context = "world"
+
+ @classmethod
+ def poll(cls, context):
+ return context.world and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ world = context.world
+ cworld = world.cycles
+
+ col = layout.column()
+
+ sub = col.column()
+ sub.active = use_cpu(context)
+ sub.prop(cworld, "volume_sampling", text="Sampling")
+ col.prop(cworld, "volume_interpolation", text="Interpolation")
+ col.prop(cworld, "homogeneous_volume", text="Homogeneous")
+
+
+class CYCLES_MATERIAL_PT_preview(CyclesButtonsPanel, Panel):
+ bl_label = "Preview"
+ bl_context = "material"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ mat = context.material
+ return mat and (not mat.grease_pencil) and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ self.layout.template_preview(context.material)
+
+
+class CYCLES_MATERIAL_PT_surface(CyclesButtonsPanel, Panel):
+ bl_label = "Surface"
+ bl_context = "material"
+
+ @classmethod
+ def poll(cls, context):
+ mat = context.material
+ return mat and (not mat.grease_pencil) and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ mat = context.material
+ if not panel_node_draw(layout, mat, 'OUTPUT_MATERIAL', 'Surface'):
+ layout.prop(mat, "diffuse_color")
+
+
+class CYCLES_MATERIAL_PT_volume(CyclesButtonsPanel, Panel):
+ bl_label = "Volume"
+ bl_context = "material"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ mat = context.material
+ return mat and (not mat.grease_pencil) and mat.node_tree and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ mat = context.material
+ # cmat = mat.cycles
+
+ panel_node_draw(layout, mat, 'OUTPUT_MATERIAL', 'Volume')
+
+
+class CYCLES_MATERIAL_PT_displacement(CyclesButtonsPanel, Panel):
+ bl_label = "Displacement"
+ bl_context = "material"
+
+ @classmethod
+ def poll(cls, context):
+ mat = context.material
+ return mat and (not mat.grease_pencil) and mat.node_tree and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ mat = context.material
+ panel_node_draw(layout, mat, 'OUTPUT_MATERIAL', 'Displacement')
+
+
+class CYCLES_MATERIAL_PT_settings(CyclesButtonsPanel, Panel):
+ bl_label = "Settings"
+ bl_context = "material"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ mat = context.material
+ return mat and (not mat.grease_pencil) and CyclesButtonsPanel.poll(context)
+
+ @staticmethod
+ def draw_shared(self, mat):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ layout.prop(mat, "pass_index")
+
+ def draw(self, context):
+ self.draw_shared(self, context.material)
+
+
+class CYCLES_MATERIAL_PT_settings_surface(CyclesButtonsPanel, Panel):
+ bl_label = "Surface"
+ bl_parent_id = "CYCLES_MATERIAL_PT_settings"
+ bl_context = "material"
+
+ @staticmethod
+ def draw_shared(self, mat):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ cmat = mat.cycles
+
+ col = layout.column()
+ col.prop(cmat, "sample_as_light", text="Multiple Importance")
+ col.prop(cmat, "use_transparent_shadow")
+ col.prop(cmat, "displacement_method", text="Displacement")
+
+ def draw(self, context):
+ self.draw_shared(self, context.material)
+
+
+class CYCLES_MATERIAL_PT_settings_volume(CyclesButtonsPanel, Panel):
+ bl_label = "Volume"
+ bl_parent_id = "CYCLES_MATERIAL_PT_settings"
+ bl_context = "material"
+
+ @staticmethod
+ def draw_shared(self, context, mat):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ cmat = mat.cycles
+
+ col = layout.column()
+ sub = col.column()
+ sub.active = use_cpu(context)
+ sub.prop(cmat, "volume_sampling", text="Sampling")
+ col.prop(cmat, "volume_interpolation", text="Interpolation")
+ col.prop(cmat, "homogeneous_volume", text="Homogeneous")
+
+ def draw(self, context):
+ self.draw_shared(self, context, context.material)
+
+
+class CYCLES_RENDER_PT_bake(CyclesButtonsPanel, Panel):
+ bl_label = "Bake"
+ bl_context = "render"
+ bl_options = {'DEFAULT_CLOSED'}
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ return CyclesButtonsPanel.poll(context) and not use_optix(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False # No animation.
+
+ scene = context.scene
+ cscene = scene.cycles
+ cbk = scene.render.bake
+ rd = scene.render
+
+ if rd.use_bake_multires:
+ layout.operator("object.bake_image", icon='RENDER_STILL')
+ layout.prop(rd, "use_bake_multires")
+ layout.prop(rd, "bake_type")
+
+ else:
+ layout.operator("object.bake", icon='RENDER_STILL').type = cscene.bake_type
+ layout.prop(rd, "use_bake_multires")
+ layout.prop(cscene, "bake_type")
+
+
+class CYCLES_RENDER_PT_bake_influence(CyclesButtonsPanel, Panel):
+ bl_label = "Influence"
+ bl_context = "render"
+ bl_parent_id = "CYCLES_RENDER_PT_bake"
+ COMPAT_ENGINES = {'CYCLES'}
+ @classmethod
+ def poll(cls, context):
+ scene = context.scene
+ cscene = scene.cycles
+ rd = scene.render
+ if rd.use_bake_multires == False and cscene.bake_type in {
+ 'NORMAL', 'COMBINED', 'DIFFUSE', 'GLOSSY', 'TRANSMISSION', 'SUBSURFACE'}:
+ return True
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False # No animation.
+
+ scene = context.scene
+ cscene = scene.cycles
+ cbk = scene.render.bake
+ rd = scene.render
+
+ col = layout.column()
+
+ if cscene.bake_type == 'NORMAL':
+ col.prop(cbk, "normal_space", text="Space")
+
+ sub = col.column(align=True)
+ sub.prop(cbk, "normal_r", text="Swizzle R")
+ sub.prop(cbk, "normal_g", text="G")
+ sub.prop(cbk, "normal_b", text="B")
+
+ elif cscene.bake_type == 'COMBINED':
+ row = col.row(align=True)
+ row.use_property_split = False
+ row.prop(cbk, "use_pass_direct", toggle=True)
+ row.prop(cbk, "use_pass_indirect", toggle=True)
+
+ flow = col.grid_flow(row_major=False, columns=0, even_columns=False, even_rows=False, align=True)
+
+ flow.active = cbk.use_pass_direct or cbk.use_pass_indirect
+ flow.prop(cbk, "use_pass_diffuse")
+ flow.prop(cbk, "use_pass_glossy")
+ flow.prop(cbk, "use_pass_transmission")
+ flow.prop(cbk, "use_pass_subsurface")
+ flow.prop(cbk, "use_pass_ambient_occlusion")
+ flow.prop(cbk, "use_pass_emit")
+
+ elif cscene.bake_type in {'DIFFUSE', 'GLOSSY', 'TRANSMISSION', 'SUBSURFACE'}:
+ row = col.row(align=True)
+ row.use_property_split = False
+ row.prop(cbk, "use_pass_direct", toggle=True)
+ row.prop(cbk, "use_pass_indirect", toggle=True)
+ row.prop(cbk, "use_pass_color", toggle=True)
+
+
+class CYCLES_RENDER_PT_bake_selected_to_active(CyclesButtonsPanel, Panel):
+ bl_label = "Selected to Active"
+ bl_context = "render"
+ bl_parent_id = "CYCLES_RENDER_PT_bake"
+ bl_options = {'DEFAULT_CLOSED'}
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ scene = context.scene
+ rd = scene.render
+ return not rd.use_bake_multires
+
+ def draw_header(self, context):
+ scene = context.scene
+ cbk = scene.render.bake
+ self.layout.prop(cbk, "use_selected_to_active", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False # No animation.
+
+ scene = context.scene
+ cscene = scene.cycles
+ cbk = scene.render.bake
+ rd = scene.render
+
+ layout.active = cbk.use_selected_to_active
+ col = layout.column()
+
+ col.prop(cbk, "use_cage", text="Cage")
+ if cbk.use_cage:
+ col.prop(cbk, "cage_extrusion", text="Extrusion")
+ col.prop(cbk, "cage_object", text="Cage Object")
+ else:
+ col.prop(cbk, "cage_extrusion", text="Ray Distance")
+
+
+class CYCLES_RENDER_PT_bake_output(CyclesButtonsPanel, Panel):
+ bl_label = "Output"
+ bl_context = "render"
+ bl_parent_id = "CYCLES_RENDER_PT_bake"
+ COMPAT_ENGINES = {'CYCLES'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False # No animation.
+
+ scene = context.scene
+ cscene = scene.cycles
+ cbk = scene.render.bake
+ rd = scene.render
+
+ if rd.use_bake_multires:
+ layout.prop(rd, "bake_margin")
+ layout.prop(rd, "use_bake_clear", text="Clear Image")
+
+ if rd.bake_type == 'DISPLACEMENT':
+ layout.prop(rd, "use_bake_lores_mesh")
+ else:
+ layout.prop(cbk, "margin")
+ layout.prop(cbk, "use_clear", text="Clear Image")
+
+
+class CYCLES_RENDER_PT_debug(CyclesButtonsPanel, Panel):
+ bl_label = "Debug"
+ bl_context = "render"
+ bl_options = {'DEFAULT_CLOSED'}
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ return CyclesButtonsPanel.poll(context) and bpy.app.debug_value == 256
+
+ def draw(self, context):
+ layout = self.layout
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+
+ col.label(text="CPU Flags:")
+ row = col.row(align=True)
+ row.prop(cscene, "debug_use_cpu_sse2", toggle=True)
+ row.prop(cscene, "debug_use_cpu_sse3", toggle=True)
+ row.prop(cscene, "debug_use_cpu_sse41", toggle=True)
+ row.prop(cscene, "debug_use_cpu_avx", toggle=True)
+ row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
+ col.prop(cscene, "debug_bvh_layout")
+ col.prop(cscene, "debug_use_cpu_split_kernel")
+
+ col.separator()
+
+ col = layout.column()
+ col.label(text="CUDA Flags:")
+ col.prop(cscene, "debug_use_cuda_adaptive_compile")
+ col.prop(cscene, "debug_use_cuda_split_kernel")
+
+ col.separator()
+
+ col = layout.column()
+ col.label(text="OptiX Flags:")
+ col.prop(cscene, "debug_optix_cuda_streams")
+
+ col.separator()
+
+ col = layout.column()
+ col.label(text="OpenCL Flags:")
+ col.prop(cscene, "debug_opencl_device_type", text="Device")
+ col.prop(cscene, "debug_use_opencl_debug", text="Debug")
+ col.prop(cscene, "debug_opencl_mem_limit")
+
+ col.separator()
+
+ col = layout.column()
+ col.prop(cscene, "debug_bvh_type")
+
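+# The Debug panel above is hidden unless Blender runs with a debug value of
+# 256, e.g. started as `blender --debug-value 256`, or after setting
+# bpy.app.debug_value = 256 from the Python console.
+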
+
+class CYCLES_RENDER_PT_simplify(CyclesButtonsPanel, Panel):
+ bl_label = "Simplify"
+ bl_context = "render"
+ bl_options = {'DEFAULT_CLOSED'}
+ COMPAT_ENGINES = {'CYCLES'}
+
+ def draw_header(self, context):
+ rd = context.scene.render
+ self.layout.prop(rd, "use_simplify", text="")
+
+ def draw(self, context):
+ pass
+
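+# CYCLES_RENDER_PT_simplify is header-only: the enable checkbox lives in
+# draw_header(), and the actual options are provided by the sub-panels below,
+# which attach themselves via bl_parent_id = "CYCLES_RENDER_PT_simplify" and
+# grey themselves out through layout.active = rd.use_simplify.
+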
+
+class CYCLES_RENDER_PT_simplify_viewport(CyclesButtonsPanel, Panel):
+ bl_label = "Viewport"
+ bl_context = "render"
+ bl_parent_id = "CYCLES_RENDER_PT_simplify"
+ COMPAT_ENGINES = {'CYCLES'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ cscene = scene.cycles
+
+ layout.active = rd.use_simplify
+
+ col = layout.column()
+ col.prop(rd, "simplify_subdivision", text="Max Subdivision")
+ col.prop(rd, "simplify_child_particles", text="Child Particles")
+ col.prop(cscene, "texture_limit", text="Texture Limit")
+ col.prop(cscene, "ao_bounces", text="AO Bounces")
+ col.prop(rd, "use_simplify_smoke_highres")
+
+
+class CYCLES_RENDER_PT_simplify_render(CyclesButtonsPanel, Panel):
+ bl_label = "Render"
+ bl_context = "render"
+ bl_parent_id = "CYCLES_RENDER_PT_simplify"
+ COMPAT_ENGINES = {'CYCLES'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ cscene = scene.cycles
+
+ layout.active = rd.use_simplify
+
+ col = layout.column()
+
+ col.prop(rd, "simplify_subdivision_render", text="Max Subdivision")
+ col.prop(rd, "simplify_child_particles_render", text="Child Particles")
+ col.prop(cscene, "texture_limit_render", text="Texture Limit")
+ col.prop(cscene, "ao_bounces_render", text="AO Bounces")
+
+
+class CYCLES_RENDER_PT_simplify_culling(CyclesButtonsPanel, Panel):
+ bl_label = "Culling"
+ bl_context = "render"
+ bl_parent_id = "CYCLES_RENDER_PT_simplify"
+ bl_options = {'DEFAULT_CLOSED'}
+ COMPAT_ENGINES = {'CYCLES'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ cscene = scene.cycles
+
+ layout.active = rd.use_simplify
+
+ col = layout.column()
+ col.prop(cscene, "use_camera_cull")
+ sub = col.column()
+ sub.active = cscene.use_camera_cull
+ sub.prop(cscene, "camera_cull_margin")
+
+ col = layout.column()
+ col.prop(cscene, "use_distance_cull")
+ sub = col.column()
+ sub.active = cscene.use_distance_cull
+ sub.prop(cscene, "distance_cull_margin", text="Distance")
+
+
+class CYCLES_VIEW3D_PT_shading_render_pass(Panel):
+ bl_space_type = 'VIEW_3D'
+ bl_region_type = 'HEADER'
+ bl_label = "Render Pass"
+ bl_parent_id = 'VIEW3D_PT_shading'
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ return (context.engine in cls.COMPAT_ENGINES
+ and context.space_data.shading.type == 'RENDERED')
+
+ def draw(self, context):
+ shading = context.space_data.shading
+
+ layout = self.layout
+ layout.prop(shading.cycles, "render_pass", text="")
+
+
+class CYCLES_VIEW3D_PT_shading_lighting(Panel):
+ bl_space_type = 'VIEW_3D'
+ bl_region_type = 'HEADER'
+ bl_label = "Lighting"
+ bl_parent_id = 'VIEW3D_PT_shading'
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ return (context.engine in cls.COMPAT_ENGINES
+ and context.space_data.shading.type == 'RENDERED')
+
+ def draw(self, context):
+ layout = self.layout
+ col = layout.column()
+
+ shading = context.space_data.shading
+ col.prop(shading, "use_scene_lights_render")
+ col.prop(shading, "use_scene_world_render")
+
+ if not shading.use_scene_world_render:
+ col = layout.column()
+ split = col.split(factor=0.9)
+
+ col = split.column()
+ sub = col.row()
+ sub.scale_y = 0.6
+ sub.template_icon_view(shading, "studio_light", scale_popup=3)
+
+ col = split.column()
+ col.operator("preferences.studiolight_show", emboss=False, text="", icon='PREFERENCES')
+
+ split = layout.split(factor=0.9)
+ col = split.column()
+ col.prop(shading, "studiolight_rotate_z", text="Rotation")
+ col.prop(shading, "studiolight_intensity")
+ col.prop(shading, "studiolight_background_alpha")
+
+
+class CYCLES_VIEW3D_PT_simplify_greasepencil(CyclesButtonsPanel, Panel, GreasePencilSimplifyPanel):
+ bl_label = "Grease Pencil"
+ bl_parent_id = "CYCLES_RENDER_PT_simplify"
+ COMPAT_ENGINES = {'CYCLES'}
+ bl_options = {'DEFAULT_CLOSED'}
+
+
+def draw_device(self, context):
+ scene = context.scene
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ if context.engine == 'CYCLES':
+ from . import engine
+ cscene = scene.cycles
+
+ col = layout.column()
+ col.prop(cscene, "feature_set")
+
+ col = layout.column()
+ col.active = show_device_active(context)
+ col.prop(cscene, "device")
+
+ if engine.with_osl() and use_cpu(context):
+ col.prop(cscene, "shading_system")
+
+
+def draw_pause(self, context):
+ layout = self.layout
+ scene = context.scene
+
+ if context.engine == "CYCLES":
+ view = context.space_data
+
+ if view.shading.type == 'RENDERED':
+ cscene = scene.cycles
+ layout.prop(cscene, "preview_pause", icon='PLAY' if cscene.preview_pause else 'PAUSE', text="")
+
+
+def get_panels():
+ exclude_panels = {
+ 'DATA_PT_area',
+ 'DATA_PT_camera_dof',
+ 'DATA_PT_falloff_curve',
+ 'DATA_PT_light',
+ 'DATA_PT_preview',
+ 'DATA_PT_spot',
+ 'MATERIAL_PT_context_material',
+ 'MATERIAL_PT_preview',
+ 'NODE_DATA_PT_light',
+ 'NODE_DATA_PT_spot',
+ 'OBJECT_PT_visibility',
+ 'VIEWLAYER_PT_filter',
+ 'VIEWLAYER_PT_layer_passes',
+ 'RENDER_PT_post_processing',
+ 'RENDER_PT_simplify',
+ }
+
+ panels = []
+ for panel in bpy.types.Panel.__subclasses__():
+ if hasattr(panel, 'COMPAT_ENGINES') and 'BLENDER_RENDER' in panel.COMPAT_ENGINES:
+ if panel.__name__ not in exclude_panels:
+ panels.append(panel)
+
+ return panels
+
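+# get_panels() collects every built-in panel that declares itself compatible
+# with BLENDER_RENDER, minus the ones Cycles replaces with its own versions
+# above; register() below then adds 'CYCLES' to their COMPAT_ENGINES so they
+# remain visible while Cycles is the active render engine.
+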
+
+classes = (
+ CYCLES_PT_sampling_presets,
+ CYCLES_PT_integrator_presets,
+ CYCLES_RENDER_PT_sampling,
+ CYCLES_RENDER_PT_sampling_sub_samples,
+ CYCLES_RENDER_PT_sampling_advanced,
+ CYCLES_RENDER_PT_light_paths,
+ CYCLES_RENDER_PT_light_paths_max_bounces,
+ CYCLES_RENDER_PT_light_paths_clamping,
+ CYCLES_RENDER_PT_light_paths_caustics,
+ CYCLES_RENDER_PT_volumes,
+ CYCLES_RENDER_PT_subdivision,
+ CYCLES_RENDER_PT_hair,
+ CYCLES_RENDER_PT_simplify,
+ CYCLES_RENDER_PT_simplify_viewport,
+ CYCLES_RENDER_PT_simplify_render,
+ CYCLES_RENDER_PT_simplify_culling,
+ CYCLES_VIEW3D_PT_simplify_greasepencil,
+ CYCLES_VIEW3D_PT_shading_lighting,
+ CYCLES_VIEW3D_PT_shading_render_pass,
+ CYCLES_RENDER_PT_motion_blur,
+ CYCLES_RENDER_PT_motion_blur_curve,
+ CYCLES_RENDER_PT_film,
+ CYCLES_RENDER_PT_film_pixel_filter,
+ CYCLES_RENDER_PT_film_transparency,
+ CYCLES_RENDER_PT_performance,
+ CYCLES_RENDER_PT_performance_threads,
+ CYCLES_RENDER_PT_performance_tiles,
+ CYCLES_RENDER_PT_performance_acceleration_structure,
+ CYCLES_RENDER_PT_performance_final_render,
+ CYCLES_RENDER_PT_performance_viewport,
+ CYCLES_RENDER_PT_passes,
+ CYCLES_RENDER_PT_passes_data,
+ CYCLES_RENDER_PT_passes_light,
+ CYCLES_RENDER_PT_passes_crypto,
+ CYCLES_RENDER_PT_passes_debug,
+ CYCLES_RENDER_UL_aov,
+ CYCLES_RENDER_PT_passes_aov,
+ CYCLES_RENDER_PT_filter,
+ CYCLES_RENDER_PT_override,
+ CYCLES_RENDER_PT_denoising,
+ CYCLES_PT_post_processing,
+ CYCLES_CAMERA_PT_dof,
+ CYCLES_CAMERA_PT_dof_aperture,
+ CYCLES_PT_context_material,
+ CYCLES_OBJECT_PT_motion_blur,
+ CYCLES_OBJECT_PT_visibility,
+ CYCLES_OBJECT_PT_visibility_ray_visibility,
+ CYCLES_OBJECT_PT_visibility_culling,
+ CYCLES_LIGHT_PT_preview,
+ CYCLES_LIGHT_PT_light,
+ CYCLES_LIGHT_PT_nodes,
+ CYCLES_LIGHT_PT_spot,
+ CYCLES_WORLD_PT_preview,
+ CYCLES_WORLD_PT_surface,
+ CYCLES_WORLD_PT_volume,
+ CYCLES_WORLD_PT_ambient_occlusion,
+ CYCLES_WORLD_PT_mist,
+ CYCLES_WORLD_PT_ray_visibility,
+ CYCLES_WORLD_PT_settings,
+ CYCLES_WORLD_PT_settings_surface,
+ CYCLES_WORLD_PT_settings_volume,
+ CYCLES_MATERIAL_PT_preview,
+ CYCLES_MATERIAL_PT_surface,
+ CYCLES_MATERIAL_PT_volume,
+ CYCLES_MATERIAL_PT_displacement,
+ CYCLES_MATERIAL_PT_settings,
+ CYCLES_MATERIAL_PT_settings_surface,
+ CYCLES_MATERIAL_PT_settings_volume,
+ CYCLES_RENDER_PT_bake,
+ CYCLES_RENDER_PT_bake_influence,
+ CYCLES_RENDER_PT_bake_selected_to_active,
+ CYCLES_RENDER_PT_bake_output,
+ CYCLES_RENDER_PT_debug,
+ node_panel(CYCLES_MATERIAL_PT_settings),
+ node_panel(CYCLES_MATERIAL_PT_settings_surface),
+ node_panel(CYCLES_MATERIAL_PT_settings_volume),
+ node_panel(CYCLES_WORLD_PT_ray_visibility),
+ node_panel(CYCLES_WORLD_PT_settings),
+ node_panel(CYCLES_WORLD_PT_settings_surface),
+ node_panel(CYCLES_WORLD_PT_settings_volume),
+ node_panel(CYCLES_LIGHT_PT_light),
+ node_panel(CYCLES_LIGHT_PT_spot),
+)
+
+
+def register():
+ from bpy.utils import register_class
+
+ bpy.types.RENDER_PT_context.append(draw_device)
+ bpy.types.VIEW3D_HT_header.append(draw_pause)
+
+ for panel in get_panels():
+ panel.COMPAT_ENGINES.add('CYCLES')
+
+ for cls in classes:
+ register_class(cls)
+
+
+def unregister():
+ from bpy.utils import unregister_class
+
+ bpy.types.RENDER_PT_context.remove(draw_device)
+ bpy.types.VIEW3D_HT_header.remove(draw_pause)
+
+ for panel in get_panels():
+ if 'CYCLES' in panel.COMPAT_ENGINES:
+ panel.COMPAT_ENGINES.remove('CYCLES')
+
+ for cls in classes:
+ unregister_class(cls)
diff -Naur a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
--- a/intern/cycles/blender/blender_session.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/blender_session.cpp 2020-01-10 20:42:43.457590054 +0300
@@ -474,7 +474,8 @@
b_rlay_name = b_view_layer.name();
/* add passes */
- vector<Pass> passes = sync->sync_render_passes(b_rlay, b_view_layer);
+ vector<Pass> passes = sync->sync_render_passes(
+ b_rlay, b_view_layer, session_params.adaptive_sampling);
buffer_params.passes = passes;
PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
diff -Naur a/intern/cycles/blender/blender_session.cpp.orig b/intern/cycles/blender/blender_session.cpp.orig
--- a/intern/cycles/blender/blender_session.cpp.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/blender/blender_session.cpp.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,1513 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+
+#include "device/device.h"
+#include "render/background.h"
+#include "render/buffers.h"
+#include "render/camera.h"
+#include "render/colorspace.h"
+#include "render/film.h"
+#include "render/integrator.h"
+#include "render/light.h"
+#include "render/mesh.h"
+#include "render/object.h"
+#include "render/scene.h"
+#include "render/session.h"
+#include "render/shader.h"
+#include "render/stats.h"
+
+#include "util/util_algorithm.h"
+#include "util/util_color.h"
+#include "util/util_foreach.h"
+#include "util/util_function.h"
+#include "util/util_hash.h"
+#include "util/util_logging.h"
+#include "util/util_murmurhash.h"
+#include "util/util_progress.h"
+#include "util/util_time.h"
+
+#include "blender/blender_sync.h"
+#include "blender/blender_session.h"
+#include "blender/blender_util.h"
+
+CCL_NAMESPACE_BEGIN
+
+bool BlenderSession::headless = false;
+int BlenderSession::num_resumable_chunks = 0;
+int BlenderSession::current_resumable_chunk = 0;
+int BlenderSession::start_resumable_chunk = 0;
+int BlenderSession::end_resumable_chunk = 0;
+bool BlenderSession::print_render_stats = false;
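+
+/* These chunk settings are normally filled in from the add-on's Python side
+ * before rendering, e.g. via the command line arguments
+ * `--cycles-resumable-num-chunks N` and `--cycles-resumable-current-chunk I`
+ * (names as handled by the Cycles add-on), which restrict the render to the
+ * I-th of N sample ranges; see update_resumable_tile_manager() below. */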
+
+BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
+ BL::Preferences &b_userpref,
+ BL::BlendData &b_data,
+ bool preview_osl)
+ : session(NULL),
+ sync(NULL),
+ b_engine(b_engine),
+ b_userpref(b_userpref),
+ b_data(b_data),
+ b_render(b_engine.render()),
+ b_depsgraph(PointerRNA_NULL),
+ b_scene(PointerRNA_NULL),
+ b_v3d(PointerRNA_NULL),
+ b_rv3d(PointerRNA_NULL),
+ width(0),
+ height(0),
+ preview_osl(preview_osl),
+ python_thread_state(NULL)
+{
+ /* offline render */
+ background = true;
+ last_redraw_time = 0.0;
+ start_resize_time = 0.0;
+ last_status_time = 0.0;
+}
+
+BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
+ BL::Preferences &b_userpref,
+ BL::BlendData &b_data,
+ BL::SpaceView3D &b_v3d,
+ BL::RegionView3D &b_rv3d,
+ int width,
+ int height)
+ : session(NULL),
+ sync(NULL),
+ b_engine(b_engine),
+ b_userpref(b_userpref),
+ b_data(b_data),
+ b_render(b_engine.render()),
+ b_depsgraph(PointerRNA_NULL),
+ b_scene(PointerRNA_NULL),
+ b_v3d(b_v3d),
+ b_rv3d(b_rv3d),
+ width(width),
+ height(height),
+ preview_osl(false),
+ python_thread_state(NULL)
+{
+ /* 3d view render */
+ background = false;
+ last_redraw_time = 0.0;
+ start_resize_time = 0.0;
+ last_status_time = 0.0;
+}
+
+BlenderSession::~BlenderSession()
+{
+ free_session();
+}
+
+void BlenderSession::create_session()
+{
+ SessionParams session_params = BlenderSync::get_session_params(
+ b_engine, b_userpref, b_scene, background);
+ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
+ bool session_pause = BlenderSync::get_session_pause(b_scene, background);
+
+ /* reset status/progress */
+ last_status = "";
+ last_error = "";
+ last_progress = -1.0f;
+ start_resize_time = 0.0;
+
+ /* create session */
+ session = new Session(session_params);
+ session->scene = scene;
+ session->progress.set_update_callback(function_bind(&BlenderSession::tag_redraw, this));
+ session->progress.set_cancel_callback(function_bind(&BlenderSession::test_cancel, this));
+ session->set_pause(session_pause);
+
+ /* create scene */
+ scene = new Scene(scene_params, session->device);
+ scene->name = b_scene.name();
+
+ /* setup callbacks for builtin image support */
+ scene->image_manager->builtin_image_info_cb = function_bind(
+ &BlenderSession::builtin_image_info, this, _1, _2, _3);
+ scene->image_manager->builtin_image_pixels_cb = function_bind(
+ &BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4, _5, _6, _7);
+ scene->image_manager->builtin_image_float_pixels_cb = function_bind(
+ &BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4, _5, _6, _7);
+
+ session->scene = scene;
+
+ /* There is no single depsgraph to use for the entire render.
+ * So we need to handle this differently.
+ *
+ * We could loop over the final render result's render layers in the pipeline
+ * and keep Cycles unaware of multiple layers, or perhaps move syncing further
+ * down in the pipeline.
+ */
+ /* create sync */
+ sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress);
+ BL::Object b_camera_override(b_engine.camera_override());
+ if (b_v3d) {
+ sync->sync_view(b_v3d, b_rv3d, width, height);
+ }
+ else {
+ sync->sync_camera(b_render, b_camera_override, width, height, "");
+ }
+
+ /* set buffer parameters */
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ session->reset(buffer_params, session_params.samples);
+
+ b_engine.use_highlight_tiles(session_params.progressive_refine == false);
+
+ update_resumable_tile_manager(session_params.samples);
+}
+
+void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph)
+{
+ this->b_data = b_data;
+ this->b_depsgraph = b_depsgraph;
+ this->b_scene = b_depsgraph.scene_eval();
+
+ if (preview_osl) {
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+ RNA_boolean_set(&cscene, "shading_system", preview_osl);
+ }
+
+ if (b_v3d) {
+ this->b_render = b_scene.render();
+ }
+ else {
+ this->b_render = b_engine.render();
+ width = render_resolution_x(b_render);
+ height = render_resolution_y(b_render);
+ }
+
+ bool is_new_session = (session == NULL);
+ if (is_new_session) {
+ /* Initialize the session and remember that it was just created, so that
+ * it is not re-created below.
+ */
+ create_session();
+ }
+
+ if (b_v3d) {
+ /* NOTE: We need to create the session, but running the code below would
+ * make the viewport render get stuck during initialization.
+ */
+ return;
+ }
+
+ SessionParams session_params = BlenderSync::get_session_params(
+ b_engine, b_userpref, b_scene, background);
+ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
+
+ if (scene->params.modified(scene_params) || session->params.modified(session_params) ||
+ !scene_params.persistent_data) {
+ /* if scene or session parameters changed, it's easier to simply re-create
+ * them rather than trying to distinguish which settings need to be updated
+ */
+ if (!is_new_session) {
+ free_session();
+ create_session();
+ }
+ return;
+ }
+
+ session->progress.reset();
+ scene->reset();
+
+ session->tile_manager.set_tile_order(session_params.tile_order);
+
+ /* peak memory usage should show current render peak, not peak for all renders
+ * made by this render session
+ */
+ session->stats.mem_peak = session->stats.mem_used;
+
+ /* There is no single depsgraph to use for the entire render.
+ * See note on create_session().
+ */
+ /* sync object should be re-created */
+ sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress);
+
+ BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL);
+ BL::RegionView3D b_null_region_view3d(PointerRNA_NULL);
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
+ session->reset(buffer_params, session_params.samples);
+
+ b_engine.use_highlight_tiles(session_params.progressive_refine == false);
+
+ /* reset time */
+ start_resize_time = 0.0;
+}
+
+void BlenderSession::free_session()
+{
+ if (sync)
+ delete sync;
+
+ delete session;
+}
+
+static ShaderEvalType get_shader_type(const string &pass_type)
+{
+ const char *shader_type = pass_type.c_str();
+
+ /* data passes */
+ if (strcmp(shader_type, "NORMAL") == 0)
+ return SHADER_EVAL_NORMAL;
+ else if (strcmp(shader_type, "UV") == 0)
+ return SHADER_EVAL_UV;
+ else if (strcmp(shader_type, "ROUGHNESS") == 0)
+ return SHADER_EVAL_ROUGHNESS;
+ else if (strcmp(shader_type, "DIFFUSE_COLOR") == 0)
+ return SHADER_EVAL_DIFFUSE_COLOR;
+ else if (strcmp(shader_type, "GLOSSY_COLOR") == 0)
+ return SHADER_EVAL_GLOSSY_COLOR;
+ else if (strcmp(shader_type, "TRANSMISSION_COLOR") == 0)
+ return SHADER_EVAL_TRANSMISSION_COLOR;
+ else if (strcmp(shader_type, "SUBSURFACE_COLOR") == 0)
+ return SHADER_EVAL_SUBSURFACE_COLOR;
+ else if (strcmp(shader_type, "EMIT") == 0)
+ return SHADER_EVAL_EMISSION;
+
+ /* light passes */
+ else if (strcmp(shader_type, "AO") == 0)
+ return SHADER_EVAL_AO;
+ else if (strcmp(shader_type, "COMBINED") == 0)
+ return SHADER_EVAL_COMBINED;
+ else if (strcmp(shader_type, "SHADOW") == 0)
+ return SHADER_EVAL_SHADOW;
+ else if (strcmp(shader_type, "DIFFUSE") == 0)
+ return SHADER_EVAL_DIFFUSE;
+ else if (strcmp(shader_type, "GLOSSY") == 0)
+ return SHADER_EVAL_GLOSSY;
+ else if (strcmp(shader_type, "TRANSMISSION") == 0)
+ return SHADER_EVAL_TRANSMISSION;
+ else if (strcmp(shader_type, "SUBSURFACE") == 0)
+ return SHADER_EVAL_SUBSURFACE;
+
+ /* extra */
+ else if (strcmp(shader_type, "ENVIRONMENT") == 0)
+ return SHADER_EVAL_ENVIRONMENT;
+
+ else
+ return SHADER_EVAL_BAKE;
+}
+
+static BL::RenderResult begin_render_result(BL::RenderEngine &b_engine,
+ int x,
+ int y,
+ int w,
+ int h,
+ const char *layername,
+ const char *viewname)
+{
+ return b_engine.begin_result(x, y, w, h, layername, viewname);
+}
+
+static void end_render_result(BL::RenderEngine &b_engine,
+ BL::RenderResult &b_rr,
+ bool cancel,
+ bool highlight,
+ bool do_merge_results)
+{
+ b_engine.end_result(b_rr, (int)cancel, (int)highlight, (int)do_merge_results);
+}
+
+void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
+ bool do_update_only,
+ bool highlight)
+{
+ int x = rtile.x - session->tile_manager.params.full_x;
+ int y = rtile.y - session->tile_manager.params.full_y;
+ int w = rtile.w;
+ int h = rtile.h;
+
+ /* get render result */
+ BL::RenderResult b_rr = begin_render_result(
+ b_engine, x, y, w, h, b_rlay_name.c_str(), b_rview_name.c_str());
+
+ /* can happen if the intersected rectangle gives 0 width or height */
+ if (b_rr.ptr.data == NULL) {
+ return;
+ }
+
+ BL::RenderResult::layers_iterator b_single_rlay;
+ b_rr.layers.begin(b_single_rlay);
+
+ /* layer will be missing if it was disabled in the UI */
+ if (b_single_rlay == b_rr.layers.end())
+ return;
+
+ BL::RenderLayer b_rlay = *b_single_rlay;
+
+ if (do_update_only) {
+ /* Sample would be zero at the initial tile update, which is only needed
+ * to tag the tile from the Blender side as IN PROGRESS for proper
+ * highlight; no buffers should be sent to Blender yet. For denoising we
+ * also keep showing the noisy buffers until denoising is done. */
+ bool merge = (rtile.sample != 0) && (rtile.task != RenderTile::DENOISE);
+
+ if (merge) {
+ update_render_result(b_rlay, rtile);
+ }
+
+ end_render_result(b_engine, b_rr, true, highlight, merge);
+ }
+ else {
+ /* Write final render result. */
+ write_render_result(b_rlay, rtile);
+ end_render_result(b_engine, b_rr, false, false, true);
+ }
+}
+
+void BlenderSession::write_render_tile(RenderTile &rtile)
+{
+ do_write_update_render_tile(rtile, false, false);
+}
+
+void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight)
+{
+ /* Use final write for preview renders, otherwise the render result wouldn't
+ * be updated on the Blender side. This would need to be investigated a bit
+ * further, but for now it shall be fine.
+ */
+ if (!b_engine.is_preview())
+ do_write_update_render_tile(rtile, true, highlight);
+ else
+ do_write_update_render_tile(rtile, false, false);
+}
+
+static void add_cryptomatte_layer(BL::RenderResult &b_rr, string name, string manifest)
+{
+ string identifier = string_printf("%08x", util_murmur_hash3(name.c_str(), name.length(), 0));
+ string prefix = "cryptomatte/" + identifier.substr(0, 7) + "/";
+
+ render_add_metadata(b_rr, prefix + "name", name);
+ render_add_metadata(b_rr, prefix + "hash", "MurmurHash3_32");
+ render_add_metadata(b_rr, prefix + "conversion", "uint32_to_float32");
+ render_add_metadata(b_rr, prefix + "manifest", manifest);
+}
+
+void BlenderSession::stamp_view_layer_metadata(Scene *scene, const string &view_layer_name)
+{
+ BL::RenderResult b_rr = b_engine.get_result();
+ string prefix = "cycles." + view_layer_name + ".";
+
+ /* Configured number of samples for the view layer. */
+ b_rr.stamp_data_add_field((prefix + "samples").c_str(),
+ to_string(session->params.samples).c_str());
+
+ /* Store ranged samples information. */
+ if (session->tile_manager.range_num_samples != -1) {
+ b_rr.stamp_data_add_field((prefix + "range_start_sample").c_str(),
+ to_string(session->tile_manager.range_start_sample).c_str());
+ b_rr.stamp_data_add_field((prefix + "range_num_samples").c_str(),
+ to_string(session->tile_manager.range_num_samples).c_str());
+ }
+
+ /* Write cryptomatte metadata. */
+ if (scene->film->cryptomatte_passes & CRYPT_OBJECT) {
+ add_cryptomatte_layer(b_rr,
+ view_layer_name + ".CryptoObject",
+ scene->object_manager->get_cryptomatte_objects(scene));
+ }
+ if (scene->film->cryptomatte_passes & CRYPT_MATERIAL) {
+ add_cryptomatte_layer(b_rr,
+ view_layer_name + ".CryptoMaterial",
+ scene->shader_manager->get_cryptomatte_materials(scene));
+ }
+ if (scene->film->cryptomatte_passes & CRYPT_ASSET) {
+ add_cryptomatte_layer(b_rr,
+ view_layer_name + ".CryptoAsset",
+ scene->object_manager->get_cryptomatte_assets(scene));
+ }
+
+ /* Store synchronization and bare-render times. */
+ double total_time, render_time;
+ session->progress.get_time(total_time, render_time);
+ b_rr.stamp_data_add_field((prefix + "total_time").c_str(),
+ time_human_readable_from_seconds(total_time).c_str());
+ b_rr.stamp_data_add_field((prefix + "render_time").c_str(),
+ time_human_readable_from_seconds(render_time).c_str());
+ b_rr.stamp_data_add_field((prefix + "synchronization_time").c_str(),
+ time_human_readable_from_seconds(total_time - render_time).c_str());
+}
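+
+/* Example: for a view layer named "View Layer", the fields stamped above end
+ * up in the render result metadata as
+ *   cycles.View Layer.samples
+ *   cycles.View Layer.total_time / .render_time / .synchronization_time
+ * plus, when cryptomatte passes are enabled,
+ *   cryptomatte/<first 7 hex digits of MurmurHash3(name)>/{name,hash,conversion,manifest}. */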
+
+void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
+{
+ b_depsgraph = b_depsgraph_;
+
+ /* set callback to write out render results */
+ session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1);
+ session->update_render_tile_cb = function_bind(
+ &BlenderSession::update_render_tile, this, _1, _2);
+
+ /* get buffer parameters */
+ SessionParams session_params = BlenderSync::get_session_params(
+ b_engine, b_userpref, b_scene, background);
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height);
+
+ /* render each layer */
+ BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
+
+ /* temporary render result to find needed passes and views */
+ BL::RenderResult b_rr = begin_render_result(
+ b_engine, 0, 0, 1, 1, b_view_layer.name().c_str(), NULL);
+ BL::RenderResult::layers_iterator b_single_rlay;
+ b_rr.layers.begin(b_single_rlay);
+ BL::RenderLayer b_rlay = *b_single_rlay;
+ b_rlay_name = b_view_layer.name();
+
+ /* add passes */
+ vector<Pass> passes = sync->sync_render_passes(b_rlay, b_view_layer);
+ buffer_params.passes = passes;
+
+ PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
+ bool use_denoising = get_boolean(crl, "use_denoising");
+ bool use_optix_denoising = get_boolean(crl, "use_optix_denoising");
+ bool write_denoising_passes = get_boolean(crl, "denoising_store_passes");
+
+ buffer_params.denoising_data_pass = use_denoising || write_denoising_passes;
+ buffer_params.denoising_clean_pass = (scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES);
+ buffer_params.denoising_prefiltered_pass = write_denoising_passes && !use_optix_denoising;
+
+ session->params.run_denoising = use_denoising || write_denoising_passes;
+ session->params.full_denoising = use_denoising && !use_optix_denoising;
+ session->params.optix_denoising = use_denoising && use_optix_denoising;
+ session->params.write_denoising_passes = write_denoising_passes && !use_optix_denoising;
+ session->params.denoising.radius = get_int(crl, "denoising_radius");
+ session->params.denoising.strength = get_float(crl, "denoising_strength");
+ session->params.denoising.feature_strength = get_float(crl, "denoising_feature_strength");
+ session->params.denoising.relative_pca = get_boolean(crl, "denoising_relative_pca");
+ session->params.denoising.optix_input_passes = get_enum(crl, "denoising_optix_input_passes");
+ session->tile_manager.schedule_denoising = session->params.run_denoising;
+
+ scene->film->denoising_data_pass = buffer_params.denoising_data_pass;
+ scene->film->denoising_clean_pass = buffer_params.denoising_clean_pass;
+ scene->film->denoising_prefiltered_pass = buffer_params.denoising_prefiltered_pass;
+
+ scene->film->pass_alpha_threshold = b_view_layer.pass_alpha_threshold();
+ scene->film->tag_passes_update(scene, passes);
+ scene->film->tag_update(scene);
+ scene->integrator->tag_update(scene);
+
+ BL::RenderResult::views_iterator b_view_iter;
+
+ int num_views = 0;
+ for (b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end(); ++b_view_iter) {
+ num_views++;
+ }
+
+ int view_index = 0;
+ for (b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end();
+ ++b_view_iter, ++view_index) {
+ b_rview_name = b_view_iter->name();
+
+ /* set the current view */
+ b_engine.active_view_set(b_rview_name.c_str());
+
+ /* update scene */
+ BL::Object b_camera_override(b_engine.camera_override());
+ sync->sync_camera(b_render, b_camera_override, width, height, b_rview_name.c_str());
+ sync->sync_data(
+ b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);
+ builtin_images_load();
+
+ /* Attempt to free all data which is held by the Blender side, since at this
+ * point we know that we've got everything needed to render the current view
+ * layer.
+ *
+ * At the moment we only free if we are not doing multi-view
+ * (or if we are rendering the last view). See T58142/D4239 for discussion.
+ */
+ if (view_index == num_views - 1) {
+ free_blender_memory_if_possible();
+ }
+
+ /* Make sure all views have different noise patterns. The hardcoded value
+ * is arbitrary, it just needs to vary the seed per view. */
+ if (view_index != 0) {
+ scene->integrator->seed += hash_uint2(scene->integrator->seed,
+ hash_uint2(view_index * 0xdeadbeef, 0));
+ scene->integrator->tag_update(scene);
+ }
+
+ /* Update number of samples per layer. */
+ int samples = sync->get_layer_samples();
+ bool bound_samples = sync->get_layer_bound_samples();
+ int effective_layer_samples;
+
+ if (samples != 0 && (!bound_samples || (samples < session_params.samples)))
+ effective_layer_samples = samples;
+ else
+ effective_layer_samples = session_params.samples;
+
+ /* Update tile manager if we're doing resumable render. */
+ update_resumable_tile_manager(effective_layer_samples);
+
+ /* Update session itself. */
+ session->reset(buffer_params, effective_layer_samples);
+
+ /* render */
+ session->start();
+ session->wait();
+
+ if (!b_engine.is_preview() && background && print_render_stats) {
+ RenderStats stats;
+ session->collect_statistics(&stats);
+ printf("Render statistics:\n%s\n", stats.full_report().c_str());
+ }
+
+ if (session->progress.get_cancel())
+ break;
+ }
+
+ /* add metadata */
+ stamp_view_layer_metadata(scene, b_rlay_name);
+
+ /* free result without merging */
+ end_render_result(b_engine, b_rr, true, true, false);
+
+ double total_time, render_time;
+ session->progress.get_time(total_time, render_time);
+ VLOG(1) << "Total render time: " << total_time;
+ VLOG(1) << "Render time (without synchronization): " << render_time;
+
+ /* clear callback */
+ session->write_render_tile_cb = function_null;
+ session->update_render_tile_cb = function_null;
+
+ /* TODO: find a way to clear this data for persistent data render */
+#if 0
+ /* Free all memory used (host and device), so we don't leave the render
+ * engine with extra memory allocated.
+ */
+
+ session->device_free();
+
+ delete sync;
+ sync = NULL;
+#endif
+}
+
+static void populate_bake_data(BakeData *data,
+ const int object_id,
+ BL::BakePixel &pixel_array,
+ const int num_pixels)
+{
+ BL::BakePixel bp = pixel_array;
+
+ int i;
+ for (i = 0; i < num_pixels; i++) {
+ if (bp.object_id() == object_id) {
+ data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy());
+ }
+ else {
+ data->set_null(i);
+ }
+ bp = bp.next();
+ }
+}
+
+static int bake_pass_filter_get(const int pass_filter)
+{
+ int flag = BAKE_FILTER_NONE;
+
+ if ((pass_filter & BL::BakeSettings::pass_filter_DIRECT) != 0)
+ flag |= BAKE_FILTER_DIRECT;
+ if ((pass_filter & BL::BakeSettings::pass_filter_INDIRECT) != 0)
+ flag |= BAKE_FILTER_INDIRECT;
+ if ((pass_filter & BL::BakeSettings::pass_filter_COLOR) != 0)
+ flag |= BAKE_FILTER_COLOR;
+
+ if ((pass_filter & BL::BakeSettings::pass_filter_DIFFUSE) != 0)
+ flag |= BAKE_FILTER_DIFFUSE;
+ if ((pass_filter & BL::BakeSettings::pass_filter_GLOSSY) != 0)
+ flag |= BAKE_FILTER_GLOSSY;
+ if ((pass_filter & BL::BakeSettings::pass_filter_TRANSMISSION) != 0)
+ flag |= BAKE_FILTER_TRANSMISSION;
+ if ((pass_filter & BL::BakeSettings::pass_filter_SUBSURFACE) != 0)
+ flag |= BAKE_FILTER_SUBSURFACE;
+
+ if ((pass_filter & BL::BakeSettings::pass_filter_EMIT) != 0)
+ flag |= BAKE_FILTER_EMISSION;
+ if ((pass_filter & BL::BakeSettings::pass_filter_AO) != 0)
+ flag |= BAKE_FILTER_AO;
+
+ return flag;
+}
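+
+/* Worked example: baking a DIFFUSE pass with Direct and Indirect enabled but
+ * Color disabled passes pass_filter_DIRECT | pass_filter_INDIRECT |
+ * pass_filter_DIFFUSE here, which maps to BAKE_FILTER_DIRECT |
+ * BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE. */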
+
+void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
+ BL::Object &b_object,
+ const string &pass_type,
+ const int pass_filter,
+ const int object_id,
+ BL::BakePixel &pixel_array,
+ const size_t num_pixels,
+ const int /*depth*/,
+ float result[])
+{
+ b_depsgraph = b_depsgraph_;
+
+ ShaderEvalType shader_type = get_shader_type(pass_type);
+
+ /* Set baking flag in advance, so kernel loading can check if we need
+ * any baking capabilities.
+ */
+ scene->bake_manager->set_baking(true);
+
+ /* ensure kernels are loaded before we do any scene updates */
+ session->load_kernels();
+
+ if (shader_type == SHADER_EVAL_UV) {
+ /* force UV to be available */
+ Pass::add(PASS_UV, scene->film->passes);
+ }
+
+ int bake_pass_filter = bake_pass_filter_get(pass_filter);
+ bake_pass_filter = BakeManager::shader_type_to_pass_filter(shader_type, bake_pass_filter);
+
+ /* force use_light_pass to be true if we bake more than just colors */
+ if (bake_pass_filter & ~BAKE_FILTER_COLOR) {
+ Pass::add(PASS_LIGHT, scene->film->passes);
+ }
+
+ /* create device and update scene */
+ scene->film->tag_update(scene);
+ scene->integrator->tag_update(scene);
+
+ if (!session->progress.get_cancel()) {
+ /* update scene */
+ BL::Object b_camera_override(b_engine.camera_override());
+ sync->sync_camera(b_render, b_camera_override, width, height, "");
+ sync->sync_data(
+ b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);
+ builtin_images_load();
+ }
+
+ BakeData *bake_data = NULL;
+
+ if (!session->progress.get_cancel()) {
+ /* get buffer parameters */
+ SessionParams session_params = BlenderSync::get_session_params(
+ b_engine, b_userpref, b_scene, background);
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height);
+
+ scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y());
+
+ /* set number of samples */
+ session->tile_manager.set_samples(session_params.samples);
+ session->reset(buffer_params, session_params.samples);
+ session->update_scene();
+
+ /* Find object index. TODO: this lookup is arbitrary, copied from mesh_displace.cpp. */
+ size_t object_index = OBJECT_NONE;
+ int tri_offset = 0;
+
+ for (size_t i = 0; i < scene->objects.size(); i++) {
+ if (strcmp(scene->objects[i]->name.c_str(), b_object.name().c_str()) == 0) {
+ object_index = i;
+ tri_offset = scene->objects[i]->mesh->tri_offset;
+ break;
+ }
+ }
+
+ /* The object might have been disabled for rendering or excluded in some
+ * other way; in that case Blender will report a warning afterwards. */
+ if (object_index != OBJECT_NONE) {
+ int object = object_index;
+
+ bake_data = scene->bake_manager->init(object, tri_offset, num_pixels);
+ populate_bake_data(bake_data, object_id, pixel_array, num_pixels);
+ }
+
+ /* set number of samples */
+ session->tile_manager.set_samples(session_params.samples);
+ session->reset(buffer_params, session_params.samples);
+ session->update_scene();
+
+ session->progress.set_update_callback(
+ function_bind(&BlenderSession::update_bake_progress, this));
+ }
+
+ /* Perform bake. Check cancel to avoid crash with incomplete scene data. */
+ if (!session->progress.get_cancel() && bake_data) {
+ scene->bake_manager->bake(scene->device,
+ &scene->dscene,
+ scene,
+ session->progress,
+ shader_type,
+ bake_pass_filter,
+ bake_data,
+ result);
+ }
+
+ /* Free all memory used (host and device), so we don't leave the render
+ * engine with extra memory allocated.
+ */
+
+ session->device_free();
+
+ delete sync;
+ sync = NULL;
+}
+
+void BlenderSession::do_write_update_render_result(BL::RenderLayer &b_rlay,
+ RenderTile &rtile,
+ bool do_update_only)
+{
+ RenderBuffers *buffers = rtile.buffers;
+
+ /* copy data from device */
+ if (!buffers->copy_from_device())
+ return;
+
+ float exposure = scene->film->exposure;
+
+ vector<float> pixels(rtile.w * rtile.h * 4);
+
+ /* Adjust absolute sample number to the range. */
+ int sample = rtile.sample;
+ const int range_start_sample = session->tile_manager.range_start_sample;
+ if (range_start_sample != -1) {
+ sample -= range_start_sample;
+ }
+
+ if (!do_update_only) {
+ /* copy each pass */
+ BL::RenderLayer::passes_iterator b_iter;
+
+ for (b_rlay.passes.begin(b_iter); b_iter != b_rlay.passes.end(); ++b_iter) {
+ BL::RenderPass b_pass(*b_iter);
+ int components = b_pass.channels();
+
+ /* Copy pixels from regular render passes. */
+ bool read = buffers->get_pass_rect(b_pass.name(), exposure, sample, components, &pixels[0]);
+
+ /* If not a regular render pass, try the denoising passes. */
+ if (!read) {
+ int denoising_offset = BlenderSync::get_denoising_pass(b_pass);
+ if (denoising_offset >= 0) {
+ read = buffers->get_denoising_pass_rect(
+ denoising_offset, exposure, sample, components, &pixels[0]);
+ }
+ }
+
+ if (!read) {
+ memset(&pixels[0], 0, pixels.size() * sizeof(float));
+ }
+
+ b_pass.rect(&pixels[0]);
+ }
+ }
+ else {
+ /* copy combined pass */
+ BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
+ if (buffers->get_pass_rect("Combined", exposure, sample, 4, &pixels[0]))
+ b_combined_pass.rect(&pixels[0]);
+ }
+}
+
+void BlenderSession::write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile)
+{
+ do_write_update_render_result(b_rlay, rtile, false);
+}
+
+void BlenderSession::update_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile)
+{
+ do_write_update_render_result(b_rlay, rtile, true);
+}
+
+void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
+{
+ /* only used for viewport render */
+ if (!b_v3d)
+ return;
+
+ /* on session/scene parameter changes, we recreate session entirely */
+ SessionParams session_params = BlenderSync::get_session_params(
+ b_engine, b_userpref, b_scene, background);
+ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
+ bool session_pause = BlenderSync::get_session_pause(b_scene, background);
+
+ if (session->params.modified(session_params) || scene->params.modified(scene_params)) {
+ free_session();
+ create_session();
+ return;
+ }
+
+ /* increase samples, but never decrease */
+ session->set_samples(session_params.samples);
+ session->set_pause(session_pause);
+
+ /* copy recalc flags, outside of mutex so we can decide to do the real
+ * synchronization at a later time to not block on running updates */
+ sync->sync_recalc(b_depsgraph_, b_v3d);
+
+ /* don't do synchronization if on pause */
+ if (session_pause) {
+ tag_update();
+ return;
+ }
+
+ /* try to acquire mutex. if we don't want to or can't, come back later */
+ if (!session->ready_to_reset() || !session->scene->mutex.try_lock()) {
+ tag_update();
+ return;
+ }
+
+ /* data and camera synchronize */
+ b_depsgraph = b_depsgraph_;
+
+ BL::Object b_camera_override(b_engine.camera_override());
+ sync->sync_data(
+ b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);
+
+ if (b_rv3d)
+ sync->sync_view(b_v3d, b_rv3d, width, height);
+ else
+ sync->sync_camera(b_render, b_camera_override, width, height, "");
+
+ /* reset if needed */
+ if (scene->need_reset()) {
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ session->reset(buffer_params, session_params.samples);
+
+ /* After session reset, so device is not accessing image data anymore. */
+ builtin_images_load();
+
+ /* reset time */
+ start_resize_time = 0.0;
+ }
+
+ /* unlock */
+ session->scene->mutex.unlock();
+
+ /* Start rendering thread, if it's not running already. Do this
+ * after all scene data has been synced at least once. */
+ session->start();
+}
+
+bool BlenderSession::draw(int w, int h)
+{
+ /* pause in redraw in case update is not being called due to final render */
+ session->set_pause(BlenderSync::get_session_pause(b_scene, background));
+
+ /* Before drawing we verify camera and viewport size changes, because
+ * we do not get update callbacks for those; we must detect them here. */
+ if (session->ready_to_reset()) {
+ bool reset = false;
+
+ /* if dimensions changed, reset */
+ if (width != w || height != h) {
+ if (start_resize_time == 0.0) {
+ /* don't react immediately to resizes to avoid flickery resizing
+ * of the viewport, and some window managers changing the window
+ * size temporarily on unminimize */
+ start_resize_time = time_dt();
+ tag_redraw();
+ }
+ else if (time_dt() - start_resize_time < 0.2) {
+ tag_redraw();
+ }
+ else {
+ width = w;
+ height = h;
+ reset = true;
+ }
+ }
+
+ /* try to acquire mutex. if we can't, come back later */
+ if (!session->scene->mutex.try_lock()) {
+ tag_update();
+ }
+ else {
+ /* update camera from 3d view */
+
+ sync->sync_view(b_v3d, b_rv3d, width, height);
+
+ if (scene->camera->need_update)
+ reset = true;
+
+ session->scene->mutex.unlock();
+ }
+
+ /* reset if requested */
+ if (reset) {
+ SessionParams session_params = BlenderSync::get_session_params(
+ b_engine, b_userpref, b_scene, background);
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ bool session_pause = BlenderSync::get_session_pause(b_scene, background);
+
+ if (session_pause == false) {
+ session->reset(buffer_params, session_params.samples);
+ start_resize_time = 0.0;
+ }
+ }
+ }
+ else {
+ tag_update();
+ }
+
+ /* update status and progress for 3d view draw */
+ update_status_progress();
+
+ /* draw */
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ DeviceDrawParams draw_params;
+
+ if (session->params.display_buffer_linear) {
+ draw_params.bind_display_space_shader_cb = function_bind(
+ &BL::RenderEngine::bind_display_space_shader, &b_engine, b_scene);
+ draw_params.unbind_display_space_shader_cb = function_bind(
+ &BL::RenderEngine::unbind_display_space_shader, &b_engine);
+ }
+
+ return !session->draw(buffer_params, draw_params);
+}
+
+void BlenderSession::get_status(string &status, string &substatus)
+{
+ session->progress.get_status(status, substatus);
+}
+
+void BlenderSession::get_kernel_status(string &kernel_status)
+{
+ session->progress.get_kernel_status(kernel_status);
+}
+
+void BlenderSession::get_progress(float &progress, double &total_time, double &render_time)
+{
+ session->progress.get_time(total_time, render_time);
+ progress = session->progress.get_progress();
+}
+
+void BlenderSession::update_bake_progress()
+{
+ float progress = session->progress.get_progress();
+
+ if (progress != last_progress) {
+ b_engine.update_progress(progress);
+ last_progress = progress;
+ }
+}
+
+void BlenderSession::update_status_progress()
+{
+ string timestatus, status, substatus, kernel_status;
+ string scene_status = "";
+ float progress;
+ double total_time, remaining_time = 0, render_time;
+ float mem_used = (float)session->stats.mem_used / 1024.0f / 1024.0f;
+ float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;
+
+ get_status(status, substatus);
+ get_kernel_status(kernel_status);
+ get_progress(progress, total_time, render_time);
+
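+ /* Estimate the remaining time by linear extrapolation: assume the rest of
+ * the render proceeds at the same average rate as the completed fraction. */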
+ if (progress > 0)
+ remaining_time = (1.0 - (double)progress) * (render_time / (double)progress);
+
+ if (background) {
+ scene_status += " | " + scene->name;
+ if (b_rlay_name != "")
+ scene_status += ", " + b_rlay_name;
+
+ if (b_rview_name != "")
+ scene_status += ", " + b_rview_name;
+
+ if (remaining_time > 0) {
+ timestatus += "Remaining:" + time_human_readable_from_seconds(remaining_time) + " | ";
+ }
+
+ timestatus += string_printf("Mem:%.2fM, Peak:%.2fM", (double)mem_used, (double)mem_peak);
+
+ if (status.size() > 0)
+ status = " | " + status;
+ if (substatus.size() > 0)
+ status += " | " + substatus;
+ if (kernel_status.size() > 0)
+ status += " | " + kernel_status;
+ }
+
+ double current_time = time_dt();
+ /* When rendering in a window, redraw the status at least once per second to keep the elapsed and
+ * remaining time up-to-date. For headless rendering, only report when something significant
+ * changes to keep the console output readable. */
+ if (status != last_status || (!headless && (current_time - last_status_time) > 1.0)) {
+ b_engine.update_stats("", (timestatus + scene_status + status).c_str());
+ b_engine.update_memory_stats(mem_used, mem_peak);
+ last_status = status;
+ last_status_time = current_time;
+ }
+ if (progress != last_progress) {
+ b_engine.update_progress(progress);
+ last_progress = progress;
+ }
+
+ if (session->progress.get_error()) {
+ string error = session->progress.get_error_message();
+ if (error != last_error) {
+ /* TODO(sergey): Currently the C++ RNA API doesn't let us
+ * use a mnemonic name for the variable. Would be nice to
+ * have this figured out.
+ *
+ * Until then, 1 << 5 means RPT_ERROR.
+ */
+ b_engine.report(1 << 5, error.c_str());
+ b_engine.error_set(error.c_str());
+ last_error = error;
+ }
+ }
+}
+
+void BlenderSession::tag_update()
+{
+ /* tell blender that we want to get another update callback */
+ b_engine.tag_update();
+}
+
+void BlenderSession::tag_redraw()
+{
+ if (background) {
+ /* update stats and progress, only for background here because
+ * in 3d view we do it in draw for thread safety reasons */
+ update_status_progress();
+
+ /* offline render, redraw if timeout passed */
+ if (time_dt() - last_redraw_time > 1.0) {
+ b_engine.tag_redraw();
+ last_redraw_time = time_dt();
+ }
+ }
+ else {
+ /* tell blender that we want to redraw */
+ b_engine.tag_redraw();
+ }
+}
+
+void BlenderSession::test_cancel()
+{
+ /* test if we need to cancel rendering */
+ if (background)
+ if (b_engine.test_break())
+ session->progress.set_cancel("Cancelled");
+}
+
+/* A builtin image file name is actually an image datablock name with the
+ * absolute sequence frame number concatenated via the '@' character.
+ *
+ * This function extracts the frame number from the builtin name.
+ */
+int BlenderSession::builtin_image_frame(const string &builtin_name)
+{
+ int last = builtin_name.find_last_of('@');
+ return atoi(builtin_name.substr(last + 1, builtin_name.size() - last - 1).c_str());
+}
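+
+/* For example, builtin_image_frame("MyImage@14") returns 14; the datablock
+ * name is everything before the last '@'. */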
+
+void BlenderSession::builtin_image_info(const string &builtin_name,
+ void *builtin_data,
+ ImageMetaData &metadata)
+{
+ /* empty image */
+ metadata.width = 1;
+ metadata.height = 1;
+
+ if (!builtin_data)
+ return;
+
+ /* recover ID pointer */
+ PointerRNA ptr;
+ RNA_id_pointer_create((ID *)builtin_data, &ptr);
+ BL::ID b_id(ptr);
+
+ if (b_id.is_a(&RNA_Image)) {
+ /* image data */
+ BL::Image b_image(b_id);
+
+ metadata.builtin_free_cache = !b_image.has_data();
+ metadata.is_float = b_image.is_float();
+ metadata.width = b_image.size()[0];
+ metadata.height = b_image.size()[1];
+ metadata.depth = 1;
+ metadata.channels = b_image.channels();
+
+ if (metadata.is_float) {
+ /* Float images are already converted on the Blender side,
+ * no need to do anything in Cycles. */
+ metadata.colorspace = u_colorspace_raw;
+ }
+ }
+ else if (b_id.is_a(&RNA_Object)) {
+ /* smoke volume data */
+ BL::Object b_ob(b_id);
+ BL::FluidDomainSettings b_domain = object_fluid_domain_find(b_ob);
+
+ metadata.is_float = true;
+ metadata.depth = 1;
+ metadata.channels = 1;
+
+ if (!b_domain)
+ return;
+
+ if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY) ||
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME) ||
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT) ||
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE))
+ metadata.channels = 1;
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR))
+ metadata.channels = 4;
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY))
+ metadata.channels = 3;
+ else
+ return;
+
+ int3 resolution = get_int3(b_domain.domain_resolution());
+ int amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1;
+
+ /* Velocity and heat data is always low-resolution. */
+ if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) ||
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
+ amplify = 1;
+ }
+
+ metadata.width = resolution.x * amplify;
+ metadata.height = resolution.y * amplify;
+ metadata.depth = resolution.z * amplify;
+ }
+ else {
+ /* TODO(sergey): Check we're indeed in shader node tree. */
+ PointerRNA ptr;
+ RNA_pointer_create(NULL, &RNA_Node, builtin_data, &ptr);
+ BL::Node b_node(ptr);
+ if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) {
+ BL::ShaderNodeTexPointDensity b_point_density_node(b_node);
+ metadata.channels = 4;
+ metadata.width = b_point_density_node.resolution();
+ metadata.height = metadata.width;
+ metadata.depth = metadata.width;
+ metadata.is_float = true;
+ }
+ }
+}
+
+bool BlenderSession::builtin_image_pixels(const string &builtin_name,
+ void *builtin_data,
+ int tile,
+ unsigned char *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha,
+ const bool free_cache)
+{
+ if (!builtin_data) {
+ return false;
+ }
+
+ const int frame = builtin_image_frame(builtin_name);
+
+ PointerRNA ptr;
+ RNA_id_pointer_create((ID *)builtin_data, &ptr);
+ BL::Image b_image(ptr);
+
+ const int width = b_image.size()[0];
+ const int height = b_image.size()[1];
+ const int channels = b_image.channels();
+
+ unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile);
+ const size_t num_pixels = ((size_t)width) * height;
+
+ if (image_pixels && num_pixels * channels == pixels_size) {
+ memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
+ }
+ else {
+ if (channels == 1) {
+ memset(pixels, 0, pixels_size * sizeof(unsigned char));
+ }
+ else {
+ const size_t num_pixels_safe = pixels_size / channels;
+ unsigned char *cp = pixels;
+ for (size_t i = 0; i < num_pixels_safe; i++, cp += channels) {
+ cp[0] = 255;
+ cp[1] = 0;
+ cp[2] = 255;
+ if (channels == 4) {
+ cp[3] = 255;
+ }
+ }
+ }
+ }
+
+ if (image_pixels) {
+ MEM_freeN(image_pixels);
+ }
+
+ /* Free image buffers to save memory during render. */
+ if (free_cache) {
+ b_image.buffers_free();
+ }
+
+ if (associate_alpha) {
+ /* Premultiply, byte images are always straight for Blender. */
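+ /* The >> 8 below divides by 256 rather than 255, a cheap approximation
+ * that is at most one off for 8-bit values. */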
+ unsigned char *cp = pixels;
+ for (size_t i = 0; i < num_pixels; i++, cp += channels) {
+ cp[0] = (cp[0] * cp[3]) >> 8;
+ cp[1] = (cp[1] * cp[3]) >> 8;
+ cp[2] = (cp[2] * cp[3]) >> 8;
+ }
+ }
+ return true;
+}
+
+bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
+ void *builtin_data,
+ int tile,
+ float *pixels,
+ const size_t pixels_size,
+ const bool,
+ const bool free_cache)
+{
+ if (!builtin_data) {
+ return false;
+ }
+
+ PointerRNA ptr;
+ RNA_id_pointer_create((ID *)builtin_data, &ptr);
+ BL::ID b_id(ptr);
+
+ if (b_id.is_a(&RNA_Image)) {
+ /* image data */
+ BL::Image b_image(b_id);
+ int frame = builtin_image_frame(builtin_name);
+
+ const int width = b_image.size()[0];
+ const int height = b_image.size()[1];
+ const int channels = b_image.channels();
+
+ float *image_pixels;
+ image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile);
+ const size_t num_pixels = ((size_t)width) * height;
+
+ if (image_pixels && num_pixels * channels == pixels_size) {
+ memcpy(pixels, image_pixels, pixels_size * sizeof(float));
+ }
+ else {
+ if (channels == 1) {
+ memset(pixels, 0, num_pixels * sizeof(float));
+ }
+ else {
+ const size_t num_pixels_safe = pixels_size / channels;
+ float *fp = pixels;
+ for (size_t i = 0; i < num_pixels_safe; i++, fp += channels) {
+ fp[0] = 1.0f;
+ fp[1] = 0.0f;
+ fp[2] = 1.0f;
+ if (channels == 4) {
+ fp[3] = 1.0f;
+ }
+ }
+ }
+ }
+
+ if (image_pixels) {
+ MEM_freeN(image_pixels);
+ }
+
+ /* Free image buffers to save memory during render. */
+ if (free_cache) {
+ b_image.buffers_free();
+ }
+
+ return true;
+ }
+ else if (b_id.is_a(&RNA_Object)) {
+ /* smoke volume data */
+ BL::Object b_ob(b_id);
+ BL::FluidDomainSettings b_domain = object_fluid_domain_find(b_ob);
+
+ if (!b_domain) {
+ return false;
+ }
+
+ int3 resolution = get_int3(b_domain.domain_resolution());
+ int length, amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1;
+
+ /* Velocity and heat data is always low-resolution. */
+ if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) ||
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
+ amplify = 1;
+ }
+
+ const int width = resolution.x * amplify;
+ const int height = resolution.y * amplify;
+ const int depth = resolution.z * amplify;
+ const size_t num_pixels = ((size_t)width) * height * depth;
+
+ if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) {
+ FluidDomainSettings_density_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_density_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) {
+ /* this is in range 0..1, and interpreted by the OpenGL smoke viewer
+ * as 1500..3000 K with the first part faded to zero density */
+ FluidDomainSettings_flame_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_flame_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) {
+ /* the RGB is "premultiplied" by density for better interpolation results */
+ FluidDomainSettings_color_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels * 4) {
+ FluidDomainSettings_color_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) {
+ FluidDomainSettings_velocity_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels * 3) {
+ FluidDomainSettings_velocity_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
+ FluidDomainSettings_heat_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_heat_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) {
+ FluidDomainSettings_temperature_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_temperature_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else {
+ fprintf(
+ stderr, "Cycles error: unknown volume attribute %s, skipping\n", builtin_name.c_str());
+ pixels[0] = 0.0f;
+ return false;
+ }
+
+ fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n");
+ }
+ else {
+    /* We originally were passing view_layer here, but in reality we need a
+     * depsgraph to pass to the RE_point_density_minmax() function.
+     */
+ /* TODO(sergey): Check we're indeed in shader node tree. */
+ PointerRNA ptr;
+ RNA_pointer_create(NULL, &RNA_Node, builtin_data, &ptr);
+ BL::Node b_node(ptr);
+ if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) {
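+      /* Point density is evaluated through the node itself, which fills
+       * the pixel buffer directly. */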
+ BL::ShaderNodeTexPointDensity b_point_density_node(b_node);
+ int length;
+ b_point_density_node.calc_point_density(b_depsgraph, &length, &pixels);
+ }
+ }
+
+ return false;
+}
+
+void BlenderSession::builtin_images_load()
+{
+ /* Force builtin images to be loaded along with Blender data sync. This
+ * is needed because we may be reading from depsgraph evaluated data which
+ * can be freed by Blender before Cycles reads it.
+ *
+ * TODO: the assumption that no further access to builtin image data will
+ * happen is really weak, and likely to break in the future. We should find
+ * a better solution to hand over the data directly to the image manager
+ * instead of through callbacks whose timing is difficult to control. */
+ ImageManager *manager = session->scene->image_manager;
+ Device *device = session->device;
+ manager->device_load_builtin(device, session->scene, session->progress);
+}
+
+void BlenderSession::update_resumable_tile_manager(int num_samples)
+{
+ const int num_resumable_chunks = BlenderSession::num_resumable_chunks,
+ current_resumable_chunk = BlenderSession::current_resumable_chunk;
+ if (num_resumable_chunks == 0) {
+ return;
+ }
+
+ if (num_resumable_chunks > num_samples) {
+ fprintf(stderr,
+ "Cycles warning: more sample chunks (%d) than samples (%d), "
+ "this will cause some samples to be included in multiple chunks.\n",
+ num_resumable_chunks,
+ num_samples);
+ }
+
+ const float num_samples_per_chunk = (float)num_samples / num_resumable_chunks;
+
+ float range_start_sample, range_num_samples;
+ if (current_resumable_chunk != 0) {
+ /* Single chunk rendering. */
+ range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1);
+ range_num_samples = num_samples_per_chunk;
+ }
+ else {
+ /* Ranged-chunks. */
+ const int num_chunks = end_resumable_chunk - start_resumable_chunk + 1;
+ range_start_sample = num_samples_per_chunk * (start_resumable_chunk - 1);
+ range_num_samples = num_chunks * num_samples_per_chunk;
+ }
+
+ /* Round after doing the multiplications with num_chunks and num_samples_per_chunk
+ * to allow for many small chunks. */
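+  /* E.g. 250 samples in 8 chunks gives 31.25 samples per chunk: chunk 3
+   * starts at floor(62.5 + 0.5) = 63 and renders floor(31.75) = 31 samples. */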
+ int rounded_range_start_sample = (int)floorf(range_start_sample + 0.5f);
+ int rounded_range_num_samples = max((int)floorf(range_num_samples + 0.5f), 1);
+
+ /* Make sure we don't overshoot. */
+ if (rounded_range_start_sample + rounded_range_num_samples > num_samples) {
+    rounded_range_num_samples = num_samples - rounded_range_start_sample;
+ }
+
+ VLOG(1) << "Samples range start is " << range_start_sample << ", "
+ << "number of samples to render is " << range_num_samples;
+
+ scene->integrator->start_sample = rounded_range_start_sample;
+ scene->integrator->tag_update(scene);
+
+ session->tile_manager.range_start_sample = rounded_range_start_sample;
+ session->tile_manager.range_num_samples = rounded_range_num_samples;
+}
+
+void BlenderSession::free_blender_memory_if_possible()
+{
+ if (!background) {
+ /* During interactive render we can not free anything: attempts to save
+ * memory would cause things to be allocated and evaluated for every
+ * updated sample.
+ */
+ return;
+ }
+ b_engine.free_blender_memory();
+}
+
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
--- a/intern/cycles/blender/blender_sync.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/blender_sync.cpp 2020-01-10 20:42:43.457590054 +0300
@@ -291,6 +291,16 @@
integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect");
integrator->light_sampling_threshold = get_float(cscene, "light_sampling_threshold");
+ if (RNA_boolean_get(&cscene, "use_adaptive_sampling")) {
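+    /* Adaptive sampling relies on the progressive stratification of the
+     * PMJ pattern, so it overrides the user's sampling pattern choice. */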
+ integrator->sampling_pattern = SAMPLING_PATTERN_PMJ;
+ integrator->adaptive_min_samples = get_int(cscene, "adaptive_min_samples");
+ integrator->adaptive_threshold = get_float(cscene, "adaptive_threshold");
+ }
+ else {
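+    /* Adaptive sampling disabled: an unreachable minimum sample count
+     * keeps the kernel from ever stopping pixels early. */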
+ integrator->adaptive_min_samples = INT_MAX;
+ integrator->adaptive_threshold = 0.0f;
+ }
+
int diffuse_samples = get_int(cscene, "diffuse_samples");
int glossy_samples = get_int(cscene, "glossy_samples");
int transmission_samples = get_int(cscene, "transmission_samples");
@@ -307,6 +317,8 @@
integrator->mesh_light_samples = mesh_light_samples * mesh_light_samples;
integrator->subsurface_samples = subsurface_samples * subsurface_samples;
integrator->volume_samples = volume_samples * volume_samples;
+    /* Widen before squaring: adaptive_min_samples may be INT_MAX. */
+    integrator->adaptive_min_samples = (int)min(
+        (long long)integrator->adaptive_min_samples * integrator->adaptive_min_samples,
+        (long long)INT_MAX);
}
else {
integrator->diffuse_samples = diffuse_samples;
@@ -482,6 +494,8 @@
MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES);
#endif
MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
+ MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
+ MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);
if (string_startswith(name, cryptomatte_prefix)) {
return PASS_CRYPTOMATTE;
}
@@ -517,7 +531,9 @@
return -1;
}
-vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer)
+vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
+ BL::ViewLayer &b_view_layer,
+ bool adaptive_sampling)
{
vector<Pass> passes;
@@ -595,6 +611,10 @@
b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_RENDER_TIME, passes, "Debug Render Time");
}
+ if (get_boolean(crp, "pass_debug_sample_count")) {
+ b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
+    Pass::add(PASS_SAMPLE_COUNT, passes, "Debug Sample Count");
+ }
if (get_boolean(crp, "use_pass_volume_direct")) {
b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str());
Pass::add(PASS_VOLUME_DIRECT, passes, "VolumeDir");
@@ -656,6 +676,13 @@
}
RNA_END;
+ if (adaptive_sampling) {
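+    /* Adaptive sampling needs the auxiliary buffer for its variance
+     * estimate and a per-pixel sample count to normalize the result. */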
+ Pass::add(PASS_ADAPTIVE_AUX_BUFFER, passes);
+ if (!get_boolean(crp, "pass_debug_sample_count")) {
+ Pass::add(PASS_SAMPLE_COUNT, passes);
+ }
+ }
+
return passes;
}
@@ -889,6 +916,8 @@
params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background &&
BlenderSession::print_render_stats;
+ params.adaptive_sampling = RNA_boolean_get(&cscene, "use_adaptive_sampling");
+
return params;
}
diff -Naur a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
--- a/intern/cycles/blender/blender_sync.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/blender_sync.h 2020-01-10 20:42:43.457590054 +0300
@@ -70,7 +70,9 @@
int height,
void **python_thread_state);
void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer);
- vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer);
+ vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer,
+ BL::ViewLayer &b_view_layer,
+ bool adaptive_sampling);
void sync_integrator();
void sync_camera(BL::RenderSettings &b_render,
BL::Object &b_override,
diff -Naur a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
--- a/intern/cycles/device/device_cpu.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/device_cpu.cpp 2020-01-10 20:42:43.457590054 +0300
@@ -34,6 +34,7 @@
#include "kernel/kernel_types.h"
#include "kernel/split/kernel_split_data.h"
#include "kernel/kernel_globals.h"
+#include "kernel/kernel_adaptive_sampling.h"
#include "kernel/filter/filter.h"
@@ -317,6 +318,10 @@
REGISTER_SPLIT_KERNEL(next_iteration_setup);
REGISTER_SPLIT_KERNEL(indirect_subsurface);
REGISTER_SPLIT_KERNEL(buffer_update);
+ REGISTER_SPLIT_KERNEL(adaptive_stopping);
+ REGISTER_SPLIT_KERNEL(adaptive_filter_x);
+ REGISTER_SPLIT_KERNEL(adaptive_filter_y);
+ REGISTER_SPLIT_KERNEL(adaptive_adjust_samples);
#undef REGISTER_SPLIT_KERNEL
#undef KERNEL_FUNCTIONS
}
@@ -851,10 +856,33 @@
path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
}
-
tile.sample = sample + 1;
task.update_progress(&tile, tile.w * tile.h);
+
+ if (kernel_data.film.pass_adaptive_aux_buffer && (sample & 0x3) == 3 &&
+ sample >= kernel_data.integrator.adaptive_min_samples - 1) {
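+      /* Every fourth sample past the minimum, run the convergence filters
+       * over the tile; if no pixel is still active, finish the tile early. */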
+ WorkTile wtile;
+ wtile.x = tile.x;
+ wtile.y = tile.y;
+ wtile.w = tile.w;
+ wtile.h = tile.h;
+ wtile.offset = tile.offset;
+ wtile.stride = tile.stride;
+ wtile.buffer = (float *)tile.buffer;
+
+ bool any = false;
+ for (int y = tile.y; y < tile.y + tile.h; ++y) {
+ any |= kernel_do_adaptive_filter_x(kg, y, &wtile);
+ }
+ for (int x = tile.x; x < tile.x + tile.w; ++x) {
+ any |= kernel_do_adaptive_filter_y(kg, x, &wtile);
+ }
+ if (!any) {
+ tile.sample = end_sample;
+ break;
+ }
+ }
}
if (use_coverage) {
coverage.finalize();
@@ -931,6 +959,28 @@
}
else {
path_trace(task, tile, kg);
+ if (task.integrator_adaptive && kernel_data.film.pass_adaptive_aux_buffer) {
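+        /* Pixels that converged early store a negated sample count; restore
+         * the sign and rescale all passes as if they had taken the full
+         * number of samples so tiles blend consistently. */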
+ float *render_buffer = (float *)tile.buffer;
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ int index = tile.offset + x + y * tile.stride;
+ ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
+ if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+ buffer[kernel_data.film.pass_sample_count] =
+ -buffer[kernel_data.film.pass_sample_count];
+ float sample_multiplier = tile.sample /
+ max((float)tile.start_sample + 1.0f,
+ buffer[kernel_data.film.pass_sample_count]);
+ if (sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(kg, buffer, tile.sample / (tile.sample - 1.0f));
+ }
+ }
+ }
+ }
}
}
else if (tile.task == RenderTile::DENOISE) {
diff -Naur a/intern/cycles/device/device_cpu.cpp.orig b/intern/cycles/device/device_cpu.cpp.orig
--- a/intern/cycles/device/device_cpu.cpp.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/device/device_cpu.cpp.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,1247 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+/* So ImathMath is included before our kernel_cpu_compat. */
+#ifdef WITH_OSL
+/* So no context pollution happens from indirectly included windows.h */
+# include "util/util_windows.h"
+# include <OSL/oslexec.h>
+#endif
+
+#include "device/device.h"
+#include "device/device_denoising.h"
+#include "device/device_intern.h"
+#include "device/device_split_kernel.h"
+
+#include "kernel/kernel.h"
+#include "kernel/kernel_compat_cpu.h"
+#include "kernel/kernel_types.h"
+#include "kernel/split/kernel_split_data.h"
+#include "kernel/kernel_globals.h"
+
+#include "kernel/filter/filter.h"
+
+#include "kernel/osl/osl_shader.h"
+#include "kernel/osl/osl_globals.h"
+
+#include "render/buffers.h"
+#include "render/coverage.h"
+
+#include "util/util_debug.h"
+#include "util/util_foreach.h"
+#include "util/util_function.h"
+#include "util/util_logging.h"
+#include "util/util_map.h"
+#include "util/util_opengl.h"
+#include "util/util_optimization.h"
+#include "util/util_progress.h"
+#include "util/util_system.h"
+#include "util/util_thread.h"
+
+CCL_NAMESPACE_BEGIN
+
+class CPUDevice;
+
+/* Has to be outside of the class to be shared across template instantiations. */
+static const char *logged_architecture = "";
+
+template<typename F> class KernelFunctions {
+ public:
+ KernelFunctions()
+ {
+ kernel = (F)NULL;
+ }
+
+ KernelFunctions(
+ F kernel_default, F kernel_sse2, F kernel_sse3, F kernel_sse41, F kernel_avx, F kernel_avx2)
+ {
+ const char *architecture_name = "default";
+ kernel = kernel_default;
+
+ /* Silence potential warnings about unused variables
+ * when compiling without some architectures. */
+ (void)kernel_sse2;
+ (void)kernel_sse3;
+ (void)kernel_sse41;
+ (void)kernel_avx;
+ (void)kernel_avx2;
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
+ if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
+ architecture_name = "AVX2";
+ kernel = kernel_avx2;
+ }
+ else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+ if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
+ architecture_name = "AVX";
+ kernel = kernel_avx;
+ }
+ else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
+ if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
+ architecture_name = "SSE4.1";
+ kernel = kernel_sse41;
+ }
+ else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
+ if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
+ architecture_name = "SSE3";
+ kernel = kernel_sse3;
+ }
+ else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+ if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
+ architecture_name = "SSE2";
+ kernel = kernel_sse2;
+ }
+#else
+ {
+ /* Dummy to prevent the architecture if below become
+ * conditional when WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+ * is not defined. */
+ }
+#endif
+
+ if (strcmp(architecture_name, logged_architecture) != 0) {
+ VLOG(1) << "Will be using " << architecture_name << " kernels.";
+ logged_architecture = architecture_name;
+ }
+ }
+
+ inline F operator()() const
+ {
+ assert(kernel);
+ return kernel;
+ }
+
+ protected:
+ F kernel;
+};
+
+class CPUSplitKernel : public DeviceSplitKernel {
+ CPUDevice *device;
+
+ public:
+ explicit CPUSplitKernel(CPUDevice *device);
+
+ virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
+ RenderTile &rtile,
+ int num_global_elements,
+ device_memory &kernel_globals,
+ device_memory &kernel_data_,
+ device_memory &split_data,
+ device_memory &ray_state,
+ device_memory &queue_index,
+ device_memory &use_queues_flag,
+ device_memory &work_pool_wgs);
+
+ virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
+ const DeviceRequestedFeatures &);
+ virtual int2 split_kernel_local_size();
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task);
+ virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads);
+};
+
+class CPUDevice : public Device {
+ public:
+ TaskPool task_pool;
+ KernelGlobals kernel_globals;
+
+ device_vector<TextureInfo> texture_info;
+ bool need_texture_info;
+
+#ifdef WITH_OSL
+ OSLGlobals osl_globals;
+#endif
+
+ bool use_split_kernel;
+
+ DeviceRequestedFeatures requested_features;
+
+ KernelFunctions<void (*)(KernelGlobals *, float *, int, int, int, int, int)> path_trace_kernel;
+ KernelFunctions<void (*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)>
+ convert_to_half_float_kernel;
+ KernelFunctions<void (*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)>
+ convert_to_byte_kernel;
+ KernelFunctions<void (*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)>
+ shader_kernel;
+
+ KernelFunctions<void (*)(
+ int, TileInfo *, int, int, float *, float *, float *, float *, float *, int *, int, int)>
+ filter_divide_shadow_kernel;
+ KernelFunctions<void (*)(
+ int, TileInfo *, int, int, int, int, float *, float *, float, int *, int, int)>
+ filter_get_feature_kernel;
+ KernelFunctions<void (*)(int, int, int, int *, float *, float *, int, int *)>
+ filter_write_feature_kernel;
+ KernelFunctions<void (*)(int, int, float *, float *, float *, float *, int *, int)>
+ filter_detect_outliers_kernel;
+ KernelFunctions<void (*)(int, int, float *, float *, float *, float *, int *, int)>
+ filter_combine_halves_kernel;
+
+ KernelFunctions<void (*)(
+ int, int, float *, float *, float *, float *, int *, int, int, int, float, float)>
+ filter_nlm_calc_difference_kernel;
+ KernelFunctions<void (*)(float *, float *, int *, int, int)> filter_nlm_blur_kernel;
+ KernelFunctions<void (*)(float *, float *, int *, int, int)> filter_nlm_calc_weight_kernel;
+ KernelFunctions<void (*)(
+ int, int, float *, float *, float *, float *, float *, int *, int, int, int)>
+ filter_nlm_update_output_kernel;
+ KernelFunctions<void (*)(float *, float *, int *, int)> filter_nlm_normalize_kernel;
+
+ KernelFunctions<void (*)(
+ float *, TileInfo *, int, int, int, float *, int *, int *, int, int, bool, int, float)>
+ filter_construct_transform_kernel;
+ KernelFunctions<void (*)(int,
+ int,
+ int,
+ float *,
+ float *,
+ float *,
+ int *,
+ float *,
+ float3 *,
+ int *,
+ int *,
+ int,
+ int,
+ int,
+ int,
+ bool)>
+ filter_nlm_construct_gramian_kernel;
+ KernelFunctions<void (*)(int, int, int, float *, int *, float *, float3 *, int *, int)>
+ filter_finalize_kernel;
+
+ KernelFunctions<void (*)(KernelGlobals *,
+ ccl_constant KernelData *,
+ ccl_global void *,
+ int,
+ ccl_global char *,
+ int,
+ int,
+ int,
+ int,
+ int,
+ int,
+ int,
+ int,
+ ccl_global int *,
+ int,
+ ccl_global char *,
+ ccl_global unsigned int *,
+ unsigned int,
+ ccl_global float *)>
+ data_init_kernel;
+ unordered_map<string, KernelFunctions<void (*)(KernelGlobals *, KernelData *)>> split_kernels;
+
+#define KERNEL_FUNCTIONS(name) \
+ KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse2, name), \
+ KERNEL_NAME_EVAL(cpu_sse3, name), KERNEL_NAME_EVAL(cpu_sse41, name), \
+ KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
+
+ CPUDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
+ : Device(info_, stats_, profiler_, background_),
+ texture_info(this, "__texture_info", MEM_TEXTURE),
+#define REGISTER_KERNEL(name) name##_kernel(KERNEL_FUNCTIONS(name))
+ REGISTER_KERNEL(path_trace),
+ REGISTER_KERNEL(convert_to_half_float),
+ REGISTER_KERNEL(convert_to_byte),
+ REGISTER_KERNEL(shader),
+ REGISTER_KERNEL(filter_divide_shadow),
+ REGISTER_KERNEL(filter_get_feature),
+ REGISTER_KERNEL(filter_write_feature),
+ REGISTER_KERNEL(filter_detect_outliers),
+ REGISTER_KERNEL(filter_combine_halves),
+ REGISTER_KERNEL(filter_nlm_calc_difference),
+ REGISTER_KERNEL(filter_nlm_blur),
+ REGISTER_KERNEL(filter_nlm_calc_weight),
+ REGISTER_KERNEL(filter_nlm_update_output),
+ REGISTER_KERNEL(filter_nlm_normalize),
+ REGISTER_KERNEL(filter_construct_transform),
+ REGISTER_KERNEL(filter_nlm_construct_gramian),
+ REGISTER_KERNEL(filter_finalize),
+ REGISTER_KERNEL(data_init)
+#undef REGISTER_KERNEL
+ {
+ if (info.cpu_threads == 0) {
+ info.cpu_threads = TaskScheduler::num_threads();
+ }
+
+#ifdef WITH_OSL
+ kernel_globals.osl = &osl_globals;
+#endif
+ use_split_kernel = DebugFlags().cpu.split_kernel;
+ if (use_split_kernel) {
+ VLOG(1) << "Will be using split kernel.";
+ }
+ need_texture_info = false;
+
+#define REGISTER_SPLIT_KERNEL(name) \
+ split_kernels[#name] = KernelFunctions<void (*)(KernelGlobals *, KernelData *)>( \
+ KERNEL_FUNCTIONS(name))
+ REGISTER_SPLIT_KERNEL(path_init);
+ REGISTER_SPLIT_KERNEL(scene_intersect);
+ REGISTER_SPLIT_KERNEL(lamp_emission);
+ REGISTER_SPLIT_KERNEL(do_volume);
+ REGISTER_SPLIT_KERNEL(queue_enqueue);
+ REGISTER_SPLIT_KERNEL(indirect_background);
+ REGISTER_SPLIT_KERNEL(shader_setup);
+ REGISTER_SPLIT_KERNEL(shader_sort);
+ REGISTER_SPLIT_KERNEL(shader_eval);
+ REGISTER_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao);
+ REGISTER_SPLIT_KERNEL(subsurface_scatter);
+ REGISTER_SPLIT_KERNEL(direct_lighting);
+ REGISTER_SPLIT_KERNEL(shadow_blocked_ao);
+ REGISTER_SPLIT_KERNEL(shadow_blocked_dl);
+ REGISTER_SPLIT_KERNEL(enqueue_inactive);
+ REGISTER_SPLIT_KERNEL(next_iteration_setup);
+ REGISTER_SPLIT_KERNEL(indirect_subsurface);
+ REGISTER_SPLIT_KERNEL(buffer_update);
+#undef REGISTER_SPLIT_KERNEL
+#undef KERNEL_FUNCTIONS
+ }
+
+ ~CPUDevice()
+ {
+ task_pool.stop();
+ texture_info.free();
+ }
+
+ virtual bool show_samples() const
+ {
+ return (info.cpu_threads == 1);
+ }
+
+ virtual BVHLayoutMask get_bvh_layout_mask() const
+ {
+ BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
+ if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
+ bvh_layout_mask |= BVH_LAYOUT_BVH4;
+ }
+#if defined(__x86_64__) || defined(_M_X64)
+ if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
+ bvh_layout_mask |= BVH_LAYOUT_BVH8;
+ }
+#endif
+#ifdef WITH_EMBREE
+ bvh_layout_mask |= BVH_LAYOUT_EMBREE;
+#endif /* WITH_EMBREE */
+ return bvh_layout_mask;
+ }
+
+ void load_texture_info()
+ {
+ if (need_texture_info) {
+ texture_info.copy_to_device();
+ need_texture_info = false;
+ }
+ }
+
+ void mem_alloc(device_memory &mem)
+ {
+ if (mem.type == MEM_TEXTURE) {
+ assert(!"mem_alloc not supported for textures.");
+ }
+ else {
+ if (mem.name) {
+ VLOG(1) << "Buffer allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+ }
+
+ if (mem.type == MEM_DEVICE_ONLY) {
+ assert(!mem.host_pointer);
+ size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
+ void *data = util_aligned_malloc(mem.memory_size(), alignment);
+ mem.device_pointer = (device_ptr)data;
+ }
+ else {
+ mem.device_pointer = (device_ptr)mem.host_pointer;
+ }
+
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
+ }
+ }
+
+ void mem_copy_to(device_memory &mem)
+ {
+ if (mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ tex_alloc(mem);
+ }
+ else if (mem.type == MEM_PIXELS) {
+ assert(!"mem_copy_to not supported for pixels.");
+ }
+ else {
+ if (!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+
+ /* copy is no-op */
+ }
+ }
+
+ void mem_copy_from(device_memory & /*mem*/, int /*y*/, int /*w*/, int /*h*/, int /*elem*/)
+ {
+ /* no-op */
+ }
+
+ void mem_zero(device_memory &mem)
+ {
+ if (!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+
+ if (mem.device_pointer) {
+ memset((void *)mem.device_pointer, 0, mem.memory_size());
+ }
+ }
+
+ void mem_free(device_memory &mem)
+ {
+ if (mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ }
+ else if (mem.device_pointer) {
+ if (mem.type == MEM_DEVICE_ONLY) {
+ util_aligned_free((void *)mem.device_pointer);
+ }
+ mem.device_pointer = 0;
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
+ }
+ }
+
+ virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
+ {
+ return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
+ }
+
+ void const_copy_to(const char *name, void *host, size_t size)
+ {
+ kernel_const_copy(&kernel_globals, name, host, size);
+ }
+
+ void tex_alloc(device_memory &mem)
+ {
+ VLOG(1) << "Texture allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+
+ if (mem.interpolation == INTERPOLATION_NONE) {
+ /* Data texture. */
+ kernel_tex_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
+ }
+ else {
+ /* Image Texture. */
+ int flat_slot = 0;
+ if (string_startswith(mem.name, "__tex_image")) {
+ int pos = string(mem.name).rfind("_");
+ flat_slot = atoi(mem.name + pos + 1);
+ }
+ else {
+ assert(0);
+ }
+
+ if (flat_slot >= texture_info.size()) {
+        /* Allocate some slots in advance, to reduce the number
+         * of re-allocations. */
+ texture_info.resize(flat_slot + 128);
+ }
+
+ TextureInfo &info = texture_info[flat_slot];
+ info.data = (uint64_t)mem.host_pointer;
+ info.cl_buffer = 0;
+ info.interpolation = mem.interpolation;
+ info.extension = mem.extension;
+ info.width = mem.data_width;
+ info.height = mem.data_height;
+ info.depth = mem.data_depth;
+
+ need_texture_info = true;
+ }
+
+ mem.device_pointer = (device_ptr)mem.host_pointer;
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
+ }
+
+ void tex_free(device_memory &mem)
+ {
+ if (mem.device_pointer) {
+ mem.device_pointer = 0;
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
+ need_texture_info = true;
+ }
+ }
+
+ void *osl_memory()
+ {
+#ifdef WITH_OSL
+ return &osl_globals;
+#else
+ return NULL;
+#endif
+ }
+
+ void thread_run(DeviceTask *task)
+ {
+    if (task->type == DeviceTask::RENDER) {
+      thread_render(*task);
+    }
+    else if (task->type == DeviceTask::FILM_CONVERT) {
+      thread_film_convert(*task);
+    }
+    else if (task->type == DeviceTask::SHADER) {
+      thread_shader(*task);
+    }
+ }
+
+ class CPUDeviceTask : public DeviceTask {
+ public:
+ CPUDeviceTask(CPUDevice *device, DeviceTask &task) : DeviceTask(task)
+ {
+ run = function_bind(&CPUDevice::thread_run, device, this);
+ }
+ };
+
+ bool denoising_non_local_means(device_ptr image_ptr,
+ device_ptr guide_ptr,
+ device_ptr variance_ptr,
+ device_ptr out_ptr,
+ DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_NON_LOCAL_MEANS);
+
+ int4 rect = task->rect;
+ int r = task->nlm_state.r;
+ int f = task->nlm_state.f;
+ float a = task->nlm_state.a;
+ float k_2 = task->nlm_state.k_2;
+
+ int w = align_up(rect.z - rect.x, 4);
+ int h = rect.w - rect.y;
+ int stride = task->buffer.stride;
+ int channel_offset = task->nlm_state.is_color ? task->buffer.pass_stride : 0;
+
+ float *temporary_mem = (float *)task->buffer.temporary_mem.device_pointer;
+ float *blurDifference = temporary_mem;
+ float *difference = temporary_mem + task->buffer.pass_stride;
+ float *weightAccum = temporary_mem + 2 * task->buffer.pass_stride;
+
+ memset(weightAccum, 0, sizeof(float) * w * h);
+ memset((float *)out_ptr, 0, sizeof(float) * w * h);
+
+ for (int i = 0; i < (2 * r + 1) * (2 * r + 1); i++) {
+ int dy = i / (2 * r + 1) - r;
+ int dx = i % (2 * r + 1) - r;
+
+ int local_rect[4] = {
+ max(0, -dx), max(0, -dy), rect.z - rect.x - max(0, dx), rect.w - rect.y - max(0, dy)};
+ filter_nlm_calc_difference_kernel()(dx,
+ dy,
+ (float *)guide_ptr,
+ (float *)variance_ptr,
+ NULL,
+ difference,
+ local_rect,
+ w,
+ channel_offset,
+ 0,
+ a,
+ k_2);
+
+ filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f);
+ filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, w, f);
+ filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f);
+
+ filter_nlm_update_output_kernel()(dx,
+ dy,
+ blurDifference,
+ (float *)image_ptr,
+ difference,
+ (float *)out_ptr,
+ weightAccum,
+ local_rect,
+ channel_offset,
+ stride,
+ f);
+ }
+
+ int local_rect[4] = {0, 0, rect.z - rect.x, rect.w - rect.y};
+ filter_nlm_normalize_kernel()((float *)out_ptr, weightAccum, local_rect, w);
+
+ return true;
+ }
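
Aside: the flattened loop above visits every (dx, dy) offset in the
(2r + 1) x (2r + 1) non-local-means search window. A minimal standalone sketch of
just that traversal (the radius value is illustrative):

    #include <cstdio>

    int main()
    {
      const int r = 2; /* illustrative stand-in for task->nlm_state.r */
      /* Same flattened-index arithmetic as denoising_non_local_means above. */
      for (int i = 0; i < (2 * r + 1) * (2 * r + 1); i++) {
        const int dy = i / (2 * r + 1) - r;
        const int dx = i % (2 * r + 1) - r;
        printf("offset (%d, %d)\n", dx, dy);
      }
      return 0;
    }
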
+
+ bool denoising_construct_transform(DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_CONSTRUCT_TRANSFORM);
+
+ for (int y = 0; y < task->filter_area.w; y++) {
+ for (int x = 0; x < task->filter_area.z; x++) {
+ filter_construct_transform_kernel()((float *)task->buffer.mem.device_pointer,
+ task->tile_info,
+ x + task->filter_area.x,
+ y + task->filter_area.y,
+ y * task->filter_area.z + x,
+ (float *)task->storage.transform.device_pointer,
+ (int *)task->storage.rank.device_pointer,
+ &task->rect.x,
+ task->buffer.pass_stride,
+ task->buffer.frame_stride,
+ task->buffer.use_time,
+ task->radius,
+ task->pca_threshold);
+ }
+ }
+ return true;
+ }
+
+ bool denoising_accumulate(device_ptr color_ptr,
+ device_ptr color_variance_ptr,
+ device_ptr scale_ptr,
+ int frame,
+ DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_RECONSTRUCT);
+
+ float *temporary_mem = (float *)task->buffer.temporary_mem.device_pointer;
+ float *difference = temporary_mem;
+ float *blurDifference = temporary_mem + task->buffer.pass_stride;
+
+ int r = task->radius;
+ int frame_offset = frame * task->buffer.frame_stride;
+ for (int i = 0; i < (2 * r + 1) * (2 * r + 1); i++) {
+ int dy = i / (2 * r + 1) - r;
+ int dx = i % (2 * r + 1) - r;
+
+ int local_rect[4] = {max(0, -dx),
+ max(0, -dy),
+ task->reconstruction_state.source_w - max(0, dx),
+ task->reconstruction_state.source_h - max(0, dy)};
+ filter_nlm_calc_difference_kernel()(dx,
+ dy,
+ (float *)color_ptr,
+ (float *)color_variance_ptr,
+ (float *)scale_ptr,
+ difference,
+ local_rect,
+ task->buffer.stride,
+ task->buffer.pass_stride,
+ frame_offset,
+ 1.0f,
+ task->nlm_k_2);
+ filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4);
+ filter_nlm_calc_weight_kernel()(
+ blurDifference, difference, local_rect, task->buffer.stride, 4);
+ filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4);
+ filter_nlm_construct_gramian_kernel()(dx,
+ dy,
+ task->tile_info->frames[frame],
+ blurDifference,
+ (float *)task->buffer.mem.device_pointer,
+ (float *)task->storage.transform.device_pointer,
+ (int *)task->storage.rank.device_pointer,
+ (float *)task->storage.XtWX.device_pointer,
+ (float3 *)task->storage.XtWY.device_pointer,
+ local_rect,
+ &task->reconstruction_state.filter_window.x,
+ task->buffer.stride,
+ 4,
+ task->buffer.pass_stride,
+ frame_offset,
+ task->buffer.use_time);
+ }
+
+ return true;
+ }
+
+ bool denoising_solve(device_ptr output_ptr, DenoisingTask *task)
+ {
+ for (int y = 0; y < task->filter_area.w; y++) {
+ for (int x = 0; x < task->filter_area.z; x++) {
+ filter_finalize_kernel()(x,
+ y,
+ y * task->filter_area.z + x,
+ (float *)output_ptr,
+ (int *)task->storage.rank.device_pointer,
+ (float *)task->storage.XtWX.device_pointer,
+ (float3 *)task->storage.XtWY.device_pointer,
+ &task->reconstruction_state.buffer_params.x,
+ task->render_buffer.samples);
+ }
+ }
+ return true;
+ }
+
+ bool denoising_combine_halves(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ int r,
+ int4 rect,
+ DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_COMBINE_HALVES);
+
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = rect.x; x < rect.z; x++) {
+ filter_combine_halves_kernel()(x,
+ y,
+ (float *)mean_ptr,
+ (float *)variance_ptr,
+ (float *)a_ptr,
+ (float *)b_ptr,
+ &rect.x,
+ r);
+ }
+ }
+ return true;
+ }
+
+ bool denoising_divide_shadow(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr sample_variance_ptr,
+ device_ptr sv_variance_ptr,
+ device_ptr buffer_variance_ptr,
+ DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_DIVIDE_SHADOW);
+
+ for (int y = task->rect.y; y < task->rect.w; y++) {
+ for (int x = task->rect.x; x < task->rect.z; x++) {
+ filter_divide_shadow_kernel()(task->render_buffer.samples,
+ task->tile_info,
+ x,
+ y,
+ (float *)a_ptr,
+ (float *)b_ptr,
+ (float *)sample_variance_ptr,
+ (float *)sv_variance_ptr,
+ (float *)buffer_variance_ptr,
+ &task->rect.x,
+ task->render_buffer.pass_stride,
+ task->render_buffer.offset);
+ }
+ }
+ return true;
+ }
+
+ bool denoising_get_feature(int mean_offset,
+ int variance_offset,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ float scale,
+ DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_GET_FEATURE);
+
+ for (int y = task->rect.y; y < task->rect.w; y++) {
+ for (int x = task->rect.x; x < task->rect.z; x++) {
+ filter_get_feature_kernel()(task->render_buffer.samples,
+ task->tile_info,
+ mean_offset,
+ variance_offset,
+ x,
+ y,
+ (float *)mean_ptr,
+ (float *)variance_ptr,
+ scale,
+ &task->rect.x,
+ task->render_buffer.pass_stride,
+ task->render_buffer.offset);
+ }
+ }
+ return true;
+ }
+
+ bool denoising_write_feature(int out_offset,
+ device_ptr from_ptr,
+ device_ptr buffer_ptr,
+ DenoisingTask *task)
+ {
+ for (int y = 0; y < task->filter_area.w; y++) {
+ for (int x = 0; x < task->filter_area.z; x++) {
+ filter_write_feature_kernel()(task->render_buffer.samples,
+ x + task->filter_area.x,
+ y + task->filter_area.y,
+ &task->reconstruction_state.buffer_params.x,
+ (float *)from_ptr,
+ (float *)buffer_ptr,
+ out_offset,
+ &task->rect.x);
+ }
+ }
+ return true;
+ }
+
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_DETECT_OUTLIERS);
+
+ for (int y = task->rect.y; y < task->rect.w; y++) {
+ for (int x = task->rect.x; x < task->rect.z; x++) {
+ filter_detect_outliers_kernel()(x,
+ y,
+ (float *)image_ptr,
+ (float *)variance_ptr,
+ (float *)depth_ptr,
+ (float *)output_ptr,
+ &task->rect.x,
+ task->buffer.pass_stride);
+ }
+ }
+ return true;
+ }
+
+ void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
+ {
+ const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
+
+ scoped_timer timer(&tile.buffers->render_time);
+
+ Coverage coverage(kg, tile);
+ if (use_coverage) {
+ coverage.init_path_trace();
+ }
+
+ float *render_buffer = (float *)tile.buffer;
+ int start_sample = tile.start_sample;
+ int end_sample = tile.start_sample + tile.num_samples;
+
+ /* Needed for Embree. */
+ SIMD_SET_FLUSH_TO_ZERO;
+
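+    /* Sample-major order: one sample is completed across the whole tile
+     * before the next begins, so tile.sample and progress can be reported
+     * per finished sample. */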
+ for (int sample = start_sample; sample < end_sample; sample++) {
+ if (task.get_cancel() || task_pool.canceled()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ if (use_coverage) {
+ coverage.init_pixel(x, y);
+ }
+ path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
+ }
+ }
+
+ tile.sample = sample + 1;
+
+ task.update_progress(&tile, tile.w * tile.h);
+ }
+ if (use_coverage) {
+ coverage.finalize();
+ }
+ }
+
+ void denoise(DenoisingTask &denoising, RenderTile &tile)
+ {
+ ProfilingHelper profiling(denoising.profiler, PROFILING_DENOISING);
+
+ tile.sample = tile.start_sample + tile.num_samples;
+
+ denoising.functions.construct_transform = function_bind(
+ &CPUDevice::denoising_construct_transform, this, &denoising);
+ denoising.functions.accumulate = function_bind(
+ &CPUDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.solve = function_bind(&CPUDevice::denoising_solve, this, _1, &denoising);
+ denoising.functions.divide_shadow = function_bind(
+ &CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.non_local_means = function_bind(
+ &CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.combine_halves = function_bind(
+ &CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
+ denoising.functions.get_feature = function_bind(
+ &CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.write_feature = function_bind(
+ &CPUDevice::denoising_write_feature, this, _1, _2, _3, &denoising);
+ denoising.functions.detect_outliers = function_bind(
+ &CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
+
+ denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
+ denoising.render_buffer.samples = tile.sample;
+ denoising.buffer.gpu_temporary_mem = false;
+
+ denoising.run_denoising(&tile);
+ }
+
+ void thread_render(DeviceTask &task)
+ {
+ if (task_pool.canceled()) {
+ if (task.need_finish_queue == false)
+ return;
+ }
+
+ /* allocate buffer for kernel globals */
+ device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals");
+ kgbuffer.alloc_to_device(1);
+
+ KernelGlobals *kg = new ((void *)kgbuffer.device_pointer)
+ KernelGlobals(thread_kernel_globals_init());
+
+ profiler.add_state(&kg->profiler);
+
+ CPUSplitKernel *split_kernel = NULL;
+ if (use_split_kernel) {
+ split_kernel = new CPUSplitKernel(this);
+ if (!split_kernel->load_kernels(requested_features)) {
+ thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
+ kgbuffer.free();
+ delete split_kernel;
+ return;
+ }
+ }
+
+ RenderTile tile;
+ DenoisingTask denoising(this, task);
+ denoising.profiler = &kg->profiler;
+
+ while (task.acquire_tile(this, tile)) {
+ if (tile.task == RenderTile::PATH_TRACE) {
+ if (use_split_kernel) {
+ device_only_memory<uchar> void_buffer(this, "void_buffer");
+ split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
+ }
+ else {
+ path_trace(task, tile, kg);
+ }
+ }
+ else if (tile.task == RenderTile::DENOISE) {
+ denoise(denoising, tile);
+ task.update_progress(&tile, tile.w * tile.h);
+ }
+
+ task.release_tile(tile);
+
+ if (task_pool.canceled()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+ }
+
+ profiler.remove_state(&kg->profiler);
+
+ thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
+ kg->~KernelGlobals();
+ kgbuffer.free();
+ delete split_kernel;
+ }
+
+ void thread_film_convert(DeviceTask &task)
+ {
+ float sample_scale = 1.0f / (task.sample + 1);
+
+ if (task.rgba_half) {
+ for (int y = task.y; y < task.y + task.h; y++)
+ for (int x = task.x; x < task.x + task.w; x++)
+ convert_to_half_float_kernel()(&kernel_globals,
+ (uchar4 *)task.rgba_half,
+ (float *)task.buffer,
+ sample_scale,
+ x,
+ y,
+ task.offset,
+ task.stride);
+ }
+ else {
+ for (int y = task.y; y < task.y + task.h; y++)
+ for (int x = task.x; x < task.x + task.w; x++)
+ convert_to_byte_kernel()(&kernel_globals,
+ (uchar4 *)task.rgba_byte,
+ (float *)task.buffer,
+ sample_scale,
+ x,
+ y,
+ task.offset,
+ task.stride);
+ }
+ }
+
+ void thread_shader(DeviceTask &task)
+ {
+ KernelGlobals *kg = new KernelGlobals(thread_kernel_globals_init());
+
+ for (int sample = 0; sample < task.num_samples; sample++) {
+ for (int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
+ shader_kernel()(kg,
+ (uint4 *)task.shader_input,
+ (float4 *)task.shader_output,
+ task.shader_eval_type,
+ task.shader_filter,
+ x,
+ task.offset,
+ sample);
+
+ if (task.get_cancel() || task_pool.canceled())
+ break;
+
+ task.update_progress(NULL);
+ }
+
+ thread_kernel_globals_free(kg);
+ delete kg;
+ }
+
+ int get_split_task_count(DeviceTask &task)
+ {
+ if (task.type == DeviceTask::SHADER)
+ return task.get_subtask_count(info.cpu_threads, 256);
+ else
+ return task.get_subtask_count(info.cpu_threads);
+ }
+
+ void task_add(DeviceTask &task)
+ {
+ /* Load texture info. */
+ load_texture_info();
+
+ /* split task into smaller ones */
+ list<DeviceTask> tasks;
+
+ if (task.type == DeviceTask::SHADER)
+ task.split(tasks, info.cpu_threads, 256);
+ else
+ task.split(tasks, info.cpu_threads);
+
+ foreach (DeviceTask &task, tasks)
+ task_pool.push(new CPUDeviceTask(this, task));
+ }
+
+ void task_wait()
+ {
+ task_pool.wait_work();
+ }
+
+ void task_cancel()
+ {
+ task_pool.cancel();
+ }
+
+ protected:
+ inline KernelGlobals thread_kernel_globals_init()
+ {
+ KernelGlobals kg = kernel_globals;
+ kg.transparent_shadow_intersections = NULL;
+ const int decoupled_count = sizeof(kg.decoupled_volume_steps) /
+ sizeof(*kg.decoupled_volume_steps);
+ for (int i = 0; i < decoupled_count; ++i) {
+ kg.decoupled_volume_steps[i] = NULL;
+ }
+ kg.decoupled_volume_steps_index = 0;
+ kg.coverage_asset = kg.coverage_object = kg.coverage_material = NULL;
+#ifdef WITH_OSL
+ OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
+#endif
+ return kg;
+ }
+
+ inline void thread_kernel_globals_free(KernelGlobals *kg)
+ {
+ if (kg == NULL) {
+ return;
+ }
+
+ if (kg->transparent_shadow_intersections != NULL) {
+ free(kg->transparent_shadow_intersections);
+ }
+ const int decoupled_count = sizeof(kg->decoupled_volume_steps) /
+ sizeof(*kg->decoupled_volume_steps);
+ for (int i = 0; i < decoupled_count; ++i) {
+ if (kg->decoupled_volume_steps[i] != NULL) {
+ free(kg->decoupled_volume_steps[i]);
+ }
+ }
+#ifdef WITH_OSL
+ OSLShader::thread_free(kg);
+#endif
+ }
+
+ virtual bool load_kernels(const DeviceRequestedFeatures &requested_features_)
+ {
+ requested_features = requested_features_;
+
+ return true;
+ }
+};
+
+/* split kernel */
+
+class CPUSplitKernelFunction : public SplitKernelFunction {
+ public:
+ CPUDevice *device;
+ void (*func)(KernelGlobals *kg, KernelData *data);
+
+ CPUSplitKernelFunction(CPUDevice *device) : device(device), func(NULL)
+ {
+ }
+ ~CPUSplitKernelFunction()
+ {
+ }
+
+ virtual bool enqueue(const KernelDimensions &dim,
+ device_memory &kernel_globals,
+ device_memory &data)
+ {
+ if (!func) {
+ return false;
+ }
+
+ KernelGlobals *kg = (KernelGlobals *)kernel_globals.device_pointer;
+ kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]);
+
+ for (int y = 0; y < dim.global_size[1]; y++) {
+ for (int x = 0; x < dim.global_size[0]; x++) {
+ kg->global_id = make_int2(x, y);
+
+ func(kg, (KernelData *)data.device_pointer);
+ }
+ }
+
+ return true;
+ }
+};
+
+CPUSplitKernel::CPUSplitKernel(CPUDevice *device) : DeviceSplitKernel(device), device(device)
+{
+}
+
+bool CPUSplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim,
+ RenderTile &rtile,
+ int num_global_elements,
+ device_memory &kernel_globals,
+ device_memory &data,
+ device_memory &split_data,
+ device_memory &ray_state,
+ device_memory &queue_index,
+ device_memory &use_queues_flags,
+ device_memory &work_pool_wgs)
+{
+ KernelGlobals *kg = (KernelGlobals *)kernel_globals.device_pointer;
+ kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]);
+
+ for (int y = 0; y < dim.global_size[1]; y++) {
+ for (int x = 0; x < dim.global_size[0]; x++) {
+ kg->global_id = make_int2(x, y);
+
+ device->data_init_kernel()((KernelGlobals *)kernel_globals.device_pointer,
+ (KernelData *)data.device_pointer,
+ (void *)split_data.device_pointer,
+ num_global_elements,
+ (char *)ray_state.device_pointer,
+ rtile.start_sample,
+ rtile.start_sample + rtile.num_samples,
+ rtile.x,
+ rtile.y,
+ rtile.w,
+ rtile.h,
+ rtile.offset,
+ rtile.stride,
+ (int *)queue_index.device_pointer,
+ dim.global_size[0] * dim.global_size[1],
+ (char *)use_queues_flags.device_pointer,
+ (uint *)work_pool_wgs.device_pointer,
+ rtile.num_samples,
+ (float *)rtile.buffer);
+ }
+ }
+
+ return true;
+}
+
+SplitKernelFunction *CPUSplitKernel::get_split_kernel_function(const string &kernel_name,
+ const DeviceRequestedFeatures &)
+{
+ CPUSplitKernelFunction *kernel = new CPUSplitKernelFunction(device);
+
+ kernel->func = device->split_kernels[kernel_name]();
+ if (!kernel->func) {
+ delete kernel;
+ return NULL;
+ }
+
+ return kernel;
+}
+
+int2 CPUSplitKernel::split_kernel_local_size()
+{
+ return make_int2(1, 1);
+}
+
+int2 CPUSplitKernel::split_kernel_global_size(device_memory & /*kg*/,
+ device_memory & /*data*/,
+ DeviceTask * /*task*/)
+{
+ return make_int2(1, 1);
+}
+
+uint64_t CPUSplitKernel::state_buffer_size(device_memory &kernel_globals,
+ device_memory & /*data*/,
+ size_t num_threads)
+{
+ KernelGlobals *kg = (KernelGlobals *)kernel_globals.device_pointer;
+
+ return split_data_buffer_size(kg, num_threads);
+}
+
+Device *device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
+{
+ return new CPUDevice(info, stats, profiler, background);
+}
+
+void device_cpu_info(vector<DeviceInfo> &devices)
+{
+ DeviceInfo info;
+
+ info.type = DEVICE_CPU;
+ info.description = system_cpu_brand_string();
+ info.id = "CPU";
+ info.num = 0;
+ info.has_volume_decoupled = true;
+ info.has_osl = true;
+ info.has_half_images = true;
+ info.has_profiling = true;
+
+ devices.insert(devices.begin(), info);
+}
+
+string device_cpu_capabilities()
+{
+ string capabilities = "";
+ capabilities += system_cpu_support_sse2() ? "SSE2 " : "";
+ capabilities += system_cpu_support_sse3() ? "SSE3 " : "";
+ capabilities += system_cpu_support_sse41() ? "SSE41 " : "";
+ capabilities += system_cpu_support_avx() ? "AVX " : "";
+ capabilities += system_cpu_support_avx2() ? "AVX2" : "";
+  /* Guard against an empty string on CPUs without any of the above. */
+  if (!capabilities.empty() && capabilities[capabilities.size() - 1] == ' ')
+    capabilities.resize(capabilities.size() - 1);
+ return capabilities;
+}
+
+CCL_NAMESPACE_END
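
Aside: each KernelFunctions member registered above bundles one entry point per
instruction set (regular, SSE2, SSE3, SSE4.1, AVX, AVX2) and resolves to the widest
variant the running CPU supports. A hedged sketch of that dispatch idea (the type
and member names here are hypothetical, not this patch's API):

    template<typename F> struct KernelDispatchSketch {
      F f_default, f_sse2, f_avx2; /* one pointer per compiled ISA variant */

      F choose() const
      {
        /* Prefer the widest supported ISA, mirroring how the
         * system_cpu_support_* checks are used elsewhere in this file. */
        if (system_cpu_support_avx2())
          return f_avx2;
        if (system_cpu_support_sse2())
          return f_sse2;
        return f_default;
      }
    };
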
diff -Naur a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
--- a/intern/cycles/device/device_cuda.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/device_cuda.cpp 2020-01-10 20:42:43.460923388 +0300
@@ -1788,6 +1788,23 @@
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+ /* Kernels for adaptive sampling. */
+ CUfunction cuAdaptiveStopping, cuAdaptiveFilterX, cuAdaptiveFilterY, cuAdaptiveScaleSamples;
+ if (task.integrator_adaptive) {
+ cuda_assert(
+ cuModuleGetFunction(&cuAdaptiveStopping, cuModule, "kernel_cuda_adaptive_stopping"));
+ cuda_assert(cuFuncSetCacheConfig(cuAdaptiveStopping, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(
+ cuModuleGetFunction(&cuAdaptiveFilterX, cuModule, "kernel_cuda_adaptive_filter_x"));
+ cuda_assert(cuFuncSetCacheConfig(cuAdaptiveFilterX, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(
+ cuModuleGetFunction(&cuAdaptiveFilterY, cuModule, "kernel_cuda_adaptive_filter_y"));
+ cuda_assert(cuFuncSetCacheConfig(cuAdaptiveFilterY, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuModuleGetFunction(
+ &cuAdaptiveScaleSamples, cuModule, "kernel_cuda_adaptive_scale_samples"));
+ cuda_assert(cuFuncSetCacheConfig(cuAdaptiveScaleSamples, CU_FUNC_CACHE_PREFER_L1));
+ }
+
/* Allocate work tile. */
work_tiles.alloc(1);
@@ -1812,6 +1829,16 @@
uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
+ if (task.integrator_adaptive) {
+      /* Force to either 1, 2 or a multiple of 4 samples per kernel invocation. */
+ if (step_samples == 3) {
+ step_samples = 2;
+ }
+ else if (step_samples > 4) {
+ step_samples &= 0xfffffffc;
+ }
+ }
+
/* Render all samples. */
int start_sample = rtile.start_sample;
int end_sample = rtile.start_sample + rtile.num_samples;
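
Aside: restated as a standalone helper (the function name is hypothetical), the
rounding rule in the hunk above keeps each kernel invocation at 1, 2 or a multiple
of 4 samples, so that chunk boundaries periodically land on the every-4-samples
filter cadence used below:

    static unsigned int clamp_adaptive_step_samples(unsigned int step_samples)
    {
      if (step_samples == 3)
        return 2; /* chunks of 3 never end on a 4-sample boundary, so drop to 2 */
      if (step_samples > 4)
        return step_samples & ~0x3u; /* same as &= 0xfffffffc: round down to a multiple of 4 */
      return step_samples; /* 1, 2 and 4 already line up */
    }
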
@@ -1832,6 +1859,26 @@
cuda_assert(cuLaunchKernel(
cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+ uint filter_sample = sample + wtile->num_samples - 1;
+ /* Run the adaptive sampling kernels when we're at a multiple of 4 samples.
+       * These are a series of tiny kernels because there is no grid synchronization
+       * from within a kernel, so several separate launches are required. */
+ if (task.integrator_adaptive && (filter_sample & 0x3) == 3) {
+ total_work_size = wtile->h * wtile->w;
+ void *args2[] = {&d_work_tiles, &filter_sample, &total_work_size};
+ num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(
+ cuAdaptiveStopping, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args2, 0));
+ total_work_size = wtile->h;
+ num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(
+ cuAdaptiveFilterX, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args2, 0));
+ total_work_size = wtile->w;
+ num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(
+ cuAdaptiveFilterY, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args2, 0));
+ }
+
cuda_assert(cuCtxSynchronize());
/* Update progress. */
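
Aside: the (filter_sample & 0x3) == 3 test above fires at zero-based sample indices
3, 7, 11, ..., i.e. once every fourth accumulated sample, matching the step-size
rounding earlier in this file. A tiny loop showing the cadence:

    for (unsigned int filter_sample = 0; filter_sample < 16; filter_sample++) {
      if ((filter_sample & 0x3) == 3) {
        /* cuAdaptiveStopping, cuAdaptiveFilterX and cuAdaptiveFilterY would be
         * launched here, i.e. after samples 4, 8, 12 and 16 complete. */
      }
    }
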
@@ -1843,6 +1890,17 @@
break;
}
}
+
+ if (task.integrator_adaptive) {
+ CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
+ uint total_work_size = wtile->h * wtile->w;
+ void *args[] = {&d_work_tiles, &rtile.start_sample, &rtile.sample, &total_work_size};
+ uint num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(
+ cuAdaptiveScaleSamples, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+ cuda_assert(cuCtxSynchronize());
+ task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
+ }
}
void film_convert(DeviceTask &task,
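
Aside: the final cuAdaptiveScaleSamples launch above runs once over the whole tile
after rendering. As a hedged reading (an assumption, not taken from this patch's
kernel sources): pixels that stopped sampling early hold sums over fewer samples,
so they are rescaled to stay comparable with the tile-wide sample count, roughly:

    /* Illustrative CPU-side sketch only; the buffer layout and the per-pixel
     * sample bookkeeping are assumptions, not this patch's kernel. */
    static void adaptive_scale_samples_sketch(
        float *buffer, const int *samples_done, int start_sample, int end_sample, int num_pixels)
    {
      for (int i = 0; i < num_pixels; i++) {
        const int s = samples_done[i] - start_sample;
        if (s > 0 && s < end_sample - start_sample)
          buffer[i] *= float(end_sample - start_sample) / float(s);
      }
    }
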
diff -Naur a/intern/cycles/device/device_cuda.cpp.orig b/intern/cycles/device/device_cuda.cpp.orig
--- a/intern/cycles/device/device_cuda.cpp.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/device/device_cuda.cpp.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,2846 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <climits>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "device/device.h"
+#include "device/device_denoising.h"
+#include "device/device_intern.h"
+#include "device/device_split_kernel.h"
+
+#include "render/buffers.h"
+
+#include "kernel/filter/filter_defines.h"
+
+#ifdef WITH_CUDA_DYNLOAD
+# include "cuew.h"
+#else
+# include "util/util_opengl.h"
+# include <cuda.h>
+# include <cudaGL.h>
+#endif
+#include "util/util_debug.h"
+#include "util/util_foreach.h"
+#include "util/util_logging.h"
+#include "util/util_map.h"
+#include "util/util_md5.h"
+#include "util/util_opengl.h"
+#include "util/util_path.h"
+#include "util/util_string.h"
+#include "util/util_system.h"
+#include "util/util_types.h"
+#include "util/util_time.h"
+#include "util/util_windows.h"
+
+#include "kernel/split/kernel_split_data_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef WITH_CUDA_DYNLOAD
+
+/* Transparently implement some functions, so the majority of the file does not
+ * need to worry about the difference between dynamically loaded and linked CUDA
+ * at all.
+ */
+
+namespace {
+
+const char *cuewErrorString(CUresult result)
+{
+  /* We can only give the error code here without major code duplication; that
+   * should be enough, since dynamic loading is only being disabled by folks
+   * who know what they're doing anyway.
+ *
+ * NOTE: Avoid call from several threads.
+ */
+ static string error;
+ error = string_printf("%d", result);
+ return error.c_str();
+}
+
+const char *cuewCompilerPath()
+{
+ return CYCLES_CUDA_NVCC_EXECUTABLE;
+}
+
+int cuewCompilerVersion()
+{
+ return (CUDA_VERSION / 100) + (CUDA_VERSION % 100 / 10);
+}
+
+} /* namespace */
+#endif /* WITH_CUDA_DYNLOAD */
+
+class CUDADevice;
+
+class CUDASplitKernel : public DeviceSplitKernel {
+ CUDADevice *device;
+
+ public:
+ explicit CUDASplitKernel(CUDADevice *device);
+
+ virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads);
+
+ virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
+ RenderTile &rtile,
+ int num_global_elements,
+ device_memory &kernel_globals,
+ device_memory &kernel_data_,
+ device_memory &split_data,
+ device_memory &ray_state,
+ device_memory &queue_index,
+ device_memory &use_queues_flag,
+ device_memory &work_pool_wgs);
+
+ virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
+ const DeviceRequestedFeatures &);
+ virtual int2 split_kernel_local_size();
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task);
+};
+
+/* Utility to push/pop CUDA context. */
+class CUDAContextScope {
+ public:
+ CUDAContextScope(CUDADevice *device);
+ ~CUDAContextScope();
+
+ private:
+ CUDADevice *device;
+};
+
+class CUDADevice : public Device {
+ public:
+ DedicatedTaskPool task_pool;
+ CUdevice cuDevice;
+ CUcontext cuContext;
+ CUmodule cuModule, cuFilterModule;
+ size_t device_texture_headroom;
+ size_t device_working_headroom;
+ bool move_texture_to_host;
+ size_t map_host_used;
+ size_t map_host_limit;
+ int can_map_host;
+ int cuDevId;
+ int cuDevArchitecture;
+ bool first_error;
+ CUDASplitKernel *split_kernel;
+
+ struct CUDAMem {
+ CUDAMem() : texobject(0), array(0), use_mapped_host(false)
+ {
+ }
+
+ CUtexObject texobject;
+ CUarray array;
+
+ /* If true, a mapped host memory in shared_pointer is being used. */
+ bool use_mapped_host;
+ };
+ typedef map<device_memory *, CUDAMem> CUDAMemMap;
+ CUDAMemMap cuda_mem_map;
+
+ struct PixelMem {
+ GLuint cuPBO;
+ CUgraphicsResource cuPBOresource;
+ GLuint cuTexId;
+ int w, h;
+ };
+ map<device_ptr, PixelMem> pixel_mem_map;
+
+ /* Bindless Textures */
+ device_vector<TextureInfo> texture_info;
+ bool need_texture_info;
+
+ CUdeviceptr cuda_device_ptr(device_ptr mem)
+ {
+ return (CUdeviceptr)mem;
+ }
+
+ static bool have_precompiled_kernels()
+ {
+ string cubins_path = path_get("lib");
+ return path_exists(cubins_path);
+ }
+
+ virtual bool show_samples() const
+ {
+ /* The CUDADevice only processes one tile at a time, so showing samples is fine. */
+ return true;
+ }
+
+ virtual BVHLayoutMask get_bvh_layout_mask() const
+ {
+ return BVH_LAYOUT_BVH2;
+ }
+
+ /*#ifdef NDEBUG
+#define cuda_abort()
+#else
+#define cuda_abort() abort()
+#endif*/
+ void cuda_error_documentation()
+ {
+ if (first_error) {
+ fprintf(stderr,
+ "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
+ fprintf(stderr,
+ "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n");
+ first_error = false;
+ }
+ }
+
+#define cuda_assert(stmt) \
+ { \
+ CUresult result = stmt; \
+\
+ if (result != CUDA_SUCCESS) { \
+ string message = string_printf( \
+ "CUDA error: %s in %s, line %d", cuewErrorString(result), #stmt, __LINE__); \
+ if (error_msg == "") \
+ error_msg = message; \
+ fprintf(stderr, "%s\n", message.c_str()); \
+ /*cuda_abort();*/ \
+ cuda_error_documentation(); \
+ } \
+ } \
+ (void)0
+
+ bool cuda_error_(CUresult result, const string &stmt)
+ {
+ if (result == CUDA_SUCCESS)
+ return false;
+
+ string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuewErrorString(result));
+ if (error_msg == "")
+ error_msg = message;
+ fprintf(stderr, "%s\n", message.c_str());
+ cuda_error_documentation();
+ return true;
+ }
+
+#define cuda_error(stmt) cuda_error_(stmt, #stmt)
+
+ void cuda_error_message(const string &message)
+ {
+ if (error_msg == "")
+ error_msg = message;
+ fprintf(stderr, "%s\n", message.c_str());
+ cuda_error_documentation();
+ }
+
+ CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
+ : Device(info, stats, profiler, background_),
+ texture_info(this, "__texture_info", MEM_TEXTURE)
+ {
+ first_error = true;
+ background = background_;
+
+ cuDevId = info.num;
+ cuDevice = 0;
+ cuContext = 0;
+
+ cuModule = 0;
+ cuFilterModule = 0;
+
+ split_kernel = NULL;
+
+ need_texture_info = false;
+
+ device_texture_headroom = 0;
+ device_working_headroom = 0;
+ move_texture_to_host = false;
+ map_host_limit = 0;
+ map_host_used = 0;
+ can_map_host = 0;
+
+    /* Initialize CUDA. */
+ if (cuda_error(cuInit(0)))
+ return;
+
+ /* Setup device and context. */
+ if (cuda_error(cuDeviceGet(&cuDevice, cuDevId)))
+ return;
+
+ /* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
+ * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
+ * so we can predict which memory to map to host. */
+ cuda_assert(
+ cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
+
+ unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX;
+ if (can_map_host) {
+ ctx_flags |= CU_CTX_MAP_HOST;
+ init_host_memory();
+ }
+
+ /* Create context. */
+ CUresult result;
+
+ if (background) {
+ result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
+ }
+ else {
+ result = cuGLCtxCreate(&cuContext, ctx_flags, cuDevice);
+
+ if (result != CUDA_SUCCESS) {
+ result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
+ background = true;
+ }
+ }
+
+ if (cuda_error_(result, "cuCtxCreate"))
+ return;
+
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+ cuDevArchitecture = major * 100 + minor * 10;
+
+ /* Pop context set by cuCtxCreate. */
+ cuCtxPopCurrent(NULL);
+ }
+
+ ~CUDADevice()
+ {
+ task_pool.stop();
+
+ delete split_kernel;
+
+ texture_info.free();
+
+ cuda_assert(cuCtxDestroy(cuContext));
+ }
+
+ bool support_device(const DeviceRequestedFeatures & /*requested_features*/)
+ {
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+
+ /* We only support sm_30 and above */
+ if (major < 3) {
+ cuda_error_message(string_printf(
+ "CUDA device supported only with compute capability 3.0 or up, found %d.%d.",
+ major,
+ minor));
+ return false;
+ }
+
+ return true;
+ }
+
+ bool use_adaptive_compilation()
+ {
+ return DebugFlags().cuda.adaptive_compile;
+ }
+
+ bool use_split_kernel()
+ {
+ return DebugFlags().cuda.split_kernel;
+ }
+
+ /* Common NVCC flags which stays the same regardless of shading model,
+ * kernel sources md5 and only depends on compiler or compilation settings.
+ */
+ string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
+ bool filter = false,
+ bool split = false)
+ {
+ const int machine = system_cpu_bits();
+ const string source_path = path_get("source");
+ const string include_path = source_path;
+ string cflags = string_printf(
+ "-m%d "
+ "--ptxas-options=\"-v\" "
+ "--use_fast_math "
+ "-DNVCC "
+ "-I\"%s\"",
+ machine,
+ include_path.c_str());
+ if (!filter && use_adaptive_compilation()) {
+ cflags += " " + requested_features.get_build_options();
+ }
+ const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
+ if (extra_cflags) {
+ cflags += string(" ") + string(extra_cflags);
+ }
+#ifdef WITH_CYCLES_DEBUG
+ cflags += " -D__KERNEL_DEBUG__";
+#endif
+
+ if (split) {
+ cflags += " -D__SPLIT__";
+ }
+
+ return cflags;
+ }
+
+ bool compile_check_compiler()
+ {
+ const char *nvcc = cuewCompilerPath();
+ if (nvcc == NULL) {
+ cuda_error_message(
+ "CUDA nvcc compiler not found. "
+ "Install CUDA toolkit in default location.");
+ return false;
+ }
+ const int cuda_version = cuewCompilerVersion();
+ VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version << ".";
+ const int major = cuda_version / 10, minor = cuda_version % 10;
+ if (cuda_version == 0) {
+ cuda_error_message("CUDA nvcc compiler version could not be parsed.");
+ return false;
+ }
+ if (cuda_version < 80) {
+ printf(
+ "Unsupported CUDA version %d.%d detected, "
+ "you need CUDA 8.0 or newer.\n",
+ major,
+ minor);
+ return false;
+ }
+ else if (cuda_version != 101) {
+ printf(
+ "CUDA version %d.%d detected, build may succeed but only "
+ "CUDA 10.1 is officially supported.\n",
+ major,
+ minor);
+ }
+ return true;
+ }
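
Aside, a worked example of the version decoding above: cuewCompilerVersion()
returning 101 gives major = 101 / 10 = 10 and minor = 101 % 10 = 1, i.e. CUDA 10.1,
the one officially supported release, while 80 decodes to CUDA 8.0, the minimum.
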
+
+ string compile_kernel(const DeviceRequestedFeatures &requested_features,
+ bool filter = false,
+ bool split = false)
+ {
+ const char *name, *source;
+ if (filter) {
+ name = "filter";
+ source = "filter.cu";
+ }
+ else if (split) {
+ name = "kernel_split";
+ source = "kernel_split.cu";
+ }
+ else {
+ name = "kernel";
+ source = "kernel.cu";
+ }
+ /* Compute cubin name. */
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+
+ /* Attempt to use kernel provided with Blender. */
+ if (!use_adaptive_compilation()) {
+ const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
+ VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
+ if (path_exists(cubin)) {
+ VLOG(1) << "Using precompiled kernel.";
+ return cubin;
+ }
+ const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor));
+ VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
+ if (path_exists(ptx)) {
+ VLOG(1) << "Using precompiled kernel.";
+ return ptx;
+ }
+ }
+
+ const string common_cflags = compile_kernel_get_common_cflags(
+ requested_features, filter, split);
+
+ /* Try to use locally compiled kernel. */
+ const string source_path = path_get("source");
+ const string kernel_md5 = path_files_md5_hash(source_path);
+
+    /* We include the cflags in the md5, so that changing the CUDA toolkit or
+     * other compiler command-line arguments causes the cubin to be re-built.
+     */
+ const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags);
+
+ const string cubin_file = string_printf(
+ "cycles_%s_sm%d%d_%s.cubin", name, major, minor, cubin_md5.c_str());
+ const string cubin = path_cache_get(path_join("kernels", cubin_file));
+ VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
+ if (path_exists(cubin)) {
+ VLOG(1) << "Using locally compiled kernel.";
+ return cubin;
+ }
+
+#ifdef _WIN32
+ if (have_precompiled_kernels()) {
+ if (major < 3) {
+ cuda_error_message(
+ string_printf("CUDA device requires compute capability 3.0 or up, "
+ "found %d.%d. Your GPU is not supported.",
+ major,
+ minor));
+ }
+ else {
+ cuda_error_message(
+ string_printf("CUDA binary kernel for this graphics card compute "
+ "capability (%d.%d) not found.",
+ major,
+ minor));
+ }
+ return "";
+ }
+#endif
+
+ /* Compile. */
+ if (!compile_check_compiler()) {
+ return "";
+ }
+ const char *nvcc = cuewCompilerPath();
+ const string kernel = path_join(path_join(source_path, "kernel"),
+ path_join("kernels", path_join("cuda", source)));
+ double starttime = time_dt();
+ printf("Compiling CUDA kernel ...\n");
+
+ path_create_directories(cubin);
+
+ string command = string_printf(
+ "\"%s\" "
+ "-arch=sm_%d%d "
+ "--cubin \"%s\" "
+ "-o \"%s\" "
+ "%s ",
+ nvcc,
+ major,
+ minor,
+ kernel.c_str(),
+ cubin.c_str(),
+ common_cflags.c_str());
+
+ printf("%s\n", command.c_str());
+
+ if (system(command.c_str()) == -1) {
+ cuda_error_message(
+ "Failed to execute compilation command, "
+ "see console for details.");
+ return "";
+ }
+
+ /* Verify if compilation succeeded */
+ if (!path_exists(cubin)) {
+ cuda_error_message(
+ "CUDA kernel compilation failed, "
+ "see console for details.");
+ return "";
+ }
+
+ printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
+
+ return cubin;
+ }
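
Aside, for concreteness: on an sm_7.5 device the locally compiled cache path built
above takes the form <cache>/kernels/cycles_kernel_sm75_<md5>.cubin, where the md5
covers both the kernel sources and the cflags, so changing either triggers a rebuild.
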
+
+ bool load_kernels(const DeviceRequestedFeatures &requested_features)
+ {
+ /* TODO(sergey): Support kernels re-load for CUDA devices.
+ *
+ * Currently re-loading kernel will invalidate memory pointers,
+ * causing problems in cuCtxSynchronize.
+ */
+ if (cuFilterModule && cuModule) {
+ VLOG(1) << "Skipping kernel reload, not currently supported.";
+ return true;
+ }
+
+ /* check if cuda init succeeded */
+ if (cuContext == 0)
+ return false;
+
+ /* check if GPU is supported */
+ if (!support_device(requested_features))
+ return false;
+
+ /* get kernel */
+ string cubin = compile_kernel(requested_features, false, use_split_kernel());
+ if (cubin == "")
+ return false;
+
+ string filter_cubin = compile_kernel(requested_features, true, false);
+ if (filter_cubin == "")
+ return false;
+
+ /* open module */
+ CUDAContextScope scope(this);
+
+ string cubin_data;
+ CUresult result;
+
+ if (path_read_text(cubin, cubin_data))
+ result = cuModuleLoadData(&cuModule, cubin_data.c_str());
+ else
+ result = CUDA_ERROR_FILE_NOT_FOUND;
+
+ if (cuda_error_(result, "cuModuleLoad"))
+ cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str()));
+
+ if (path_read_text(filter_cubin, cubin_data))
+ result = cuModuleLoadData(&cuFilterModule, cubin_data.c_str());
+ else
+ result = CUDA_ERROR_FILE_NOT_FOUND;
+
+ if (cuda_error_(result, "cuModuleLoad"))
+ cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str()));
+
+ if (result == CUDA_SUCCESS) {
+ reserve_local_memory(requested_features);
+ }
+
+ return (result == CUDA_SUCCESS);
+ }
+
+ void reserve_local_memory(const DeviceRequestedFeatures &requested_features)
+ {
+ if (use_split_kernel()) {
+      /* The split kernel mostly uses global memory and adaptive compilation,
+       * so it is difficult to predict how much is currently needed. */
+ return;
+ }
+
+ /* Together with CU_CTX_LMEM_RESIZE_TO_MAX, this reserves local memory
+ * needed for kernel launches, so that we can reliably figure out when
+ * to allocate scene data in mapped host memory. */
+ CUDAContextScope scope(this);
+
+ size_t total = 0, free_before = 0, free_after = 0;
+ cuMemGetInfo(&free_before, &total);
+
+ /* Get kernel function. */
+ CUfunction cuPathTrace;
+
+ if (requested_features.use_integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ }
+
+ cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+
+ int min_blocks, num_threads_per_block;
+ cuda_assert(cuOccupancyMaxPotentialBlockSize(
+ &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+
+    /* Launch the kernel; using just 1 block appears sufficient to reserve
+     * memory for all multiprocessors. It would still be good to do this in
+     * parallel for the multi GPU case, to make it faster. */
+ CUdeviceptr d_work_tiles = 0;
+ uint total_work_size = 0;
+
+ void *args[] = {&d_work_tiles, &total_work_size};
+
+ cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+
+ cuda_assert(cuCtxSynchronize());
+
+ cuMemGetInfo(&free_after, &total);
+ VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after)
+ << " bytes. (" << string_human_readable_size(free_before - free_after) << ")";
+
+#if 0
+ /* For testing mapped host memory, fill up device memory. */
+ const size_t keep_mb = 1024;
+
+ while (free_after > keep_mb * 1024 * 1024LL) {
+ CUdeviceptr tmp;
+ cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL));
+ cuMemGetInfo(&free_after, &total);
+ }
+#endif
+ }
+
+ void init_host_memory()
+ {
+    /* Limit the amount of host-mapped memory, because allocating too much can
+ * cause system instability. Leave at least half or 4 GB of system
+ * memory free, whichever is smaller. */
+ size_t default_limit = 4 * 1024 * 1024 * 1024LL;
+ size_t system_ram = system_physical_ram();
+
+ if (system_ram > 0) {
+ if (system_ram / 2 > default_limit) {
+ map_host_limit = system_ram - default_limit;
+ }
+ else {
+ map_host_limit = system_ram / 2;
+ }
+ }
+ else {
+ VLOG(1) << "Mapped host memory disabled, failed to get system RAM";
+ map_host_limit = 0;
+ }
+
+    /* Amount of device memory to keep free after texture memory
+ * and working memory allocations respectively. We set the working
+ * memory limit headroom lower so that some space is left after all
+ * texture memory allocations. */
+ device_working_headroom = 32 * 1024 * 1024LL; // 32MB
+ device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
+
+ VLOG(1) << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
+ << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
+ }
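
Aside, two worked examples of the limit computed above: with 16 GB of system RAM,
half (8 GB) exceeds the 4 GB default, so map_host_limit = 16 - 4 = 12 GB (leaving
4 GB free); with 6 GB, half (3 GB) is below the default, so map_host_limit = 3 GB
(leaving half free).
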
+
+ void load_texture_info()
+ {
+ if (need_texture_info) {
+ texture_info.copy_to_device();
+ need_texture_info = false;
+ }
+ }
+
+ void move_textures_to_host(size_t size, bool for_texture)
+ {
+ /* Signal to reallocate textures in host memory only. */
+ move_texture_to_host = true;
+
+ while (size > 0) {
+ /* Find suitable memory allocation to move. */
+ device_memory *max_mem = NULL;
+ size_t max_size = 0;
+ bool max_is_image = false;
+
+ foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
+ device_memory &mem = *pair.first;
+ CUDAMem *cmem = &pair.second;
+
+ bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
+ bool is_image = is_texture && (mem.data_height > 1);
+
+ /* Can't move this type of memory. */
+ if (!is_texture || cmem->array) {
+ continue;
+ }
+
+ /* Already in host memory. */
+ if (cmem->use_mapped_host) {
+ continue;
+ }
+
+ /* For other textures, only move image textures. */
+ if (for_texture && !is_image) {
+ continue;
+ }
+
+ /* Try to move largest allocation, prefer moving images. */
+ if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
+ max_is_image = is_image;
+ max_size = mem.device_size;
+ max_mem = &mem;
+ }
+ }
+
+ /* Move to host memory. This part is mutex protected since
+ * multiple CUDA devices could be moving the memory. The
+ * first one will do it, and the rest will adopt the pointer. */
+ if (max_mem) {
+ VLOG(1) << "Move memory from device to host: " << max_mem->name;
+
+ static thread_mutex move_mutex;
+ thread_scoped_lock lock(move_mutex);
+
+ /* Preserve the original device pointer, in case of multi device
+ * we can't change it because the pointer mapping would break. */
+ device_ptr prev_pointer = max_mem->device_pointer;
+ size_t prev_size = max_mem->device_size;
+
+ tex_free(*max_mem);
+ tex_alloc(*max_mem);
+ size = (max_size >= size) ? 0 : size - max_size;
+
+ max_mem->device_pointer = prev_pointer;
+ max_mem->device_size = prev_size;
+ }
+ else {
+ break;
+ }
+ }
+
+ /* Update texture info array with new pointers. */
+ load_texture_info();
+
+ move_texture_to_host = false;
+ }
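
Aside: the selection test in the loop above encodes a simple eviction preference:
image textures beat flat data textures, and among equals the larger allocation
wins. Restated as a comparator (the names are hypothetical):

    struct EvictionCandidateSketch {
      bool is_image;
      size_t device_size;
    };

    /* True when a should be moved to host memory in preference to b. */
    static bool prefer_evict(const EvictionCandidateSketch &a, const EvictionCandidateSketch &b)
    {
      if (a.is_image != b.is_image)
        return a.is_image; /* prefer moving image textures */
      return a.device_size > b.device_size; /* then the largest allocation */
    }
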
+
+ CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0)
+ {
+ CUDAContextScope scope(this);
+
+ CUdeviceptr device_pointer = 0;
+ size_t size = mem.memory_size() + pitch_padding;
+
+ CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
+ const char *status = "";
+
+ /* First try allocating in device memory, respecting headroom. We make
+ * an exception for texture info. It is small and frequently accessed,
+ * so treat it as working memory.
+ *
+ * If there is not enough room for working memory, we will try to move
+ * textures to host memory, assuming the performance impact would have
+ * been worse for working memory. */
+ bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
+ bool is_image = is_texture && (mem.data_height > 1);
+
+ size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
+
+ size_t total = 0, free = 0;
+ cuMemGetInfo(&free, &total);
+
+ /* Move textures to host memory if needed. */
+ if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
+ move_textures_to_host(size + headroom - free, is_texture);
+ cuMemGetInfo(&free, &total);
+ }
+
+ /* Allocate in device memory. */
+ if (!move_texture_to_host && (size + headroom) < free) {
+ mem_alloc_result = cuMemAlloc(&device_pointer, size);
+ if (mem_alloc_result == CUDA_SUCCESS) {
+ status = " in device memory";
+ }
+ }
+
+ /* Fall back to mapped host memory if needed and possible. */
+
+ void *shared_pointer = 0;
+
+ if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
+ if (mem.shared_pointer) {
+ /* Another device already allocated host memory. */
+ mem_alloc_result = CUDA_SUCCESS;
+ shared_pointer = mem.shared_pointer;
+ }
+ else if (map_host_used + size < map_host_limit) {
+ /* Allocate host memory ourselves. */
+ mem_alloc_result = cuMemHostAlloc(
+ &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
+
+ assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
+ (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
+ }
+
+ if (mem_alloc_result == CUDA_SUCCESS) {
+ cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
+ map_host_used += size;
+ status = " in host memory";
+ }
+ else {
+ status = " failed, out of host memory";
+ }
+ }
+
+ if (mem_alloc_result != CUDA_SUCCESS) {
+ status = " failed, out of device and host memory";
+ cuda_assert(mem_alloc_result);
+ }
+
+ if (mem.name) {
+ VLOG(1) << "Buffer allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")" << status;
+ }
+
+ mem.device_pointer = (device_ptr)device_pointer;
+ mem.device_size = size;
+ stats.mem_alloc(size);
+
+ if (!mem.device_pointer) {
+ return NULL;
+ }
+
+ /* Insert into map of allocations. */
+ CUDAMem *cmem = &cuda_mem_map[&mem];
+ if (shared_pointer != 0) {
+ /* Replace host pointer with our host allocation. Only works if
+ * CUDA memory layout is the same and has no pitch padding. Also
+ * does not work if we move textures to host during a render,
+ * since other devices might be using the memory. */
+
+ if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
+ mem.host_pointer != shared_pointer) {
+ memcpy(shared_pointer, mem.host_pointer, size);
+
+        /* A call to device_memory::host_free() should be preceded by
+ * a call to device_memory::device_free() for host memory
+ * allocated by a device to be handled properly. Two exceptions
+ * are here and a call in OptiXDevice::generic_alloc(), where
+ * the current host memory can be assumed to be allocated by
+ * device_memory::host_alloc(), not by a device */
+
+ mem.host_free();
+ mem.host_pointer = shared_pointer;
+ }
+ mem.shared_pointer = shared_pointer;
+ mem.shared_counter++;
+ cmem->use_mapped_host = true;
+ }
+ else {
+ cmem->use_mapped_host = false;
+ }
+
+ return cmem;
+ }
+
+ void generic_copy_to(device_memory &mem)
+ {
+ if (mem.host_pointer && mem.device_pointer) {
+ CUDAContextScope scope(this);
+
+ /* If use_mapped_host of mem is false, the current device only
+ * uses device memory allocated by cuMemAlloc regardless of
+ * mem.host_pointer and mem.shared_pointer, and should copy
+ * data from mem.host_pointer. */
+
+ if (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer) {
+ cuda_assert(cuMemcpyHtoD(
+ cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
+ }
+ }
+ }
+
+ void generic_free(device_memory &mem)
+ {
+ if (mem.device_pointer) {
+ CUDAContextScope scope(this);
+ const CUDAMem &cmem = cuda_mem_map[&mem];
+
+ /* If cmem.use_mapped_host is true, reference counting is used
+ * to safely free a mapped host memory. */
+
+ if (cmem.use_mapped_host) {
+ assert(mem.shared_pointer);
+ if (mem.shared_pointer) {
+ assert(mem.shared_counter > 0);
+ if (--mem.shared_counter == 0) {
+ if (mem.host_pointer == mem.shared_pointer) {
+ mem.host_pointer = 0;
+ }
+ cuMemFreeHost(mem.shared_pointer);
+ mem.shared_pointer = 0;
+ }
+ }
+ map_host_used -= mem.device_size;
+ }
+ else {
+ /* Free device memory. */
+ cuMemFree(mem.device_pointer);
+ }
+
+ stats.mem_free(mem.device_size);
+ mem.device_pointer = 0;
+ mem.device_size = 0;
+
+ cuda_mem_map.erase(cuda_mem_map.find(&mem));
+ }
+ }
+
+ void mem_alloc(device_memory &mem)
+ {
+ if (mem.type == MEM_PIXELS && !background) {
+ pixels_alloc(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ assert(!"mem_alloc not supported for textures.");
+ }
+ else {
+ generic_alloc(mem);
+ }
+ }
+
+ void mem_copy_to(device_memory &mem)
+ {
+ if (mem.type == MEM_PIXELS) {
+ assert(!"mem_copy_to not supported for pixels.");
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ tex_alloc(mem);
+ }
+ else {
+ if (!mem.device_pointer) {
+ generic_alloc(mem);
+ }
+
+ generic_copy_to(mem);
+ }
+ }
+
+ void mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
+ {
+ if (mem.type == MEM_PIXELS && !background) {
+ pixels_copy_from(mem, y, w, h);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ assert(!"mem_copy_from not supported for textures.");
+ }
+ else {
+ CUDAContextScope scope(this);
+ size_t offset = elem * y * w;
+ size_t size = elem * w * h;
+
+ if (mem.host_pointer && mem.device_pointer) {
+ cuda_assert(cuMemcpyDtoH(
+ (uchar *)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size));
+ }
+ else if (mem.host_pointer) {
+ memset((char *)mem.host_pointer + offset, 0, size);
+ }
+ }
+ }
+
+ void mem_zero(device_memory &mem)
+ {
+ if (!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+
+ if (mem.host_pointer) {
+ memset(mem.host_pointer, 0, mem.memory_size());
+ }
+
+ /* If use_mapped_host of mem is false, mem.device_pointer currently
+ * refers to device memory regardless of mem.host_pointer and
+ * mem.shared_pointer. */
+
+ if (mem.device_pointer &&
+ (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) {
+ CUDAContextScope scope(this);
+ cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
+ }
+ }
+
+ void mem_free(device_memory &mem)
+ {
+ if (mem.type == MEM_PIXELS && !background) {
+ pixels_free(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ }
+ else {
+ generic_free(mem);
+ }
+ }
+
+ virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
+ {
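+    /* Note (editor, inferred from the pointer arithmetic below): sub-pointers
+     * alias the parent allocation at an element offset; nothing is allocated
+     * here, so they are not freed separately. */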
+ return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
+ }
+
+ void const_copy_to(const char *name, void *host, size_t size)
+ {
+ CUDAContextScope scope(this);
+ CUdeviceptr mem;
+ size_t bytes;
+
+ cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name));
+ // assert(bytes == size);
+ cuda_assert(cuMemcpyHtoD(mem, host, size));
+ }
+
+ void tex_alloc(device_memory &mem)
+ {
+ CUDAContextScope scope(this);
+
+ /* General variables for both architectures */
+ string bind_name = mem.name;
+ size_t dsize = datatype_size(mem.data_type);
+ size_t size = mem.memory_size();
+
+ CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
+ switch (mem.extension) {
+ case EXTENSION_REPEAT:
+ address_mode = CU_TR_ADDRESS_MODE_WRAP;
+ break;
+ case EXTENSION_EXTEND:
+ address_mode = CU_TR_ADDRESS_MODE_CLAMP;
+ break;
+ case EXTENSION_CLIP:
+ address_mode = CU_TR_ADDRESS_MODE_BORDER;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ CUfilter_mode filter_mode;
+ if (mem.interpolation == INTERPOLATION_CLOSEST) {
+ filter_mode = CU_TR_FILTER_MODE_POINT;
+ }
+ else {
+ filter_mode = CU_TR_FILTER_MODE_LINEAR;
+ }
+
+ /* Data Storage */
+ if (mem.interpolation == INTERPOLATION_NONE) {
+ generic_alloc(mem);
+ generic_copy_to(mem);
+
+ CUdeviceptr cumem;
+ size_t cubytes;
+
+ cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
+
+ if (cubytes == 8) {
+ /* 64 bit device pointer */
+ uint64_t ptr = mem.device_pointer;
+ cuda_assert(cuMemcpyHtoD(cumem, (void *)&ptr, cubytes));
+ }
+ else {
+ /* 32 bit device pointer */
+ uint32_t ptr = (uint32_t)mem.device_pointer;
+ cuda_assert(cuMemcpyHtoD(cumem, (void *)&ptr, cubytes));
+ }
+ return;
+ }
+
+ /* Image Texture Storage */
+ CUarray_format_enum format;
+ switch (mem.data_type) {
+ case TYPE_UCHAR:
+ format = CU_AD_FORMAT_UNSIGNED_INT8;
+ break;
+ case TYPE_UINT16:
+ format = CU_AD_FORMAT_UNSIGNED_INT16;
+ break;
+ case TYPE_UINT:
+ format = CU_AD_FORMAT_UNSIGNED_INT32;
+ break;
+ case TYPE_INT:
+ format = CU_AD_FORMAT_SIGNED_INT32;
+ break;
+ case TYPE_FLOAT:
+ format = CU_AD_FORMAT_FLOAT;
+ break;
+ case TYPE_HALF:
+ format = CU_AD_FORMAT_HALF;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ CUDAMem *cmem = NULL;
+ CUarray array_3d = NULL;
+ size_t src_pitch = mem.data_width * dsize * mem.data_elements;
+ size_t dst_pitch = src_pitch;
+
+ if (mem.data_depth > 1) {
+ /* 3D texture using array, there is no API for linear memory. */
+ CUDA_ARRAY3D_DESCRIPTOR desc;
+
+ desc.Width = mem.data_width;
+ desc.Height = mem.data_height;
+ desc.Depth = mem.data_depth;
+ desc.Format = format;
+ desc.NumChannels = mem.data_elements;
+ desc.Flags = 0;
+
+ VLOG(1) << "Array 3D allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+
+ cuda_assert(cuArray3DCreate(&array_3d, &desc));
+
+ if (!array_3d) {
+ return;
+ }
+
+ CUDA_MEMCPY3D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+ param.dstArray = array_3d;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = mem.host_pointer;
+ param.srcPitch = src_pitch;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
+ param.Depth = mem.data_depth;
+
+ cuda_assert(cuMemcpy3D(&param));
+
+ mem.device_pointer = (device_ptr)array_3d;
+ mem.device_size = size;
+ stats.mem_alloc(size);
+
+ cmem = &cuda_mem_map[&mem];
+ cmem->texobject = 0;
+ cmem->array = array_3d;
+ }
+ else if (mem.data_height > 0) {
+ /* 2D texture, using pitch aligned linear memory. */
+ int alignment = 0;
+ cuda_assert(
+ cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
+ dst_pitch = align_up(src_pitch, alignment);
+ size_t dst_size = dst_pitch * mem.data_height;
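+      /* e.g. a 1001-pixel-wide float4 texture has src_pitch = 16016 bytes;
+       * assuming a 32-byte pitch alignment, dst_pitch becomes 16032. */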
+
+ cmem = generic_alloc(mem, dst_size - mem.memory_size());
+ if (!cmem) {
+ return;
+ }
+
+ CUDA_MEMCPY2D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+ param.dstDevice = mem.device_pointer;
+ param.dstPitch = dst_pitch;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = mem.host_pointer;
+ param.srcPitch = src_pitch;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
+
+ cuda_assert(cuMemcpy2DUnaligned(&param));
+ }
+ else {
+ /* 1D texture, using linear memory. */
+ cmem = generic_alloc(mem);
+ if (!cmem) {
+ return;
+ }
+
+ cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
+ }
+
+ /* Kepler+, bindless textures. */
+ int flat_slot = 0;
+ if (string_startswith(mem.name, "__tex_image")) {
+ int pos = string(mem.name).rfind("_");
+ flat_slot = atoi(mem.name + pos + 1);
+ }
+ else {
+ assert(0);
+ }
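+    /* e.g. a texture named "__tex_image_float4_095" (hypothetical name)
+     * parses to flat_slot 95; any other name trips the assert above. */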
+
+ CUDA_RESOURCE_DESC resDesc;
+ memset(&resDesc, 0, sizeof(resDesc));
+
+ if (array_3d) {
+ resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
+ resDesc.res.array.hArray = array_3d;
+ resDesc.flags = 0;
+ }
+ else if (mem.data_height > 0) {
+ resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
+ resDesc.res.pitch2D.devPtr = mem.device_pointer;
+ resDesc.res.pitch2D.format = format;
+ resDesc.res.pitch2D.numChannels = mem.data_elements;
+ resDesc.res.pitch2D.height = mem.data_height;
+ resDesc.res.pitch2D.width = mem.data_width;
+ resDesc.res.pitch2D.pitchInBytes = dst_pitch;
+ }
+ else {
+ resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
+ resDesc.res.linear.devPtr = mem.device_pointer;
+ resDesc.res.linear.format = format;
+ resDesc.res.linear.numChannels = mem.data_elements;
+ resDesc.res.linear.sizeInBytes = mem.device_size;
+ }
+
+ CUDA_TEXTURE_DESC texDesc;
+ memset(&texDesc, 0, sizeof(texDesc));
+ texDesc.addressMode[0] = address_mode;
+ texDesc.addressMode[1] = address_mode;
+ texDesc.addressMode[2] = address_mode;
+ texDesc.filterMode = filter_mode;
+ texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+
+ cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
+
+ /* Resize once */
+ if (flat_slot >= texture_info.size()) {
+      /* Allocate some slots in advance, to reduce the number
+       * of re-allocations. */
+ texture_info.resize(flat_slot + 128);
+ }
+
+ /* Set Mapping and tag that we need to (re-)upload to device */
+ TextureInfo &info = texture_info[flat_slot];
+ info.data = (uint64_t)cmem->texobject;
+ info.cl_buffer = 0;
+ info.interpolation = mem.interpolation;
+ info.extension = mem.extension;
+ info.width = mem.data_width;
+ info.height = mem.data_height;
+ info.depth = mem.data_depth;
+ need_texture_info = true;
+ }
+
+ void tex_free(device_memory &mem)
+ {
+ if (mem.device_pointer) {
+ CUDAContextScope scope(this);
+ const CUDAMem &cmem = cuda_mem_map[&mem];
+
+ if (cmem.texobject) {
+ /* Free bindless texture. */
+ cuTexObjectDestroy(cmem.texobject);
+ }
+
+ if (cmem.array) {
+ /* Free array. */
+ cuArrayDestroy(cmem.array);
+ stats.mem_free(mem.device_size);
+ mem.device_pointer = 0;
+ mem.device_size = 0;
+
+ cuda_mem_map.erase(cuda_mem_map.find(&mem));
+ }
+ else {
+ generic_free(mem);
+ }
+ }
+ }
+
+#define CUDA_GET_BLOCKSIZE(func, w, h) \
+ int threads_per_block; \
+ cuda_assert( \
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
+ int threads = (int)sqrt((float)threads_per_block); \
+ int xblocks = ((w) + threads - 1) / threads; \
+ int yblocks = ((h) + threads - 1) / threads;
+
+#define CUDA_LAUNCH_KERNEL(func, args) \
+ cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0));
+
+/* Similar to the above, but for 1-dimensional blocks. */
+#define CUDA_GET_BLOCKSIZE_1D(func, w, h) \
+ int threads_per_block; \
+ cuda_assert( \
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
+ int xblocks = ((w) + threads_per_block - 1) / threads_per_block; \
+ int yblocks = h;
+
+#define CUDA_LAUNCH_KERNEL_1D(func, args) \
+ cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads_per_block, 1, 1, 0, 0, args, 0));
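+/* Usage sketch (illustrative, placeholder names): each GET/LAUNCH pair
+ * expands into local variable declarations, so a given pair may be used at
+ * most once per scope:
+ *
+ *   CUDA_GET_BLOCKSIZE(cuFunc, w, h);
+ *   void *args[] = {&w, &h};
+ *   CUDA_LAUNCH_KERNEL(cuFunc, args);
+ */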
+
+ bool denoising_non_local_means(device_ptr image_ptr,
+ device_ptr guide_ptr,
+ device_ptr variance_ptr,
+ device_ptr out_ptr,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ int stride = task->buffer.stride;
+ int w = task->buffer.width;
+ int h = task->buffer.h;
+ int r = task->nlm_state.r;
+ int f = task->nlm_state.f;
+ float a = task->nlm_state.a;
+ float k_2 = task->nlm_state.k_2;
+
+ int pass_stride = task->buffer.pass_stride;
+ int num_shifts = (2 * r + 1) * (2 * r + 1);
+ int channel_offset = task->nlm_state.is_color ? task->buffer.pass_stride : 0;
+ int frame_offset = 0;
+
+ if (have_error())
+ return false;
+
+ CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
+ CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
+ CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts;
+ CUdeviceptr scale_ptr = 0;
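+    /* The pointers above carve task->buffer.temporary_mem into regions:
+     * difference and blurDifference each span pass_stride * num_shifts
+     * floats, and weightAccum follows them in the same allocation. */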
+
+ cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float) * pass_stride));
+ cuda_assert(cuMemsetD8(out_ptr, 0, sizeof(float) * pass_stride));
+
+ {
+ CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMUpdateOutput;
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
+ cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMUpdateOutput, cuFilterModule, "kernel_cuda_filter_nlm_update_output"));
+
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMUpdateOutput, CU_FUNC_CACHE_PREFER_L1));
+
+ CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w * h, num_shifts);
+
+ void *calc_difference_args[] = {&guide_ptr,
+ &variance_ptr,
+ &scale_ptr,
+ &difference,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &r,
+ &channel_offset,
+ &frame_offset,
+ &a,
+ &k_2};
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *calc_weight_args[] = {
+ &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *update_output_args[] = {&blurDifference,
+ &image_ptr,
+ &out_ptr,
+ &weightAccum,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &channel_offset,
+ &r,
+ &f};
+
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args);
+ }
+
+ {
+ CUfunction cuNLMNormalize;
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMNormalize, cuFilterModule, "kernel_cuda_filter_nlm_normalize"));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMNormalize, CU_FUNC_CACHE_PREFER_L1));
+ void *normalize_args[] = {&out_ptr, &weightAccum, &w, &h, &stride};
+ CUDA_GET_BLOCKSIZE(cuNLMNormalize, w, h);
+ CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args);
+ cuda_assert(cuCtxSynchronize());
+ }
+
+ return !have_error();
+ }
+
+ bool denoising_construct_transform(DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterConstructTransform;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterConstructTransform, cuFilterModule, "kernel_cuda_filter_construct_transform"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterConstructTransform, CU_FUNC_CACHE_PREFER_SHARED));
+ CUDA_GET_BLOCKSIZE(cuFilterConstructTransform, task->storage.w, task->storage.h);
+
+ void *args[] = {&task->buffer.mem.device_pointer,
+ &task->tile_info_mem.device_pointer,
+ &task->storage.transform.device_pointer,
+ &task->storage.rank.device_pointer,
+ &task->filter_area,
+ &task->rect,
+ &task->radius,
+ &task->pca_threshold,
+ &task->buffer.pass_stride,
+ &task->buffer.frame_stride,
+ &task->buffer.use_time};
+ CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_accumulate(device_ptr color_ptr,
+ device_ptr color_variance_ptr,
+ device_ptr scale_ptr,
+ int frame,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ int r = task->radius;
+ int f = 4;
+ float a = 1.0f;
+ float k_2 = task->nlm_k_2;
+
+ int w = task->reconstruction_state.source_w;
+ int h = task->reconstruction_state.source_h;
+ int stride = task->buffer.stride;
+ int frame_offset = frame * task->buffer.frame_stride;
+ int t = task->tile_info->frames[frame];
+
+ int pass_stride = task->buffer.pass_stride;
+ int num_shifts = (2 * r + 1) * (2 * r + 1);
+
+ if (have_error())
+ return false;
+
+ CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
+ CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
+
+ CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
+ cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMConstructGramian, cuFilterModule, "kernel_cuda_filter_nlm_construct_gramian"));
+
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED));
+
+ CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference,
+ task->reconstruction_state.source_w *
+ task->reconstruction_state.source_h,
+ num_shifts);
+
+ void *calc_difference_args[] = {&color_ptr,
+ &color_variance_ptr,
+ &scale_ptr,
+ &difference,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &r,
+ &pass_stride,
+ &frame_offset,
+ &a,
+ &k_2};
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *calc_weight_args[] = {
+ &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *construct_gramian_args[] = {&t,
+ &blurDifference,
+ &task->buffer.mem.device_pointer,
+ &task->storage.transform.device_pointer,
+ &task->storage.rank.device_pointer,
+ &task->storage.XtWX.device_pointer,
+ &task->storage.XtWY.device_pointer,
+ &task->reconstruction_state.filter_window,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &r,
+ &f,
+ &frame_offset,
+ &task->buffer.use_time};
+
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_solve(device_ptr output_ptr, DenoisingTask *task)
+ {
+ CUfunction cuFinalize;
+ cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize"));
+ cuda_assert(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1));
+ void *finalize_args[] = {&output_ptr,
+ &task->storage.rank.device_pointer,
+ &task->storage.XtWX.device_pointer,
+ &task->storage.XtWY.device_pointer,
+ &task->filter_area,
+ &task->reconstruction_state.buffer_params.x,
+ &task->render_buffer.samples};
+ CUDA_GET_BLOCKSIZE(
+ cuFinalize, task->reconstruction_state.source_w, task->reconstruction_state.source_h);
+ CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_combine_halves(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ int r,
+ int4 rect,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterCombineHalves;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterCombineHalves, cuFilterModule, "kernel_cuda_filter_combine_halves"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterCombineHalves, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterCombineHalves, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&mean_ptr, &variance_ptr, &a_ptr, &b_ptr, &rect, &r};
+ CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_divide_shadow(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr sample_variance_ptr,
+ device_ptr sv_variance_ptr,
+ device_ptr buffer_variance_ptr,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterDivideShadow;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterDivideShadow, cuFilterModule, "kernel_cuda_filter_divide_shadow"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterDivideShadow, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->tile_info_mem.device_pointer,
+ &a_ptr,
+ &b_ptr,
+ &sample_variance_ptr,
+ &sv_variance_ptr,
+ &buffer_variance_ptr,
+ &task->rect,
+ &task->render_buffer.pass_stride,
+ &task->render_buffer.offset};
+ CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_get_feature(int mean_offset,
+ int variance_offset,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ float scale,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterGetFeature;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterGetFeature, cuFilterModule, "kernel_cuda_filter_get_feature"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterGetFeature, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterGetFeature, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->tile_info_mem.device_pointer,
+ &mean_offset,
+ &variance_offset,
+ &mean_ptr,
+ &variance_ptr,
+ &scale,
+ &task->rect,
+ &task->render_buffer.pass_stride,
+ &task->render_buffer.offset};
+ CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_write_feature(int out_offset,
+ device_ptr from_ptr,
+ device_ptr buffer_ptr,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterWriteFeature;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterWriteFeature, cuFilterModule, "kernel_cuda_filter_write_feature"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterWriteFeature, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(cuFilterWriteFeature, task->filter_area.z, task->filter_area.w);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->reconstruction_state.buffer_params,
+ &task->filter_area,
+ &from_ptr,
+ &buffer_ptr,
+ &out_offset,
+ &task->rect};
+ CUDA_LAUNCH_KERNEL(cuFilterWriteFeature, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterDetectOutliers;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterDetectOutliers, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&image_ptr,
+ &variance_ptr,
+ &depth_ptr,
+ &output_ptr,
+ &task->rect,
+ &task->buffer.pass_stride};
+
+ CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ void denoise(RenderTile &rtile, DenoisingTask &denoising)
+ {
+ denoising.functions.construct_transform = function_bind(
+ &CUDADevice::denoising_construct_transform, this, &denoising);
+ denoising.functions.accumulate = function_bind(
+ &CUDADevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.solve = function_bind(&CUDADevice::denoising_solve, this, _1, &denoising);
+ denoising.functions.divide_shadow = function_bind(
+ &CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.non_local_means = function_bind(
+ &CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.combine_halves = function_bind(
+ &CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
+ denoising.functions.get_feature = function_bind(
+ &CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.write_feature = function_bind(
+ &CUDADevice::denoising_write_feature, this, _1, _2, _3, &denoising);
+ denoising.functions.detect_outliers = function_bind(
+ &CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
+
+ denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
+ denoising.render_buffer.samples = rtile.sample;
+ denoising.buffer.gpu_temporary_mem = true;
+
+ denoising.run_denoising(&rtile);
+ }
+
+ void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles)
+ {
+ scoped_timer timer(&rtile.buffers->render_time);
+
+ if (have_error())
+ return;
+
+ CUDAContextScope scope(this);
+ CUfunction cuPathTrace;
+
+ /* Get kernel function. */
+ if (task.integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ }
+
+ if (have_error()) {
+ return;
+ }
+
+ cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+
+ /* Allocate work tile. */
+ work_tiles.alloc(1);
+
+ WorkTile *wtile = work_tiles.data();
+ wtile->x = rtile.x;
+ wtile->y = rtile.y;
+ wtile->w = rtile.w;
+ wtile->h = rtile.h;
+ wtile->offset = rtile.offset;
+ wtile->stride = rtile.stride;
+ wtile->buffer = (float *)cuda_device_ptr(rtile.buffer);
+
+ /* Prepare work size. More step samples render faster, but for now we
+ * remain conservative for GPUs connected to a display to avoid driver
+ * timeouts and display freezing. */
+ int min_blocks, num_threads_per_block;
+ cuda_assert(cuOccupancyMaxPotentialBlockSize(
+ &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+ if (!info.display_device) {
+ min_blocks *= 8;
+ }
+
+ uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
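+    /* Example (hypothetical numbers): min_blocks = 40 and 256 threads per
+     * block on a non-display device (min_blocks *= 8 -> 320) with a 64x64
+     * tile gives step_samples = divide_up(320 * 256, 4096) = 20. */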
+
+ /* Render all samples. */
+ int start_sample = rtile.start_sample;
+ int end_sample = rtile.start_sample + rtile.num_samples;
+
+ for (int sample = start_sample; sample < end_sample; sample += step_samples) {
+ /* Setup and copy work tile to device. */
+ wtile->start_sample = sample;
+ wtile->num_samples = min(step_samples, end_sample - sample);
+ work_tiles.copy_to_device();
+
+ CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
+ uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
+ uint num_blocks = divide_up(total_work_size, num_threads_per_block);
+
+ /* Launch kernel. */
+ void *args[] = {&d_work_tiles, &total_work_size};
+
+ cuda_assert(cuLaunchKernel(
+ cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+
+ cuda_assert(cuCtxSynchronize());
+
+ /* Update progress. */
+ rtile.sample = sample + wtile->num_samples;
+ task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
+
+ if (task.get_cancel()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+ }
+ }
+
+ void film_convert(DeviceTask &task,
+ device_ptr buffer,
+ device_ptr rgba_byte,
+ device_ptr rgba_half)
+ {
+ if (have_error())
+ return;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilmConvert;
+ CUdeviceptr d_rgba = map_pixels((rgba_byte) ? rgba_byte : rgba_half);
+ CUdeviceptr d_buffer = cuda_device_ptr(buffer);
+
+ /* get kernel function */
+ if (rgba_half) {
+ cuda_assert(
+ cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"));
+ }
+
+ float sample_scale = 1.0f / (task.sample + 1);
+
+ /* pass in parameters */
+ void *args[] = {&d_rgba,
+ &d_buffer,
+ &sample_scale,
+ &task.x,
+ &task.y,
+ &task.w,
+ &task.h,
+ &task.offset,
+ &task.stride};
+
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(
+ &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert));
+
+ int xthreads = (int)sqrt(threads_per_block);
+ int ythreads = (int)sqrt(threads_per_block);
+ int xblocks = (task.w + xthreads - 1) / xthreads;
+ int yblocks = (task.h + ythreads - 1) / ythreads;
+
+ cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1));
+
+ cuda_assert(cuLaunchKernel(cuFilmConvert,
+ xblocks,
+ yblocks,
+ 1, /* blocks */
+ xthreads,
+ ythreads,
+ 1, /* threads */
+ 0,
+ 0,
+ args,
+ 0));
+
+ unmap_pixels((rgba_byte) ? rgba_byte : rgba_half);
+
+ cuda_assert(cuCtxSynchronize());
+ }
+
+ void shader(DeviceTask &task)
+ {
+ if (have_error())
+ return;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuShader;
+ CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
+ CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
+
+ /* get kernel function */
+ if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
+ }
+ else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_background"));
+ }
+
+ /* do tasks in smaller chunks, so we can cancel it */
+ const int shader_chunk_size = 65536;
+ const int start = task.shader_x;
+ const int end = task.shader_x + task.shader_w;
+ int offset = task.offset;
+
+ bool canceled = false;
+ for (int sample = 0; sample < task.num_samples && !canceled; sample++) {
+ for (int shader_x = start; shader_x < end; shader_x += shader_chunk_size) {
+ int shader_w = min(shader_chunk_size, end - shader_x);
+
+ /* pass in parameters */
+ void *args[8];
+ int arg = 0;
+ args[arg++] = &d_input;
+ args[arg++] = &d_output;
+ args[arg++] = &task.shader_eval_type;
+ if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
+ args[arg++] = &task.shader_filter;
+ }
+ args[arg++] = &shader_x;
+ args[arg++] = &shader_w;
+ args[arg++] = &offset;
+ args[arg++] = &sample;
+
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(
+ &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
+
+ int xblocks = (shader_w + threads_per_block - 1) / threads_per_block;
+
+ cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuLaunchKernel(cuShader,
+ xblocks,
+ 1,
+ 1, /* blocks */
+ threads_per_block,
+ 1,
+ 1, /* threads */
+ 0,
+ 0,
+ args,
+ 0));
+
+ cuda_assert(cuCtxSynchronize());
+
+ if (task.get_cancel()) {
+ canceled = true;
+ break;
+ }
+ }
+
+ task.update_progress(NULL);
+ }
+ }
+
+ CUdeviceptr map_pixels(device_ptr mem)
+ {
+ if (!background) {
+ PixelMem pmem = pixel_mem_map[mem];
+ CUdeviceptr buffer;
+
+ size_t bytes;
+ cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0));
+ cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource));
+
+ return buffer;
+ }
+
+ return cuda_device_ptr(mem);
+ }
+
+ void unmap_pixels(device_ptr mem)
+ {
+ if (!background) {
+ PixelMem pmem = pixel_mem_map[mem];
+
+ cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0));
+ }
+ }
+
+ void pixels_alloc(device_memory &mem)
+ {
+ PixelMem pmem;
+
+ pmem.w = mem.data_width;
+ pmem.h = mem.data_height;
+
+ CUDAContextScope scope(this);
+
+ glGenBuffers(1, &pmem.cuPBO);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ if (mem.data_type == TYPE_HALF)
+ glBufferData(
+ GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(GLhalf) * 4, NULL, GL_DYNAMIC_DRAW);
+ else
+ glBufferData(
+ GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(uint8_t) * 4, NULL, GL_DYNAMIC_DRAW);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+ glActiveTexture(GL_TEXTURE0);
+ glGenTextures(1, &pmem.cuTexId);
+ glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
+ if (mem.data_type == TYPE_HALF)
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
+ else
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glBindTexture(GL_TEXTURE_2D, 0);
+
+ CUresult result = cuGraphicsGLRegisterBuffer(
+ &pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
+
+ if (result == CUDA_SUCCESS) {
+ mem.device_pointer = pmem.cuTexId;
+ pixel_mem_map[mem.device_pointer] = pmem;
+
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
+
+ return;
+ }
+ else {
+ /* failed to register buffer, fallback to no interop */
+ glDeleteBuffers(1, &pmem.cuPBO);
+ glDeleteTextures(1, &pmem.cuTexId);
+
+ background = true;
+ }
+ }
+
+ void pixels_copy_from(device_memory &mem, int y, int w, int h)
+ {
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
+
+ CUDAContextScope scope(this);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ uchar *pixels = (uchar *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
+ size_t offset = sizeof(uchar) * 4 * y * w;
+ memcpy((uchar *)mem.host_pointer + offset, pixels + offset, sizeof(uchar) * 4 * w * h);
+ glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ }
+
+ void pixels_free(device_memory &mem)
+ {
+ if (mem.device_pointer) {
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
+
+ CUDAContextScope scope(this);
+
+ cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
+ glDeleteBuffers(1, &pmem.cuPBO);
+ glDeleteTextures(1, &pmem.cuTexId);
+
+ pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
+ mem.device_pointer = 0;
+
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
+ }
+ }
+
+ void draw_pixels(device_memory &mem,
+ int y,
+ int w,
+ int h,
+ int width,
+ int height,
+ int dx,
+ int dy,
+ int dw,
+ int dh,
+ bool transparent,
+ const DeviceDrawParams &draw_params)
+ {
+ assert(mem.type == MEM_PIXELS);
+
+ if (!background) {
+ const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
+ float *vpointer;
+
+ CUDAContextScope scope(this);
+
+      /* for multi-device rendering, this assumes the inefficient approach of
+       * allocating all pixels on every device even though each renders only a subset */
+ size_t offset = 4 * y * w;
+
+ if (mem.data_type == TYPE_HALF)
+ offset *= sizeof(GLhalf);
+ else
+ offset *= sizeof(uint8_t);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ glActiveTexture(GL_TEXTURE0);
+ glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
+ if (mem.data_type == TYPE_HALF) {
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, (void *)offset);
+ }
+ else {
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void *)offset);
+ }
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+ if (transparent) {
+ glEnable(GL_BLEND);
+ glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
+ }
+
+ GLint shader_program;
+ if (use_fallback_shader) {
+ if (!bind_fallback_display_space_shader(dw, dh)) {
+ return;
+ }
+ shader_program = fallback_shader_program;
+ }
+ else {
+ draw_params.bind_display_space_shader_cb();
+ glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
+ }
+
+ if (!vertex_buffer) {
+ glGenBuffers(1, &vertex_buffer);
+ }
+
+ glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
+ /* invalidate old contents -
+ * avoids stalling if buffer is still waiting in queue to be rendered */
+ glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
+
+ vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
+
+ if (vpointer) {
+ /* texture coordinate - vertex pair */
+ vpointer[0] = 0.0f;
+ vpointer[1] = 0.0f;
+ vpointer[2] = dx;
+ vpointer[3] = dy;
+
+ vpointer[4] = (float)w / (float)pmem.w;
+ vpointer[5] = 0.0f;
+ vpointer[6] = (float)width + dx;
+ vpointer[7] = dy;
+
+ vpointer[8] = (float)w / (float)pmem.w;
+ vpointer[9] = (float)h / (float)pmem.h;
+ vpointer[10] = (float)width + dx;
+ vpointer[11] = (float)height + dy;
+
+ vpointer[12] = 0.0f;
+ vpointer[13] = (float)h / (float)pmem.h;
+ vpointer[14] = dx;
+ vpointer[15] = (float)height + dy;
+
+ glUnmapBuffer(GL_ARRAY_BUFFER);
+ }
+
+ GLuint vertex_array_object;
+ GLuint position_attribute, texcoord_attribute;
+
+ glGenVertexArrays(1, &vertex_array_object);
+ glBindVertexArray(vertex_array_object);
+
+ texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
+ position_attribute = glGetAttribLocation(shader_program, "pos");
+
+ glEnableVertexAttribArray(texcoord_attribute);
+ glEnableVertexAttribArray(position_attribute);
+
+ glVertexAttribPointer(
+ texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
+ glVertexAttribPointer(position_attribute,
+ 2,
+ GL_FLOAT,
+ GL_FALSE,
+ 4 * sizeof(float),
+ (const GLvoid *)(sizeof(float) * 2));
+
+ glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+ if (use_fallback_shader) {
+ glUseProgram(0);
+ }
+ else {
+ draw_params.unbind_display_space_shader_cb();
+ }
+
+ if (transparent) {
+ glDisable(GL_BLEND);
+ }
+
+ glBindTexture(GL_TEXTURE_2D, 0);
+
+ return;
+ }
+
+ Device::draw_pixels(mem, y, w, h, width, height, dx, dy, dw, dh, transparent, draw_params);
+ }
+
+ void thread_run(DeviceTask *task)
+ {
+ CUDAContextScope scope(this);
+
+ if (task->type == DeviceTask::RENDER) {
+ DeviceRequestedFeatures requested_features;
+ if (use_split_kernel()) {
+ if (split_kernel == NULL) {
+ split_kernel = new CUDASplitKernel(this);
+ split_kernel->load_kernels(requested_features);
+ }
+ }
+
+ device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
+
+ /* keep rendering tiles until done */
+ RenderTile tile;
+ DenoisingTask denoising(this, *task);
+
+ while (task->acquire_tile(this, tile)) {
+ if (tile.task == RenderTile::PATH_TRACE) {
+ if (use_split_kernel()) {
+ device_only_memory<uchar> void_buffer(this, "void_buffer");
+ split_kernel->path_trace(task, tile, void_buffer, void_buffer);
+ }
+ else {
+ path_trace(*task, tile, work_tiles);
+ }
+ }
+ else if (tile.task == RenderTile::DENOISE) {
+ tile.sample = tile.start_sample + tile.num_samples;
+
+ denoise(tile, denoising);
+
+ task->update_progress(&tile, tile.w * tile.h);
+ }
+
+ task->release_tile(tile);
+
+ if (task->get_cancel()) {
+ if (task->need_finish_queue == false)
+ break;
+ }
+ }
+
+ work_tiles.free();
+ }
+ else if (task->type == DeviceTask::SHADER) {
+ shader(*task);
+
+ cuda_assert(cuCtxSynchronize());
+ }
+ }
+
+ class CUDADeviceTask : public DeviceTask {
+ public:
+ CUDADeviceTask(CUDADevice *device, DeviceTask &task) : DeviceTask(task)
+ {
+ run = function_bind(&CUDADevice::thread_run, device, this);
+ }
+ };
+
+ void task_add(DeviceTask &task)
+ {
+ CUDAContextScope scope(this);
+
+ /* Load texture info. */
+ load_texture_info();
+
+ /* Synchronize all memory copies before executing task. */
+ cuda_assert(cuCtxSynchronize());
+
+ if (task.type == DeviceTask::FILM_CONVERT) {
+      /* must be done in the main thread due to OpenGL access */
+ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
+ }
+ else {
+ task_pool.push(new CUDADeviceTask(this, task));
+ }
+ }
+
+ void task_wait()
+ {
+ task_pool.wait();
+ }
+
+ void task_cancel()
+ {
+ task_pool.cancel();
+ }
+
+ friend class CUDASplitKernelFunction;
+ friend class CUDASplitKernel;
+ friend class CUDAContextScope;
+};
+
+/* Redefine the cuda_assert macro so it can be used outside of the CUDADevice
+ * class, now that the definition of that class is complete.
+ */
+#undef cuda_assert
+#define cuda_assert(stmt) \
+ { \
+ CUresult result = stmt; \
+\
+ if (result != CUDA_SUCCESS) { \
+ string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \
+ if (device->error_msg == "") \
+ device->error_msg = message; \
+ fprintf(stderr, "%s\n", message.c_str()); \
+ /*cuda_abort();*/ \
+ device->cuda_error_documentation(); \
+ } \
+ } \
+ (void)0
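+
+/* Note: unlike the in-class variant, this definition reports errors through a
+ * local `device` pointer, which CUDAContextScope and the split-kernel helpers
+ * below have in scope. */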
+
+/* CUDA context scope. */
+
+CUDAContextScope::CUDAContextScope(CUDADevice *device) : device(device)
+{
+ cuda_assert(cuCtxPushCurrent(device->cuContext));
+}
+
+CUDAContextScope::~CUDAContextScope()
+{
+ cuda_assert(cuCtxPopCurrent(NULL));
+}
+
+/* split kernel */
+
+class CUDASplitKernelFunction : public SplitKernelFunction {
+ CUDADevice *device;
+ CUfunction func;
+
+ public:
+ CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func)
+ {
+ }
+
+ /* enqueue the kernel, returns false if there is an error */
+ bool enqueue(const KernelDimensions &dim, device_memory & /*kg*/, device_memory & /*data*/)
+ {
+ return enqueue(dim, NULL);
+ }
+
+ /* enqueue the kernel, returns false if there is an error */
+ bool enqueue(const KernelDimensions &dim, void *args[])
+ {
+ if (device->have_error())
+ return false;
+
+ CUDAContextScope scope(device);
+
+ /* we ignore dim.local_size for now, as this is faster */
+ int threads_per_block;
+ cuda_assert(
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func));
+
+ int xblocks = (dim.global_size[0] * dim.global_size[1] + threads_per_block - 1) /
+ threads_per_block;
+
+ cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1));
+
+ cuda_assert(cuLaunchKernel(func,
+ xblocks,
+ 1,
+ 1, /* blocks */
+ threads_per_block,
+ 1,
+ 1, /* threads */
+ 0,
+ 0,
+ args,
+ 0));
+
+ return !device->have_error();
+ }
+};
+
+CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device)
+{
+}
+
+uint64_t CUDASplitKernel::state_buffer_size(device_memory & /*kg*/,
+ device_memory & /*data*/,
+ size_t num_threads)
+{
+ CUDAContextScope scope(device);
+
+ device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
+ size_buffer.alloc(1);
+ size_buffer.zero_to_device();
+
+ uint threads = num_threads;
+ CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
+
+ struct args_t {
+ uint *num_threads;
+ CUdeviceptr *size;
+ };
+
+ args_t args = {&threads, &d_size};
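+  /* cuLaunchKernel expects an array of pointers to the kernel parameters;
+   * args_t mirrors that layout, so &args can be passed as the void** array. */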
+
+ CUfunction state_buffer_size;
+ cuda_assert(
+ cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size"));
+
+ cuda_assert(cuLaunchKernel(state_buffer_size, 1, 1, 1, 1, 1, 1, 0, 0, (void **)&args, 0));
+
+ size_buffer.copy_from_device(0, 1, 1);
+ size_t size = size_buffer[0];
+ size_buffer.free();
+
+ return size;
+}
+
+bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim,
+ RenderTile &rtile,
+ int num_global_elements,
+ device_memory & /*kernel_globals*/,
+ device_memory & /*kernel_data*/,
+ device_memory &split_data,
+ device_memory &ray_state,
+ device_memory &queue_index,
+ device_memory &use_queues_flag,
+ device_memory &work_pool_wgs)
+{
+ CUDAContextScope scope(device);
+
+ CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer);
+ CUdeviceptr d_ray_state = device->cuda_device_ptr(ray_state.device_pointer);
+ CUdeviceptr d_queue_index = device->cuda_device_ptr(queue_index.device_pointer);
+ CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer);
+ CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer);
+
+ CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer);
+
+ int end_sample = rtile.start_sample + rtile.num_samples;
+ int queue_size = dim.global_size[0] * dim.global_size[1];
+
+ struct args_t {
+ CUdeviceptr *split_data_buffer;
+ int *num_elements;
+ CUdeviceptr *ray_state;
+ int *start_sample;
+ int *end_sample;
+ int *sx;
+ int *sy;
+ int *sw;
+ int *sh;
+ int *offset;
+ int *stride;
+ CUdeviceptr *queue_index;
+ int *queuesize;
+ CUdeviceptr *use_queues_flag;
+ CUdeviceptr *work_pool_wgs;
+ int *num_samples;
+ CUdeviceptr *buffer;
+ };
+
+ args_t args = {&d_split_data,
+ &num_global_elements,
+ &d_ray_state,
+ &rtile.start_sample,
+ &end_sample,
+ &rtile.x,
+ &rtile.y,
+ &rtile.w,
+ &rtile.h,
+ &rtile.offset,
+ &rtile.stride,
+ &d_queue_index,
+ &queue_size,
+ &d_use_queues_flag,
+ &d_work_pool_wgs,
+ &rtile.num_samples,
+ &d_buffer};
+
+ CUfunction data_init;
+ cuda_assert(
+ cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init"));
+ if (device->have_error()) {
+ return false;
+ }
+
+ CUDASplitKernelFunction(device, data_init).enqueue(dim, (void **)&args);
+
+ return !device->have_error();
+}
+
+SplitKernelFunction *CUDASplitKernel::get_split_kernel_function(const string &kernel_name,
+ const DeviceRequestedFeatures &)
+{
+ CUDAContextScope scope(device);
+ CUfunction func;
+
+ cuda_assert(
+ cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data()));
+ if (device->have_error()) {
+ device->cuda_error_message(
+ string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data()));
+ return NULL;
+ }
+
+ return new CUDASplitKernelFunction(device, func);
+}
+
+int2 CUDASplitKernel::split_kernel_local_size()
+{
+ return make_int2(32, 1);
+}
+
+int2 CUDASplitKernel::split_kernel_global_size(device_memory &kg,
+ device_memory &data,
+ DeviceTask * /*task*/)
+{
+ CUDAContextScope scope(device);
+ size_t free;
+ size_t total;
+
+ cuda_assert(cuMemGetInfo(&free, &total));
+
+ VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(free)
+ << " bytes. (" << string_human_readable_size(free) << ").";
+
+ size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2);
+ size_t side = round_down((int)sqrt(num_elements), 32);
+ int2 global_size = make_int2(side, round_down(num_elements / side, 16));
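+  /* Example (hypothetical): num_elements = 1000000 gives
+   * side = round_down(1000, 32) = 992 and a global size of
+   * 992 x round_down(1000000 / 992, 16) = 992 x 1008 work items. */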
+ VLOG(1) << "Global size: " << global_size << ".";
+ return global_size;
+}
+
+bool device_cuda_init()
+{
+#ifdef WITH_CUDA_DYNLOAD
+ static bool initialized = false;
+ static bool result = false;
+
+ if (initialized)
+ return result;
+
+ initialized = true;
+ int cuew_result = cuewInit(CUEW_INIT_CUDA);
+ if (cuew_result == CUEW_SUCCESS) {
+ VLOG(1) << "CUEW initialization succeeded";
+ if (CUDADevice::have_precompiled_kernels()) {
+ VLOG(1) << "Found precompiled kernels";
+ result = true;
+ }
+# ifndef _WIN32
+ else if (cuewCompilerPath() != NULL) {
+ VLOG(1) << "Found CUDA compiler " << cuewCompilerPath();
+ result = true;
+ }
+ else {
+ VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found,"
+ << " unable to use CUDA";
+ }
+# endif
+ }
+ else {
+ VLOG(1) << "CUEW initialization failed: "
+ << ((cuew_result == CUEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
+ "Error opening the library");
+ }
+
+ return result;
+#else /* WITH_CUDA_DYNLOAD */
+ return true;
+#endif /* WITH_CUDA_DYNLOAD */
+}
+
+Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
+{
+ return new CUDADevice(info, stats, profiler, background);
+}
+
+static CUresult device_cuda_safe_init()
+{
+#ifdef _WIN32
+ __try {
+ return cuInit(0);
+ }
+ __except (EXCEPTION_EXECUTE_HANDLER) {
+ /* Ignore crashes inside the CUDA driver and hope we can
+ * survive even with corrupted CUDA installs. */
+ fprintf(stderr, "Cycles CUDA: driver crashed, continuing without CUDA.\n");
+ }
+
+ return CUDA_ERROR_NO_DEVICE;
+#else
+ return cuInit(0);
+#endif
+}
+
+void device_cuda_info(vector<DeviceInfo> &devices)
+{
+ CUresult result = device_cuda_safe_init();
+ if (result != CUDA_SUCCESS) {
+ if (result != CUDA_ERROR_NO_DEVICE)
+ fprintf(stderr, "CUDA cuInit: %s\n", cuewErrorString(result));
+ return;
+ }
+
+ int count = 0;
+ result = cuDeviceGetCount(&count);
+ if (result != CUDA_SUCCESS) {
+ fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result));
+ return;
+ }
+
+ vector<DeviceInfo> display_devices;
+
+ for (int num = 0; num < count; num++) {
+ char name[256];
+
+ result = cuDeviceGetName(name, 256, num);
+ if (result != CUDA_SUCCESS) {
+ fprintf(stderr, "CUDA cuDeviceGetName: %s\n", cuewErrorString(result));
+ continue;
+ }
+
+ int major;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
+ if (major < 3) {
+ VLOG(1) << "Ignoring device \"" << name << "\", this graphics card is no longer supported.";
+ continue;
+ }
+
+ DeviceInfo info;
+
+ info.type = DEVICE_CUDA;
+ info.description = string(name);
+ info.num = num;
+
+ info.has_half_images = (major >= 3);
+ info.has_volume_decoupled = false;
+
+ int pci_location[3] = {0, 0, 0};
+ cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
+ cuDeviceGetAttribute(&pci_location[1], CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, num);
+ cuDeviceGetAttribute(&pci_location[2], CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, num);
+ info.id = string_printf("CUDA_%s_%04x:%02x:%02x",
+ name,
+ (unsigned int)pci_location[0],
+ (unsigned int)pci_location[1],
+ (unsigned int)pci_location[2]);
+
+ /* If device has a kernel timeout and no compute preemption, we assume
+ * it is connected to a display and will freeze the display while doing
+ * computations. */
+ int timeout_attr = 0, preempt_attr = 0;
+ cuDeviceGetAttribute(&timeout_attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num);
+ cuDeviceGetAttribute(&preempt_attr, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, num);
+
+ /* The CUDA driver reports compute preemption as not being available on
+ * Windows 10 even when it is, due to an issue in application profiles.
+ * Detect case where we expect it to be available and override. */
+ if (preempt_attr == 0 && (major >= 6) && system_windows_version_at_least(10, 17134)) {
+ VLOG(1) << "Assuming device has compute preemption on Windows 10.";
+ preempt_attr = 1;
+ }
+
+ if (timeout_attr && !preempt_attr) {
+ VLOG(1) << "Device is recognized as display.";
+ info.description += " (Display)";
+ info.display_device = true;
+ display_devices.push_back(info);
+ }
+ else {
+ VLOG(1) << "Device has compute preemption or is not used for display.";
+ devices.push_back(info);
+ }
+ VLOG(1) << "Added device \"" << name << "\" with id \"" << info.id << "\".";
+ }
+
+ if (!display_devices.empty())
+ devices.insert(devices.end(), display_devices.begin(), display_devices.end());
+}
+
+string device_cuda_capabilities()
+{
+ CUresult result = device_cuda_safe_init();
+ if (result != CUDA_SUCCESS) {
+ if (result != CUDA_ERROR_NO_DEVICE) {
+ return string("Error initializing CUDA: ") + cuewErrorString(result);
+ }
+ return "No CUDA device found\n";
+ }
+
+ int count;
+ result = cuDeviceGetCount(&count);
+ if (result != CUDA_SUCCESS) {
+ return string("Error getting devices: ") + cuewErrorString(result);
+ }
+
+ string capabilities = "";
+ for (int num = 0; num < count; num++) {
+ char name[256];
+ if (cuDeviceGetName(name, 256, num) != CUDA_SUCCESS) {
+ continue;
+ }
+ capabilities += string("\t") + name + "\n";
+ int value;
+#define GET_ATTR(attr) \
+ { \
+ if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \
+ capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \
+ } \
+ } \
+ (void)0
+  /* TODO(sergey): Strip all attributes which are not useful for us
+   * or do not depend on the driver.
+   */
+ GET_ATTR(MAX_THREADS_PER_BLOCK);
+ GET_ATTR(MAX_BLOCK_DIM_X);
+ GET_ATTR(MAX_BLOCK_DIM_Y);
+ GET_ATTR(MAX_BLOCK_DIM_Z);
+ GET_ATTR(MAX_GRID_DIM_X);
+ GET_ATTR(MAX_GRID_DIM_Y);
+ GET_ATTR(MAX_GRID_DIM_Z);
+ GET_ATTR(MAX_SHARED_MEMORY_PER_BLOCK);
+ GET_ATTR(SHARED_MEMORY_PER_BLOCK);
+ GET_ATTR(TOTAL_CONSTANT_MEMORY);
+ GET_ATTR(WARP_SIZE);
+ GET_ATTR(MAX_PITCH);
+ GET_ATTR(MAX_REGISTERS_PER_BLOCK);
+ GET_ATTR(REGISTERS_PER_BLOCK);
+ GET_ATTR(CLOCK_RATE);
+ GET_ATTR(TEXTURE_ALIGNMENT);
+ GET_ATTR(GPU_OVERLAP);
+ GET_ATTR(MULTIPROCESSOR_COUNT);
+ GET_ATTR(KERNEL_EXEC_TIMEOUT);
+ GET_ATTR(INTEGRATED);
+ GET_ATTR(CAN_MAP_HOST_MEMORY);
+ GET_ATTR(COMPUTE_MODE);
+ GET_ATTR(MAXIMUM_TEXTURE1D_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_HEIGHT);
+ GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT);
+ GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_HEIGHT);
+ GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_LAYERS);
+ GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_HEIGHT);
+ GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES);
+ GET_ATTR(SURFACE_ALIGNMENT);
+ GET_ATTR(CONCURRENT_KERNELS);
+ GET_ATTR(ECC_ENABLED);
+ GET_ATTR(TCC_DRIVER);
+ GET_ATTR(MEMORY_CLOCK_RATE);
+ GET_ATTR(GLOBAL_MEMORY_BUS_WIDTH);
+ GET_ATTR(L2_CACHE_SIZE);
+ GET_ATTR(MAX_THREADS_PER_MULTIPROCESSOR);
+ GET_ATTR(ASYNC_ENGINE_COUNT);
+ GET_ATTR(UNIFIED_ADDRESSING);
+ GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_LAYERS);
+ GET_ATTR(CAN_TEX2D_GATHER);
+ GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_HEIGHT);
+ GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE);
+ GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE);
+ GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE);
+ GET_ATTR(TEXTURE_PITCH_ALIGNMENT);
+ GET_ATTR(MAXIMUM_TEXTURECUBEMAP_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS);
+ GET_ATTR(MAXIMUM_SURFACE1D_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACE2D_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACE2D_HEIGHT);
+ GET_ATTR(MAXIMUM_SURFACE3D_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACE3D_HEIGHT);
+ GET_ATTR(MAXIMUM_SURFACE3D_DEPTH);
+ GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_LAYERS);
+ GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_HEIGHT);
+ GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_LAYERS);
+ GET_ATTR(MAXIMUM_SURFACECUBEMAP_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS);
+ GET_ATTR(MAXIMUM_TEXTURE1D_LINEAR_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_HEIGHT);
+ GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_PITCH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT);
+ GET_ATTR(COMPUTE_CAPABILITY_MAJOR);
+ GET_ATTR(COMPUTE_CAPABILITY_MINOR);
+ GET_ATTR(MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH);
+ GET_ATTR(STREAM_PRIORITIES_SUPPORTED);
+ GET_ATTR(GLOBAL_L1_CACHE_SUPPORTED);
+ GET_ATTR(LOCAL_L1_CACHE_SUPPORTED);
+ GET_ATTR(MAX_SHARED_MEMORY_PER_MULTIPROCESSOR);
+ GET_ATTR(MAX_REGISTERS_PER_MULTIPROCESSOR);
+ GET_ATTR(MANAGED_MEMORY);
+ GET_ATTR(MULTI_GPU_BOARD);
+ GET_ATTR(MULTI_GPU_BOARD_GROUP_ID);
+#undef GET_ATTR
+ capabilities += "\n";
+ }
+
+ return capabilities;
+}
+
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
--- a/intern/cycles/device/device_split_kernel.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/device_split_kernel.cpp 2020-01-10 20:42:43.460923388 +0300
@@ -55,6 +55,10 @@
kernel_next_iteration_setup = NULL;
kernel_indirect_subsurface = NULL;
kernel_buffer_update = NULL;
+ kernel_adaptive_stopping = NULL;
+ kernel_adaptive_filter_x = NULL;
+ kernel_adaptive_filter_y = NULL;
+ kernel_adaptive_adjust_samples = NULL;
}
DeviceSplitKernel::~DeviceSplitKernel()
@@ -83,6 +87,10 @@
delete kernel_next_iteration_setup;
delete kernel_indirect_subsurface;
delete kernel_buffer_update;
+ delete kernel_adaptive_stopping;
+ delete kernel_adaptive_filter_x;
+ delete kernel_adaptive_filter_y;
+ delete kernel_adaptive_adjust_samples;
}
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_features)
@@ -114,6 +122,10 @@
LOAD_KERNEL(next_iteration_setup);
LOAD_KERNEL(indirect_subsurface);
LOAD_KERNEL(buffer_update);
+ LOAD_KERNEL(adaptive_stopping);
+ LOAD_KERNEL(adaptive_filter_x);
+ LOAD_KERNEL(adaptive_filter_y);
+ LOAD_KERNEL(adaptive_adjust_samples);
#undef LOAD_KERNEL
@@ -208,6 +220,22 @@
RenderTile subtile = tile;
subtile.start_sample = tile.sample;
+
+ if (task->integrator_adaptive) {
+        int step_samples = samples_per_second;
+ /* Round so that we end up on multiples of four for adaptive sampling. */
+ if (step_samples == 3) {
+ step_samples = 2;
+ }
+ else if (step_samples > 4) {
+ step_samples &= 0xfffffffc;
+ }
+ samples_per_second = max(1, step_samples - (subtile.start_sample % 4));
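+        /* e.g. resuming at start_sample = 6 with an estimate of 8 yields
+         * max(1, 8 - 2) = 6 samples, so the subtile ends at sample 11 and
+         * the (sample & 3) == 3 filter condition below is met. */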
+ }
+
subtile.num_samples = min(samples_per_second,
tile.start_sample + tile.num_samples - tile.sample);
@@ -302,6 +330,25 @@
}
}
+ if (task->integrator_adaptive && ((tile.sample + subtile.num_samples - 1) & 3) == 3) {
+ size_t buffer_size[2];
+ buffer_size[0] = round_up(tile.w, local_size[0]);
+ buffer_size[1] = round_up(tile.h, local_size[1]);
+ kernel_adaptive_stopping->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ buffer_size[0] = round_up(tile.h, local_size[0]);
+ buffer_size[1] = round_up(1, local_size[1]);
+ kernel_adaptive_filter_x->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ buffer_size[0] = round_up(tile.w, local_size[0]);
+ buffer_size[1] = round_up(1, local_size[1]);
+ kernel_adaptive_filter_y->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
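+      /* The x-filter is launched with one work item per image row (tile.h)
+       * and the y-filter with one per column (tile.w), rather than one per
+       * pixel; each item is expected to sweep its whole scanline. */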
+ }
+
double time_per_sample = ((time_dt() - start_time) / subtile.num_samples);
if (avg_time_per_sample == 0.0) {
@@ -324,6 +371,28 @@
}
}
+ if (task->integrator_adaptive) {
+ /* Reset the start samples. */
+ RenderTile subtile = tile;
+ subtile.start_sample = tile.start_sample;
+ subtile.num_samples = tile.sample - tile.start_sample;
+ enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
+ subtile,
+ num_global_elements,
+ kgbuffer,
+ kernel_data,
+ split_data,
+ ray_state,
+ queue_index,
+ use_queues_flag,
+ work_pool_wgs);
+ size_t buffer_size[2];
+ buffer_size[0] = round_up(tile.w, local_size[0]);
+ buffer_size[1] = round_up(tile.h, local_size[1]);
+ kernel_adaptive_adjust_samples->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ }
+
return true;
}
diff -Naur a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
--- a/intern/cycles/device/device_split_kernel.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/device_split_kernel.h 2020-01-10 20:42:43.460923388 +0300
@@ -75,6 +75,10 @@
SplitKernelFunction *kernel_next_iteration_setup;
SplitKernelFunction *kernel_indirect_subsurface;
SplitKernelFunction *kernel_buffer_update;
+ SplitKernelFunction *kernel_adaptive_stopping;
+ SplitKernelFunction *kernel_adaptive_filter_x;
+ SplitKernelFunction *kernel_adaptive_filter_y;
+ SplitKernelFunction *kernel_adaptive_adjust_samples;
/* Global memory variables [porting]; These memory is used for
* co-operation between different kernels; Data written by one
diff -Naur a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
--- a/intern/cycles/device/device_task.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/device_task.h 2020-01-10 20:42:43.460923388 +0300
@@ -114,6 +114,7 @@
bool need_finish_queue;
bool integrator_branched;
+ bool integrator_adaptive;
int2 requested_tile_size;
protected:
diff -Naur a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
--- a/intern/cycles/device/opencl/opencl.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/opencl/opencl.h 2020-01-10 20:42:43.460923388 +0300
@@ -445,6 +445,7 @@
device_ptr rgba_byte,
device_ptr rgba_half);
void shader(DeviceTask &task);
+ void update_adaptive(DeviceTask &task, RenderTile &tile, int sample);
void denoise(RenderTile &tile, DenoisingTask &denoising);
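
The hunk above only declares update_adaptive(); its body lives in opencl_split.cpp and is not shown here. As rough orientation, a plausible shape — mirroring the enqueue pattern used by the generic split-kernel driver earlier in this patch — is sketched below. This is an assumption, not the patch's actual definition: the member names ckAdaptiveStopping, ckAdaptiveFilterX and ckAdaptiveFilterY are hypothetical, and only enqueue_kernel() is an existing OpenCLDevice helper.

/* Illustrative sketch only -- the real definition is in opencl_split.cpp and
 * may differ. Assumes cl_kernel handles for the three convergence kernels
 * (hypothetical names) and OpenCLDevice::enqueue_kernel(kernel, w, h). */
void OpenCLDevice::update_adaptive(DeviceTask &task, RenderTile &tile, int sample)
{
  /* Run the convergence check only once every fourth sample. */
  if (!task.integrator_adaptive || (sample & 3) != 3) {
    return;
  }
  /* Mark converged pixels, then dilate the not-yet-converged region in X and Y. */
  enqueue_kernel(ckAdaptiveStopping, tile.w, tile.h);
  enqueue_kernel(ckAdaptiveFilterX, tile.h, 1);
  enqueue_kernel(ckAdaptiveFilterY, tile.w, 1);
}
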
--- a/intern/cycles/device/opencl/opencl_split.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/opencl/opencl_split.cpp 2020-01-10 20:42:43.460923388 +0300
@@ -56,7 +56,11 @@
"enqueue_inactive "
"next_iteration_setup "
"indirect_subsurface "
- "buffer_update";
+ "buffer_update "
+ "adaptive_stopping "
+ "adaptive_filter_x "
+ "adaptive_filter_y "
+ "adaptive_adjust_samples";
const string OpenCLDevice::get_opencl_program_name(const string &kernel_name)
{
@@ -283,6 +287,10 @@
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_stopping);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_x);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_y);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_adjust_samples);
programs.push_back(&program_split);
# undef ADD_SPLIT_KERNEL_PROGRAM
diff -Naur a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
--- a/intern/cycles/kernel/CMakeLists.txt 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/CMakeLists.txt 2020-01-10 20:42:43.460923388 +0300
@@ -36,6 +36,10 @@
)
set(SRC_OPENCL_KERNELS
+ kernels/opencl/kernel_adaptive_stopping.cl
+ kernels/opencl/kernel_adaptive_filter_x.cl
+ kernels/opencl/kernel_adaptive_filter_y.cl
+ kernels/opencl/kernel_adaptive_adjust_samples.cl
kernels/opencl/kernel_bake.cl
kernels/opencl/kernel_base.cl
kernels/opencl/kernel_displace.cl
@@ -94,6 +98,7 @@
set(SRC_HEADERS
kernel_accumulate.h
+ kernel_adaptive_sampling.h
kernel_bake.h
kernel_camera.h
kernel_color.h
@@ -323,6 +328,10 @@
)
set(SRC_SPLIT_HEADERS
+ split/kernel_adaptive_adjust_samples.h
+ split/kernel_adaptive_filter_x.h
+ split/kernel_adaptive_filter_y.h
+ split/kernel_adaptive_stopping.h
split/kernel_branched.h
split/kernel_buffer_update.h
split/kernel_data_init.h
diff -Naur a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h
--- a/intern/cycles/kernel/kernel_adaptive_sampling.h 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h 2020-01-10 20:42:43.464256721 +0300
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_ADAPTIVE_SAMPLING_H__
+#define __KERNEL_ADAPTIVE_SAMPLING_H__
+
+CCL_NAMESPACE_BEGIN
+
+/* Determine whether to keep sampling a given pixel or stop because it has sufficiently converged. */
+
+ccl_device void kernel_do_adaptive_stopping(KernelGlobals *kg,
+ ccl_global float *buffer,
+ int sample)
+{
+ /* TODO Stefan: Is this better in linear, sRGB or something else? */
+ float4 I = *((ccl_global float4 *)buffer);
+ float4 A = *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ /* The per pixel error as seen in section 2.1 of
+ * "A hierarchical automatic stopping condition for Monte Carlo global illumination"
+ * A small epsilon is added to the divisor to prevent division by zero. */
+ float error = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) /
+ (sample * 0.0001f + sqrtf(I.x + I.y + I.z));
+ if (error < kernel_data.integrator.adaptive_threshold * (float)sample) {
+ /* Set the fourth component to non-zero value to indicate that this pixel has converged. */
+ buffer[kernel_data.film.pass_adaptive_aux_buffer + 3] += 1.0f;
+ }
+}
+
+/* Adjust the values of an adaptively sampled pixel. */
+
+ccl_device void kernel_adaptive_post_adjust(KernelGlobals *kg,
+ ccl_global float *buffer,
+ float sample_multiplier)
+{
+ *(ccl_global float4 *)(buffer) *= sample_multiplier;
+
+ /* Scale the aux pass too; this is necessary for progressive rendering to work properly. */
+ kernel_assert(kernel_data.film.pass_adaptive_aux_buffer);
+ *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer) *= sample_multiplier;
+
+#ifdef __PASSES__
+ int flag = kernel_data.film.pass_flag;
+
+ if (flag & PASSMASK(SHADOW))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_shadow) *= sample_multiplier;
+
+ if (flag & PASSMASK(MIST))
+ *(ccl_global float *)(buffer + kernel_data.film.pass_mist) *= sample_multiplier;
+
+ if (flag & PASSMASK(NORMAL))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_normal) *= sample_multiplier;
+
+ if (flag & PASSMASK(UV))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_uv) *= sample_multiplier;
+
+ if (flag & PASSMASK(MOTION)) {
+ *(ccl_global float4 *)(buffer + kernel_data.film.pass_motion) *= sample_multiplier;
+ *(ccl_global float *)(buffer + kernel_data.film.pass_motion_weight) *= sample_multiplier;
+ }
+
+ if (kernel_data.film.use_light_pass) {
+ int light_flag = kernel_data.film.light_pass_flag;
+
+ if (light_flag & PASSMASK(DIFFUSE_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_INDIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(SUBSURFACE_INDIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_subsurface_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(VOLUME_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(DIFFUSE_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_DIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(SUBSURFACE_DIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_subsurface_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(VOLUME_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_direct) *= sample_multiplier;
+
+ if (light_flag & PASSMASK(EMISSION))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_emission) *= sample_multiplier;
+ if (light_flag & PASSMASK(BACKGROUND))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_background) *= sample_multiplier;
+ if (light_flag & PASSMASK(AO))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_ao) *= sample_multiplier;
+
+ if (light_flag & PASSMASK(DIFFUSE_COLOR))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_color) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_COLOR))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_color) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_COLOR))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_color) *= sample_multiplier;
+ if (light_flag & PASSMASK(SUBSURFACE_COLOR))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_subsurface_color) *= sample_multiplier;
+ }
+#endif
+
+#ifdef __DENOISING_FEATURES__
+
+# define scale_float3_variance(buffer, offset, scale) \
+ *(buffer + offset) *= scale; \
+ *(buffer + offset + 1) *= scale; \
+ *(buffer + offset + 2) *= scale; \
+ *(buffer + offset + 3) *= scale * scale; \
+ *(buffer + offset + 4) *= scale * scale; \
+ *(buffer + offset + 5) *= scale * scale;
+
+# define scale_shadow_variance(buffer, offset, scale) \
+ *(buffer + offset) *= scale; \
+ *(buffer + offset + 1) *= scale; \
+ *(buffer + offset + 2) *= scale * scale;
+
+ if (kernel_data.film.pass_denoising_data) {
+ scale_shadow_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_A, sample_multiplier);
+ scale_shadow_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_B, sample_multiplier);
+ if (kernel_data.film.pass_denoising_clean) {
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier);
+ *(buffer + kernel_data.film.pass_denoising_clean) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_clean + 1) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_clean + 2) *= sample_multiplier;
+ }
+ else {
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier);
+ }
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, sample_multiplier);
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, sample_multiplier);
+ *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH +
+ 1) *= sample_multiplier * sample_multiplier;
+ }
+#endif /* __DENOISING_FEATURES__ */
+
+ if (kernel_data.film.cryptomatte_passes) {
+ int num_slots = 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) ? 1 : 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) ? 1 : 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) ? 1 : 0;
+ num_slots = num_slots * 2 * kernel_data.film.cryptomatte_depth;
+ ccl_global float2 *id_buffer = (ccl_global float2 *)(buffer +
+ kernel_data.film.pass_cryptomatte);
+ for (int slot = 0; slot < num_slots; slot++) {
+ id_buffer[slot].y *= sample_multiplier;
+ }
+ }
+}
+
+/* This is a simple box filter in two passes.
+ * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */
+
+ccl_device bool kernel_do_adaptive_filter_x(KernelGlobals *kg, int y, ccl_global WorkTile *tile)
+{
+ bool any = false;
+ bool prev = false;
+ for (int x = tile->x; x < tile->x + tile->w; ++x) {
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w == 0.0f) {
+ any = true;
+ if (x > tile->x && !prev) {
+ index = index - 1;
+ buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ aux->w = 0.0f;
+ }
+ prev = true;
+ }
+ else {
+ if (prev) {
+ aux->w = 0.0f;
+ }
+ prev = false;
+ }
+ }
+ return any;
+}
+
+ccl_device bool kernel_do_adaptive_filter_y(KernelGlobals *kg, int x, ccl_global WorkTile *tile)
+{
+ bool prev = false;
+ bool any = false;
+ for (int y = tile->y; y < tile->y + tile->h; ++y) {
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w == 0.0f) {
+ any = true;
+ if (y > tile->y && !prev) {
+ index = index - tile->stride;
+ buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ aux->w = 0.0f;
+ }
+ prev = true;
+ }
+ else {
+ if (prev) {
+ aux->w = 0.0f;
+ }
+ prev = false;
+ }
+ }
+ return any;
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __KERNEL_ADAPTIVE_SAMPLING_H__ */
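
To make the stopping criterion in kernel_do_adaptive_stopping() concrete, here is the same error metric evaluated stand-alone for one invented pixel. The formula is copied from the kernel above; the helper name adaptive_error, the radiance values and the threshold are illustrative only:

#include <cmath>
#include <cstdio>

/* Same per-pixel error as kernel_do_adaptive_stopping(): I is the full
 * accumulated radiance, A the doubled half-set estimate from the aux pass. */
static float adaptive_error(const float I[3], const float A[3], int sample)
{
  return (std::fabs(I[0] - A[0]) + std::fabs(I[1] - A[1]) + std::fabs(I[2] - A[2])) /
         (sample * 0.0001f + std::sqrt(I[0] + I[1] + I[2]));
}

int main()
{
  const float I[3] = {32.1f, 31.8f, 32.4f}; /* invented: 64 samples of ~0.5 grey */
  const float A[3] = {32.6f, 31.2f, 33.1f};
  const int sample = 64;
  const float threshold = 0.01f; /* stands in for integrator.adaptive_threshold */

  const float error = adaptive_error(I, A, sample);
  /* error ~= 1.8 / 9.82 ~= 0.18, threshold * sample = 0.64 -> converged. */
  std::printf("%s\n", error < threshold * (float)sample ? "converged" : "keep sampling");
  return 0;
}
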
diff -Naur a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
--- a/intern/cycles/kernel/kernel_passes.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_passes.h 2020-01-10 20:42:43.464256721 +0300
@@ -29,7 +29,9 @@
if (kernel_data.film.pass_denoising_data == 0)
return;
- buffer += (sample & 1) ? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A;
+ buffer += sample_is_even(kernel_data.integrator.sampling_pattern, sample) ?
+ DENOISING_PASS_SHADOW_B :
+ DENOISING_PASS_SHADOW_A;
path_total = ensure_finite(path_total);
path_total_shaded = ensure_finite(path_total_shaded);
@@ -383,6 +385,38 @@
#ifdef __KERNEL_DEBUG__
kernel_write_debug_passes(kg, buffer, L);
#endif
+
+  /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate the
+   * stopping criterion, using the heuristic from "A hierarchical automatic stopping condition
+   * for Monte Carlo global illumination" applied per pixel rather than in hierarchical tiles. */
+ if (kernel_data.film.pass_adaptive_aux_buffer &&
+ kernel_data.integrator.adaptive_threshold > 0.0f) {
+ if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) {
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer,
+ make_float4(L_sum.x * 2.0f, L_sum.y * 2.0f, L_sum.z * 2.0f, 0.0f));
+ }
+#ifdef __KERNEL_CPU__
+ if (sample >= kernel_data.integrator.adaptive_min_samples - 1 && (sample & 0x3) == 3) {
+ kernel_do_adaptive_stopping(kg, buffer, sample);
+ }
+#endif
+ }
+
+ /* Write the sample count as negative numbers initially to mark the samples as in progress.
+ * Once the tile has finished rendering, the sign gets flipped and all the pixel values
+ * are scaled as if they were taken at a uniform sample count. */
+ if (kernel_data.film.pass_sample_count) {
+    /* Make sure the value is negative; in progressive refine mode, the sign bit gets flipped between passes. */
+#ifdef __ATOMIC_PASS_WRITE__
+ atomic_fetch_and_or_uint32((ccl_global uint *)(buffer + kernel_data.film.pass_sample_count),
+ 0x80000000);
+#else
+ if (buffer[kernel_data.film.pass_sample_count] > 0) {
+ buffer[kernel_data.film.pass_sample_count] *= -1.0f;
+ }
+#endif
+ kernel_write_pass_float(buffer + kernel_data.film.pass_sample_count, -1.0f);
+ }
}
CCL_NAMESPACE_END
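
The even samples written above are accumulated at double weight so that, over a full batch,
the aux pass estimates the same total as the combined pass; the kernel_do_adaptive_stopping
call above compares the two estimates. A rough sketch of such a per-pixel test in the spirit
of the cited paper, with the helper name and the exact normalization assumed rather than
taken from this patch:

ccl_device void adaptive_stopping_sketch(KernelGlobals *kg, ccl_global float *buffer, int sample)
{
  /* I is the full accumulated radiance (combined pass at offset 0), A the
   * doubled even-half accumulation; their difference, normalized by
   * brightness, estimates the per-pixel error. */
  ccl_global float4 *aux = (ccl_global float4 *)(buffer +
                                                 kernel_data.film.pass_adaptive_aux_buffer);
  float4 I = *((ccl_global float4 *)buffer);
  float4 A = *aux;
  float error = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) /
                max(sqrtf(I.x + I.y + I.z), 1e-4f);
  if (error < kernel_data.integrator.adaptive_threshold * (float)sample) {
    aux->w = 1.0f; /* Mark as converged; the path kernels skip this pixel from now on. */
  }
}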
diff -Naur a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
--- a/intern/cycles/kernel/kernel_path_branched.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_path_branched.h 2020-01-10 20:42:43.464256721 +0300
@@ -523,6 +523,14 @@
buffer += index * pass_stride;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w > 0.0f) {
+ return;
+ }
+ }
+
/* initialize random numbers and ray */
uint rng_hash;
Ray ray;
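
Both path kernels guard on the fourth component of the adaptive aux pass, as in the hunk
above. The convention, spelled out as a hypothetical helper (illustrative only; the patch
inlines this check):

ccl_device_inline bool pixel_has_converged(KernelGlobals *kg, ccl_global const float *buffer)
{
  /* x/y/z of the aux pass accumulate twice the even-half radiance; w stays
   * 0.0f while the pixel is active and becomes non-zero once the stopping
   * test has fired, after which the pixel receives no further samples. */
  ccl_global const float4 *aux = (ccl_global const float4 *)(
      buffer + kernel_data.film.pass_adaptive_aux_buffer);
  return aux->w > 0.0f;
}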
diff -Naur a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
--- a/intern/cycles/kernel/kernel_path.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_path.h 2020-01-10 20:42:43.464256721 +0300
@@ -31,6 +31,7 @@
#include "kernel/kernel_accumulate.h"
#include "kernel/kernel_shader.h"
#include "kernel/kernel_light.h"
+#include "kernel/kernel_adaptive_sampling.h"
#include "kernel/kernel_passes.h"
#if defined(__VOLUME__) || defined(__SUBSURFACE__)
@@ -656,6 +657,14 @@
buffer += index * pass_stride;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w > 0.0f) {
+ return;
+ }
+ }
+
/* Initialize random numbers and sample ray. */
uint rng_hash;
Ray ray;
diff -Naur a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
--- a/intern/cycles/kernel/kernel_random.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_random.h 2020-01-10 20:55:57.757604393 +0300
@@ -43,20 +43,34 @@
uint i = index + SOBOL_SKIP;
for (int j = 0, x; (x = find_first_set(i)); i >>= x) {
j += x;
- result ^= kernel_tex_fetch(__sobol_directions, 32 * dimension + j - 1);
+ result ^= kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j);
}
return result;
}
#endif /* __SOBOL__ */
+#define NUM_PJ_SAMPLES (64 * 64)
+#define NUM_PJ_PATTERNS 48
+
ccl_device_forceinline float path_rng_1D(
KernelGlobals *kg, uint rng_hash, int sample, int num_samples, int dimension)
{
#ifdef __DEBUG_CORRELATION__
return (float)drand48();
#endif
-
+ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
+    /* Fall back to pseudo-random numbers once the precomputed table is exhausted. */
+    if (sample >= NUM_PJ_SAMPLES) {
+ int p = rng_hash + dimension;
+ return cmj_randfloat(sample, p);
+ }
+ uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+ int index = ((dimension % NUM_PJ_PATTERNS) * NUM_PJ_SAMPLES + sample) * 2;
+ return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^
+ (tmp_rng & 0x007fffff)) -
+ 1.0f;
+ }
#ifdef __CMJ__
# ifdef __SOBOL__
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
@@ -99,7 +113,24 @@
*fy = (float)drand48();
return;
#endif
-
+  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
+    /* Fall back to pseudo-random numbers once the precomputed table is exhausted.
+     * Without the return below, the table lookup would overwrite the fallback
+     * values with an out-of-range fetch. */
+    if (sample >= NUM_PJ_SAMPLES) {
+      int p = rng_hash + dimension;
+      *fx = cmj_randfloat(sample, p);
+      *fy = cmj_randfloat(sample, p + 1);
+      return;
+    }
+ uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+ int index = ((dimension % NUM_PJ_PATTERNS) * NUM_PJ_SAMPLES + sample) * 2;
+ *fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ (tmp_rng & 0x007fffff)) -
+ 1.0f;
+ tmp_rng = cmj_hash_simple(dimension + 1, rng_hash);
+ *fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^
+ (tmp_rng & 0x007fffff)) -
+ 1.0f;
+ return;
+ }
#ifdef __CMJ__
# ifdef __SOBOL__
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
@@ -284,4 +314,23 @@
return (float)*rng * (1.0f / (float)0xFFFFFFFF);
}
+ccl_device_inline bool sample_is_even(int pattern, int sample)
+{
+ if (pattern == SAMPLING_PATTERN_PMJ) {
+ /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al.
+   * We can use this to divide the sample sequence into two classes for easier variance estimation.
+   * There must be a more elegant way of writing this. */
+ return (bool)(sample & 2) ^ (bool)(sample & 8) ^ (bool)(sample & 0x20) ^
+ (bool)(sample & 0x80) ^ (bool)(sample & 0x200) ^ (bool)(sample & 0x800) ^
+ (bool)(sample & 0x2000) ^ (bool)(sample & 0x8000) ^ (bool)(sample & 0x20000) ^
+ (bool)(sample & 0x80000) ^ (bool)(sample & 0x200000) ^ (bool)(sample & 0x800000) ^
+ (bool)(sample & 0x2000000) ^ (bool)(sample & 0x8000000) ^ (bool)(sample & 0x20000000) ^
+ (bool)(sample & 0x80000000);
+ }
+ else {
+ /* TODO: Are there reliable ways of dividing CMJ and Sobol into two classes? */
+ return sample & 0x1;
+ }
+}
+
CCL_NAMESPACE_END
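
Note on the PMJ lookup above: the sample pattern LUT is assumed to store each coordinate as the bit pattern of a float in [1, 2), so XOR-ing its 23 mantissa bits with a per-dimension hash (the "tmp_rng & 0x007fffff" term) scrambles the fraction without leaving [1, 2); subtracting 1.0f then yields a value in [0, 1). The sample_is_even XOR chain is simply the parity of the odd-numbered bits of the sample index. A minimal standalone sketch of both tricks (hypothetical helper names, plain C, not part of the patch):

    #include <stdint.h>
    #include <string.h>

    /* Mantissa scramble: lut_bits encodes a float in [1, 2); XOR-ing hash bits
     * into the 23-bit mantissa keeps the value in [1, 2), and subtracting 1.0f
     * maps it to [0, 1).  Mirrors the __uint_as_float(...) - 1.0f expression. */
    static float scrambled_sample(uint32_t lut_bits, uint32_t hash)
    {
      uint32_t bits = lut_bits ^ (hash & 0x007fffffu);
      float f;
      memcpy(&f, &bits, sizeof(f)); /* portable stand-in for __uint_as_float */
      return f - 1.0f;
    }

    /* Equivalent of the sample_is_even XOR chain: bits 1, 3, 5, ..., 31 are
     * masked by 0xaaaaaaaa and their parity picks the class.  Assumes a
     * GCC/Clang-style popcount builtin. */
    static int pmj_sample_is_even(uint32_t sample)
    {
      return __builtin_popcount(sample & 0xaaaaaaaau) & 1;
    }
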
diff -Naur a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h 2020-01-10 20:42:43.467590055 +0300
@@ -89,5 +89,9 @@
DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
#undef KERNEL_ARCH
diff -Naur a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h 2020-01-10 20:42:43.467590055 +0300
@@ -58,6 +58,10 @@
# include "kernel/split/kernel_next_iteration_setup.h"
# include "kernel/split/kernel_indirect_subsurface.h"
# include "kernel/split/kernel_buffer_update.h"
+# include "kernel/split/kernel_adaptive_stopping.h"
+# include "kernel/split/kernel_adaptive_filter_x.h"
+# include "kernel/split/kernel_adaptive_filter_y.h"
+# include "kernel/split/kernel_adaptive_adjust_samples.h"
# endif /* __SPLIT_KERNEL__ */
#else
# define STUB_ASSERT(arch, name) \
@@ -204,6 +208,10 @@
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
#endif /* __SPLIT_KERNEL__ */
#undef KERNEL_STUB
diff -Naur a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu 2020-01-10 20:42:43.467590055 +0300
@@ -33,6 +33,7 @@
#include "kernel/kernel_path_branched.h"
#include "kernel/kernel_bake.h"
#include "kernel/kernel_work_stealing.h"
+#include "kernel/kernel_adaptive_sampling.h"
/* kernels */
extern "C" __global__ void
@@ -83,6 +84,75 @@
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_stopping(WorkTile *tile, int sample, uint total_work_size)
+{
+ int work_index = ccl_global_id(0);
+ bool thread_is_active = work_index < total_work_size;
+ KernelGlobals kg;
+  if (thread_is_active && kernel_data.film.pass_adaptive_aux_buffer) {
+ uint x = tile->x + work_index % tile->w;
+ uint y = tile->y + work_index / tile->w;
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ kernel_do_adaptive_stopping(&kg, buffer, sample);
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_filter_x(WorkTile *tile, int sample, uint)
+{
+ KernelGlobals kg;
+  if (kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) {
+    if (ccl_global_id(0) < tile->h) {
+ int y = tile->y + ccl_global_id(0);
+ kernel_do_adaptive_filter_x(&kg, y, tile);
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_filter_y(WorkTile *tile, int sample, uint)
+{
+ KernelGlobals kg;
+  if (kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) {
+    if (ccl_global_id(0) < tile->w) {
+ int x = tile->x + ccl_global_id(0);
+ kernel_do_adaptive_filter_y(&kg, x, tile);
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_scale_samples(WorkTile *tile, int start_sample, int sample, uint total_work_size)
+{
+  if (kernel_data.film.pass_adaptive_aux_buffer) {
+    int work_index = ccl_global_id(0);
+    bool thread_is_active = work_index < total_work_size;
+    KernelGlobals kg;
+    if (thread_is_active) {
+      uint x = tile->x + work_index % tile->w;
+      uint y = tile->y + work_index / tile->w;
+      int index = tile->offset + x + y * tile->stride;
+      ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+      if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+        buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+        float sample_multiplier = sample / max((float)start_sample + 1.0f, buffer[kernel_data.film.pass_sample_count]);
+        if (sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(&kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(&kg, buffer, sample / (sample - 1.0f));
+ }
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
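
The scale-samples kernel above relies on a sign convention: once a pixel passes the stopping test, its PASS_SAMPLE_COUNT entry is stored negated, so a negative value both flags the pixel as converged and preserves how many samples it actually received. kernel_cuda_adaptive_scale_samples flips the sign back and multiplies the accumulated result up to the nominal sample count, so the uniform divide at display time still produces a correct per-pixel average. A rough standalone sketch of the multiplier (hypothetical helper, not part of the patch):

    /* Multiplier applied by the scale-samples pass for a converged pixel:
     * stored_count arrives negated; the clamp mirrors the kernel's max()
     * against start_sample + 1 so fresh tiles never divide by near-zero. */
    static float adaptive_sample_multiplier(float stored_count, int start_sample, int nominal_sample)
    {
      float actual = -stored_count; /* undo the "converged" sign flip */
      float floor_count = (float)start_sample + 1.0f;
      if (actual < floor_count)
        actual = floor_count;
      return (float)nominal_sample / actual;
    }
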
diff -Naur a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu 2020-01-10 20:42:43.467590055 +0300
@@ -43,6 +43,10 @@
#include "kernel/split/kernel_next_iteration_setup.h"
#include "kernel/split/kernel_indirect_subsurface.h"
#include "kernel/split/kernel_buffer_update.h"
+#include "kernel/split/kernel_adaptive_stopping.h"
+#include "kernel/split/kernel_adaptive_filter_x.h"
+#include "kernel/split/kernel_adaptive_filter_y.h"
+#include "kernel/split/kernel_adaptive_adjust_samples.h"
#include "kernel/kernel_film.h"
@@ -121,6 +125,10 @@
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl 2020-01-10 20:42:43.467590055 +0300
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_adjust_samples.h"
+
+#define KERNEL_NAME adaptive_adjust_samples
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl 2020-01-10 20:42:43.467590055 +0300
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_filter_x.h"
+
+#define KERNEL_NAME adaptive_filter_x
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl 2020-01-10 20:42:43.467590055 +0300
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_filter_y.h"
+
+#define KERNEL_NAME adaptive_filter_y
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl 2020-01-10 20:42:43.467590055 +0300
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_stopping.h"
+
+#define KERNEL_NAME adaptive_stopping
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl 2020-01-10 20:42:43.467590055 +0300
@@ -28,3 +28,7 @@
#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
#include "kernel/kernels/opencl/kernel_buffer_update.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_stopping.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_filter_x.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_filter_y.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl"
diff -Naur a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
--- a/intern/cycles/kernel/kernel_textures.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_textures.h 2020-01-10 20:42:43.464256721 +0300
@@ -77,7 +77,7 @@
KERNEL_TEX(float, __lookup_table)
/* sobol */
-KERNEL_TEX(uint, __sobol_directions)
+KERNEL_TEX(uint, __sample_pattern_lut)
/* image textures */
KERNEL_TEX(TextureInfo, __texture_info)
diff -Naur a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
--- a/intern/cycles/kernel/kernel_types.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_types.h 2020-01-10 20:49:57.107597884 +0300
@@ -269,6 +269,7 @@
enum SamplingPattern {
SAMPLING_PATTERN_SOBOL = 0,
SAMPLING_PATTERN_CMJ = 1,
+ SAMPLING_PATTERN_PMJ = 2,
SAMPLING_NUM_PATTERNS,
};
@@ -373,6 +374,8 @@
#endif
PASS_RENDER_TIME,
PASS_CRYPTOMATTE,
+ PASS_ADAPTIVE_AUX_BUFFER,
+ PASS_SAMPLE_COUNT,
PASS_AOV_COLOR,
PASS_AOV_VALUE,
PASS_CATEGORY_MAIN_END = 31,
@@ -1239,6 +1242,9 @@
int cryptomatte_depth;
int pass_cryptomatte;
+ int pass_adaptive_aux_buffer;
+ int pass_sample_count;
+
int pass_mist;
float mist_start;
float mist_inv_depth;
@@ -1273,6 +1279,7 @@
int display_divide_pass_stride;
int use_display_exposure;
int use_display_pass_alpha;
+ int pad1, pad2;
} KernelFilm;
static_assert_align(KernelFilm, 16);
@@ -1354,6 +1361,8 @@
/* sampler */
int sampling_pattern;
int aa_samples;
+ int adaptive_min_samples;
+ float adaptive_threshold;
/* volume render */
int use_volumes;
@@ -1429,7 +1438,7 @@
typedef struct KernelTables {
int beckmann_offset;
- int pad1, pad2, pad3;
+ int pad1;
} KernelTables;
static_assert_align(KernelTables, 16);
@@ -1679,7 +1688,7 @@
uint start_sample;
uint num_samples;
- uint offset;
+ int offset;
uint stride;
ccl_global float *buffer;
diff -Naur a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
--- a/intern/cycles/kernel/kernel_work_stealing.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_work_stealing.h 2020-01-10 20:42:43.467590055 +0300
@@ -23,17 +23,41 @@
* Utility functions for work stealing
*/
+/* Map global work index to tile, pixel X/Y and sample. */
+ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
+ uint global_work_index,
+ ccl_private uint *x,
+ ccl_private uint *y,
+ ccl_private uint *sample)
+{
+#ifdef __KERNEL_CUDA__
+ /* Keeping threads for the same pixel together improves performance on CUDA. */
+ uint sample_offset = global_work_index % tile->num_samples;
+ uint pixel_offset = global_work_index / tile->num_samples;
+#else /* __KERNEL_CUDA__ */
+ uint tile_pixels = tile->w * tile->h;
+ uint sample_offset = global_work_index / tile_pixels;
+ uint pixel_offset = global_work_index - sample_offset * tile_pixels;
+#endif /* __KERNEL_CUDA__ */
+ uint y_offset = pixel_offset / tile->w;
+ uint x_offset = pixel_offset - y_offset * tile->w;
+
+ *x = tile->x + x_offset;
+ *y = tile->y + y_offset;
+ *sample = tile->start_sample + sample_offset;
+}
+
#ifdef __KERNEL_OPENCL__
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif
#ifdef __SPLIT_KERNEL__
/* Returns true if there is work */
-ccl_device bool get_next_work(KernelGlobals *kg,
- ccl_global uint *work_pools,
- uint total_work_size,
- uint ray_index,
- ccl_private uint *global_work_index)
+ccl_device bool get_next_work_item(KernelGlobals *kg,
+ ccl_global uint *work_pools,
+ uint total_work_size,
+ uint ray_index,
+ ccl_private uint *global_work_index)
{
/* With a small amount of work there may be more threads than work due to
* rounding up of global size, stop such threads immediately. */
@@ -56,31 +80,37 @@
/* Test if all work for this pool is done. */
return (*global_work_index < total_work_size);
}
-#endif
-/* Map global work index to tile, pixel X/Y and sample. */
-ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
- uint global_work_index,
- ccl_private uint *x,
- ccl_private uint *y,
- ccl_private uint *sample)
+ccl_device bool get_next_work(KernelGlobals *kg,
+ ccl_global uint *work_pools,
+ uint total_work_size,
+ uint ray_index,
+ ccl_private uint *global_work_index)
{
-#ifdef __KERNEL_CUDA__
- /* Keeping threads for the same pixel together improves performance on CUDA. */
- uint sample_offset = global_work_index % tile->num_samples;
- uint pixel_offset = global_work_index / tile->num_samples;
-#else /* __KERNEL_CUDA__ */
- uint tile_pixels = tile->w * tile->h;
- uint sample_offset = global_work_index / tile_pixels;
- uint pixel_offset = global_work_index - sample_offset * tile_pixels;
-#endif /* __KERNEL_CUDA__ */
- uint y_offset = pixel_offset / tile->w;
- uint x_offset = pixel_offset - y_offset * tile->w;
-
- *x = tile->x + x_offset;
- *y = tile->y + y_offset;
- *sample = tile->start_sample + sample_offset;
+ bool got_work = false;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ do {
+ got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
+ if (got_work) {
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, *global_work_index, &x, &y, &sample);
+ uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w == 0.0f) {
+ break;
+ }
+ }
+ } while (got_work);
+ }
+ else {
+ got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
+ }
+ return got_work;
}
+#endif
CCL_NAMESPACE_END
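
With adaptive sampling enabled, get_next_work above becomes a filtering wrapper around get_next_work_item: the auxiliary pass keeps 0.0 in its w component while a pixel is still active, and a non-zero w is written once the pixel converges, so work items belonging to finished pixels are consumed and discarded instead of traced. A distilled sketch of that loop (hypothetical types and names, not part of the patch):

    #include <stdbool.h>
    #include <stddef.h>

    /* Drain a work stream, skipping items whose pixel already converged
     * (flag != 0), and return the first still-active item if any. */
    typedef struct {
      const float *aux_w; /* per-work-item convergence flags (aux->w) */
      size_t next, total; /* cursor into the work pool */
    } WorkStream;

    static bool next_active_work(WorkStream *ws, size_t *out_index)
    {
      while (ws->next < ws->total) {
        size_t i = ws->next++;
        if (ws->aux_w[i] == 0.0f) { /* w == 0: pixel not yet converged */
          *out_index = i;
          return true;
        }
        /* converged pixel: drop this item and keep draining */
      }
      return false;
    }
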
diff -Naur a/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h
--- a/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h 2020-01-10 20:42:43.467590055 +0300
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_adjust_samples(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h) {
+ int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w;
+ int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w;
+ int buffer_offset = (kernel_split_params.tile.offset + x +
+ y * kernel_split_params.tile.stride) *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ int sample = kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples;
+ if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+ buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+ float sample_multiplier = sample / max((float)kernel_split_params.tile.start_sample + 1.0f,
+ buffer[kernel_data.film.pass_sample_count]);
+ if (sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(kg, buffer, sample / (sample - 1.0f));
+ }
+ }
+}
+
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/kernel/split/kernel_adaptive_filter_x.h b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h
--- a/intern/cycles/kernel/split/kernel_adaptive_filter_x.h 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h 2020-01-10 20:42:43.467590055 +0300
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_filter_x(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.h &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int y = kernel_split_params.tile.y + pixel_index;
+ kernel_do_adaptive_filter_x(kg, y, &kernel_split_params.tile);
+ }
+}
+
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/kernel/split/kernel_adaptive_filter_y.h b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h
--- a/intern/cycles/kernel/split/kernel_adaptive_filter_y.h 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h 2020-01-10 20:42:43.470923389 +0300
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_filter_y(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int x = kernel_split_params.tile.x + pixel_index;
+ kernel_do_adaptive_filter_y(kg, x, &kernel_split_params.tile);
+ }
+}
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/kernel/split/kernel_adaptive_stopping.h b/intern/cycles/kernel/split/kernel_adaptive_stopping.h
--- a/intern/cycles/kernel/split/kernel_adaptive_stopping.h 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/split/kernel_adaptive_stopping.h 2020-01-10 20:42:43.470923389 +0300
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_stopping(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w;
+ int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w;
+ int buffer_offset = (kernel_split_params.tile.offset + x +
+ y * kernel_split_params.tile.stride) *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ kernel_do_adaptive_stopping(kg,
+ buffer,
+ kernel_split_params.tile.start_sample +
+ kernel_split_params.tile.num_samples - 1);
+ }
+}
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
--- a/intern/cycles/render/buffers.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/buffers.cpp 2020-01-10 20:42:43.470923389 +0300
@@ -258,6 +258,22 @@
return false;
}
+ float *sample_count = NULL;
+  if (name == "Combined") {
+ int sample_offset = 0;
+ for (size_t j = 0; j < params.passes.size(); j++) {
+ Pass &pass = params.passes[j];
+ if (pass.type != PASS_SAMPLE_COUNT) {
+ sample_offset += pass.components;
+ continue;
+ }
+ else {
+ sample_count = buffer.data() + sample_offset;
+ break;
+ }
+ }
+ }
+
int pass_offset = 0;
for (size_t j = 0; j < params.passes.size(); j++) {
@@ -418,6 +434,11 @@
}
else {
for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) {
+ if (sample_count && sample_count[i * pass_stride] < 0.0f) {
+ scale = (pass.filter) ? -1.0f / (sample_count[i * pass_stride]) : 1.0f;
+ scale_exposure = (pass.exposure) ? scale * exposure : scale;
+ }
+
float4 f = make_float4(in[0], in[1], in[2], in[3]);
pixels[0] = f.x * scale_exposure;
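
The Combined read-back above can no longer divide every pixel by the global sample number: an early-stopped pixel accumulated fewer samples, and its PASS_SAMPLE_COUNT entry is negative while it remains in the converged state, so the loop swaps in 1/|count| as the filter scale for exactly those pixels. A small sketch of the per-pixel scale selection (hypothetical helper, not part of the patch):

    /* Per-pixel scale for a filtered pass: a negative stored count marks an
     * early-stopped pixel that accumulated |count| samples, so average by
     * |count| instead of the global sample number. */
    static float combined_scale(float stored_count, int global_sample, int filtered)
    {
      if (!filtered)
        return 1.0f;                      /* unfiltered passes are not averaged */
      if (stored_count < 0.0f)
        return -1.0f / stored_count;      /* early-stopped: per-pixel count */
      return 1.0f / (float)global_sample; /* default behaviour */
    }
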
diff -Naur a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt
--- a/intern/cycles/render/CMakeLists.txt 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/CMakeLists.txt 2020-01-10 20:42:43.470923389 +0300
@@ -22,6 +22,7 @@
graph.cpp
image.cpp
integrator.cpp
+ jitter.cpp
light.cpp
merge.cpp
mesh.cpp
@@ -58,6 +59,7 @@
image.h
   integrator.h
+  jitter.h
   light.h
merge.h
mesh.h
nodes.h
diff -Naur a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp
--- a/intern/cycles/render/film.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/film.cpp 2020-01-10 20:42:43.470923389 +0300
@@ -196,6 +196,13 @@
case PASS_AOV_VALUE:
pass.components = 1;
break;
+ case PASS_ADAPTIVE_AUX_BUFFER:
+ pass.components = 4;
+ break;
+ case PASS_SAMPLE_COUNT:
+ pass.components = 1;
+ pass.exposure = false;
+ break;
default:
assert(false);
break;
@@ -318,6 +325,7 @@
SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false);
SOCKET_BOOLEAN(denoising_prefiltered_pass, "Generate Denoising Prefiltered Pass", false);
SOCKET_INT(denoising_flags, "Denoising Flags", 0);
+ SOCKET_BOOLEAN(use_adaptive_sampling, "Use Adaptive Sampling", false);
return type;
}
@@ -507,6 +515,12 @@
have_aov_value = true;
}
break;
+ case PASS_ADAPTIVE_AUX_BUFFER:
+ kfilm->pass_adaptive_aux_buffer = kfilm->pass_stride;
+ break;
+ case PASS_SAMPLE_COUNT:
+ kfilm->pass_sample_count = kfilm->pass_stride;
+ break;
default:
assert(false);
break;
diff -Naur a/intern/cycles/render/film.cpp.orig b/intern/cycles/render/film.cpp.orig
--- a/intern/cycles/render/film.cpp.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/render/film.cpp.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,638 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/camera.h"
+#include "device/device.h"
+#include "render/film.h"
+#include "render/integrator.h"
+#include "render/mesh.h"
+#include "render/scene.h"
+#include "render/tables.h"
+
+#include "util/util_algorithm.h"
+#include "util/util_foreach.h"
+#include "util/util_math.h"
+#include "util/util_math_cdf.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Pass */
+
+static bool compare_pass_order(const Pass &a, const Pass &b)
+{
+ if (a.components == b.components)
+ return (a.type < b.type);
+ return (a.components > b.components);
+}
+
+void Pass::add(PassType type, vector<Pass> &passes, const char *name)
+{
+ for (size_t i = 0; i < passes.size(); i++) {
+ if (passes[i].type != type) {
+ continue;
+ }
+
+ /* An empty name is used as a placeholder to signal that any pass of
+ * that type is fine (because the content always is the same).
+ * This is important to support divide_type: If the pass that has a
+ * divide_type is added first, a pass for divide_type with an empty
+ * name will be added. Then, if a matching pass with a name is later
+ * requested, the existing placeholder will be renamed to that.
+ * If the divide_type is explicitly allocated with a name first and
+ * then again as part of another pass, the second one will just be
+ * skipped because that type already exists. */
+
+ /* If no name is specified, any pass of the correct type will match. */
+ if (name == NULL) {
+ return;
+ }
+
+ /* If we already have a placeholder pass, rename that one. */
+ if (passes[i].name.empty()) {
+ passes[i].name = name;
+ return;
+ }
+
+ /* If neither existing nor requested pass have placeholder name, they
+ * must match. */
+ if (name == passes[i].name) {
+ return;
+ }
+ }
+
+ Pass pass;
+
+ pass.type = type;
+ pass.filter = true;
+ pass.exposure = false;
+ pass.divide_type = PASS_NONE;
+ if (name) {
+ pass.name = name;
+ }
+
+ switch (type) {
+ case PASS_NONE:
+ pass.components = 0;
+ break;
+ case PASS_COMBINED:
+ pass.components = 4;
+ pass.exposure = true;
+ break;
+ case PASS_DEPTH:
+ pass.components = 1;
+ pass.filter = false;
+ break;
+ case PASS_MIST:
+ pass.components = 1;
+ break;
+ case PASS_NORMAL:
+ pass.components = 4;
+ break;
+ case PASS_UV:
+ pass.components = 4;
+ break;
+ case PASS_MOTION:
+ pass.components = 4;
+ pass.divide_type = PASS_MOTION_WEIGHT;
+ break;
+ case PASS_MOTION_WEIGHT:
+ pass.components = 1;
+ break;
+ case PASS_OBJECT_ID:
+ case PASS_MATERIAL_ID:
+ pass.components = 1;
+ pass.filter = false;
+ break;
+
+ case PASS_EMISSION:
+ case PASS_BACKGROUND:
+ pass.components = 4;
+ pass.exposure = true;
+ break;
+ case PASS_AO:
+ pass.components = 4;
+ break;
+ case PASS_SHADOW:
+ pass.components = 4;
+ pass.exposure = false;
+ break;
+ case PASS_LIGHT:
+ /* This isn't a real pass, used by baking to see whether
+ * light data is needed or not.
+ *
+ * Set components to 0 so pass sort below happens in a
+ * determined way.
+ */
+ pass.components = 0;
+ break;
+#ifdef WITH_CYCLES_DEBUG
+ case PASS_BVH_TRAVERSED_NODES:
+ case PASS_BVH_TRAVERSED_INSTANCES:
+ case PASS_BVH_INTERSECTIONS:
+ case PASS_RAY_BOUNCES:
+ pass.components = 1;
+ pass.exposure = false;
+ break;
+#endif
+ case PASS_RENDER_TIME:
+ /* This pass is handled entirely on the host side. */
+ pass.components = 0;
+ break;
+
+ case PASS_DIFFUSE_COLOR:
+ case PASS_GLOSSY_COLOR:
+ case PASS_TRANSMISSION_COLOR:
+ case PASS_SUBSURFACE_COLOR:
+ pass.components = 4;
+ break;
+ case PASS_DIFFUSE_DIRECT:
+ case PASS_DIFFUSE_INDIRECT:
+ pass.components = 4;
+ pass.exposure = true;
+ pass.divide_type = PASS_DIFFUSE_COLOR;
+ break;
+ case PASS_GLOSSY_DIRECT:
+ case PASS_GLOSSY_INDIRECT:
+ pass.components = 4;
+ pass.exposure = true;
+ pass.divide_type = PASS_GLOSSY_COLOR;
+ break;
+ case PASS_TRANSMISSION_DIRECT:
+ case PASS_TRANSMISSION_INDIRECT:
+ pass.components = 4;
+ pass.exposure = true;
+ pass.divide_type = PASS_TRANSMISSION_COLOR;
+ break;
+ case PASS_SUBSURFACE_DIRECT:
+ case PASS_SUBSURFACE_INDIRECT:
+ pass.components = 4;
+ pass.exposure = true;
+ pass.divide_type = PASS_SUBSURFACE_COLOR;
+ break;
+ case PASS_VOLUME_DIRECT:
+ case PASS_VOLUME_INDIRECT:
+ pass.components = 4;
+ pass.exposure = true;
+ break;
+ case PASS_CRYPTOMATTE:
+ pass.components = 4;
+ break;
+ case PASS_AOV_COLOR:
+ pass.components = 4;
+ break;
+ case PASS_AOV_VALUE:
+ pass.components = 1;
+ break;
+ default:
+ assert(false);
+ break;
+ }
+
+ passes.push_back(pass);
+
+ /* order from by components, to ensure alignment so passes with size 4
+ * come first and then passes with size 1 */
+ sort(&passes[0], &passes[0] + passes.size(), compare_pass_order);
+
+ if (pass.divide_type != PASS_NONE)
+ Pass::add(pass.divide_type, passes);
+}
+
+bool Pass::equals(const vector<Pass> &A, const vector<Pass> &B)
+{
+ if (A.size() != B.size())
+ return false;
+
+ for (int i = 0; i < A.size(); i++)
+ if (A[i].type != B[i].type || A[i].name != B[i].name)
+ return false;
+
+ return true;
+}
+
+bool Pass::contains(const vector<Pass> &passes, PassType type)
+{
+ for (size_t i = 0; i < passes.size(); i++)
+ if (passes[i].type == type)
+ return true;
+
+ return false;
+}
+
+/* Pixel Filter */
+
+static float filter_func_box(float /*v*/, float /*width*/)
+{
+ return 1.0f;
+}
+
+static float filter_func_gaussian(float v, float width)
+{
+ v *= 6.0f / width;
+ return expf(-2.0f * v * v);
+}
+
+static float filter_func_blackman_harris(float v, float width)
+{
+ v = M_2PI_F * (v / width + 0.5f);
+ return 0.35875f - 0.48829f * cosf(v) + 0.14128f * cosf(2.0f * v) - 0.01168f * cosf(3.0f * v);
+}
+
+static vector<float> filter_table(FilterType type, float width)
+{
+ vector<float> filter_table(FILTER_TABLE_SIZE);
+ float (*filter_func)(float, float) = NULL;
+
+ switch (type) {
+ case FILTER_BOX:
+ filter_func = filter_func_box;
+ break;
+ case FILTER_GAUSSIAN:
+ filter_func = filter_func_gaussian;
+ width *= 3.0f;
+ break;
+ case FILTER_BLACKMAN_HARRIS:
+ filter_func = filter_func_blackman_harris;
+ width *= 2.0f;
+ break;
+ default:
+ assert(0);
+ }
+
+ /* Create importance sampling table. */
+
+ /* TODO(sergey): With the even filter table size resolution we can not
+ * really make it nice symmetric importance map without sampling full range
+ * (meaning, we would need to sample full filter range and not use the
+ * make_symmetric argument).
+ *
+ * Current code matches exactly initial filter table code, but we should
+ * consider either making FILTER_TABLE_SIZE odd value or sample full filter.
+ */
+
+ util_cdf_inverted(FILTER_TABLE_SIZE,
+ 0.0f,
+ width * 0.5f,
+ function_bind(filter_func, _1, width),
+ true,
+ filter_table);
+
+ return filter_table;
+}
+
+/* Film */
+
+NODE_DEFINE(Film)
+{
+ NodeType *type = NodeType::add("film", create);
+
+ SOCKET_FLOAT(exposure, "Exposure", 0.8f);
+ SOCKET_FLOAT(pass_alpha_threshold, "Pass Alpha Threshold", 0.0f);
+
+ static NodeEnum filter_enum;
+ filter_enum.insert("box", FILTER_BOX);
+ filter_enum.insert("gaussian", FILTER_GAUSSIAN);
+ filter_enum.insert("blackman_harris", FILTER_BLACKMAN_HARRIS);
+
+ SOCKET_ENUM(filter_type, "Filter Type", filter_enum, FILTER_BOX);
+ SOCKET_FLOAT(filter_width, "Filter Width", 1.0f);
+
+ SOCKET_FLOAT(mist_start, "Mist Start", 0.0f);
+ SOCKET_FLOAT(mist_depth, "Mist Depth", 100.0f);
+ SOCKET_FLOAT(mist_falloff, "Mist Falloff", 1.0f);
+
+ SOCKET_BOOLEAN(denoising_data_pass, "Generate Denoising Data Pass", false);
+ SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false);
+ SOCKET_BOOLEAN(denoising_prefiltered_pass, "Generate Denoising Prefiltered Pass", false);
+ SOCKET_INT(denoising_flags, "Denoising Flags", 0);
+
+ return type;
+}
+
+Film::Film() : Node(node_type)
+{
+ use_light_visibility = false;
+ filter_table_offset = TABLE_OFFSET_INVALID;
+ cryptomatte_passes = CRYPT_NONE;
+
+ need_update = true;
+}
+
+Film::~Film()
+{
+}
+
+void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
+{
+ if (!need_update)
+ return;
+
+ device_free(device, dscene, scene);
+
+ KernelFilm *kfilm = &dscene->data.film;
+
+ /* update __data */
+ kfilm->exposure = exposure;
+ kfilm->pass_flag = 0;
+
+ kfilm->display_pass_stride = -1;
+ kfilm->display_pass_components = 0;
+ kfilm->display_divide_pass_stride = -1;
+ kfilm->use_display_exposure = false;
+ kfilm->use_display_pass_alpha = (display_pass == PASS_COMBINED);
+
+ kfilm->light_pass_flag = 0;
+ kfilm->pass_stride = 0;
+ kfilm->use_light_pass = use_light_visibility;
+
+ bool have_cryptomatte = false, have_aov_color = false, have_aov_value = false;
+
+ for (size_t i = 0; i < passes.size(); i++) {
+ Pass &pass = passes[i];
+
+ if (pass.type == PASS_NONE) {
+ continue;
+ }
+
+ /* Can't do motion pass if no motion vectors are available. */
+ if (pass.type == PASS_MOTION || pass.type == PASS_MOTION_WEIGHT) {
+ if (scene->need_motion() != Scene::MOTION_PASS) {
+ kfilm->pass_stride += pass.components;
+ continue;
+ }
+ }
+
+ int pass_flag = (1 << (pass.type % 32));
+ if (pass.type <= PASS_CATEGORY_MAIN_END) {
+ kfilm->pass_flag |= pass_flag;
+ }
+ else {
+ assert(pass.type <= PASS_CATEGORY_LIGHT_END);
+ kfilm->use_light_pass = 1;
+ kfilm->light_pass_flag |= pass_flag;
+ }
+
+ switch (pass.type) {
+ case PASS_COMBINED:
+ kfilm->pass_combined = kfilm->pass_stride;
+ break;
+ case PASS_DEPTH:
+ kfilm->pass_depth = kfilm->pass_stride;
+ break;
+ case PASS_NORMAL:
+ kfilm->pass_normal = kfilm->pass_stride;
+ break;
+ case PASS_UV:
+ kfilm->pass_uv = kfilm->pass_stride;
+ break;
+ case PASS_MOTION:
+ kfilm->pass_motion = kfilm->pass_stride;
+ break;
+ case PASS_MOTION_WEIGHT:
+ kfilm->pass_motion_weight = kfilm->pass_stride;
+ break;
+ case PASS_OBJECT_ID:
+ kfilm->pass_object_id = kfilm->pass_stride;
+ break;
+ case PASS_MATERIAL_ID:
+ kfilm->pass_material_id = kfilm->pass_stride;
+ break;
+
+ case PASS_MIST:
+ kfilm->pass_mist = kfilm->pass_stride;
+ break;
+ case PASS_EMISSION:
+ kfilm->pass_emission = kfilm->pass_stride;
+ break;
+ case PASS_BACKGROUND:
+ kfilm->pass_background = kfilm->pass_stride;
+ break;
+ case PASS_AO:
+ kfilm->pass_ao = kfilm->pass_stride;
+ break;
+ case PASS_SHADOW:
+ kfilm->pass_shadow = kfilm->pass_stride;
+ break;
+
+ case PASS_LIGHT:
+ break;
+
+ case PASS_DIFFUSE_COLOR:
+ kfilm->pass_diffuse_color = kfilm->pass_stride;
+ break;
+ case PASS_GLOSSY_COLOR:
+ kfilm->pass_glossy_color = kfilm->pass_stride;
+ break;
+ case PASS_TRANSMISSION_COLOR:
+ kfilm->pass_transmission_color = kfilm->pass_stride;
+ break;
+ case PASS_SUBSURFACE_COLOR:
+ kfilm->pass_subsurface_color = kfilm->pass_stride;
+ break;
+ case PASS_DIFFUSE_INDIRECT:
+ kfilm->pass_diffuse_indirect = kfilm->pass_stride;
+ break;
+ case PASS_GLOSSY_INDIRECT:
+ kfilm->pass_glossy_indirect = kfilm->pass_stride;
+ break;
+ case PASS_TRANSMISSION_INDIRECT:
+ kfilm->pass_transmission_indirect = kfilm->pass_stride;
+ break;
+ case PASS_SUBSURFACE_INDIRECT:
+ kfilm->pass_subsurface_indirect = kfilm->pass_stride;
+ break;
+ case PASS_VOLUME_INDIRECT:
+ kfilm->pass_volume_indirect = kfilm->pass_stride;
+ break;
+ case PASS_DIFFUSE_DIRECT:
+ kfilm->pass_diffuse_direct = kfilm->pass_stride;
+ break;
+ case PASS_GLOSSY_DIRECT:
+ kfilm->pass_glossy_direct = kfilm->pass_stride;
+ break;
+ case PASS_TRANSMISSION_DIRECT:
+ kfilm->pass_transmission_direct = kfilm->pass_stride;
+ break;
+ case PASS_SUBSURFACE_DIRECT:
+ kfilm->pass_subsurface_direct = kfilm->pass_stride;
+ break;
+ case PASS_VOLUME_DIRECT:
+ kfilm->pass_volume_direct = kfilm->pass_stride;
+ break;
+
+#ifdef WITH_CYCLES_DEBUG
+ case PASS_BVH_TRAVERSED_NODES:
+ kfilm->pass_bvh_traversed_nodes = kfilm->pass_stride;
+ break;
+ case PASS_BVH_TRAVERSED_INSTANCES:
+ kfilm->pass_bvh_traversed_instances = kfilm->pass_stride;
+ break;
+ case PASS_BVH_INTERSECTIONS:
+ kfilm->pass_bvh_intersections = kfilm->pass_stride;
+ break;
+ case PASS_RAY_BOUNCES:
+ kfilm->pass_ray_bounces = kfilm->pass_stride;
+ break;
+#endif
+ case PASS_RENDER_TIME:
+ break;
+ case PASS_CRYPTOMATTE:
+ kfilm->pass_cryptomatte = have_cryptomatte ?
+ min(kfilm->pass_cryptomatte, kfilm->pass_stride) :
+ kfilm->pass_stride;
+ have_cryptomatte = true;
+ break;
+ case PASS_AOV_COLOR:
+ if (!have_aov_color) {
+ kfilm->pass_aov_color = kfilm->pass_stride;
+ have_aov_color = true;
+ }
+ break;
+ case PASS_AOV_VALUE:
+ if (!have_aov_value) {
+ kfilm->pass_aov_value = kfilm->pass_stride;
+ have_aov_value = true;
+ }
+ break;
+ default:
+ assert(false);
+ break;
+ }
+
+ if (pass.type == display_pass) {
+ kfilm->display_pass_stride = kfilm->pass_stride;
+ kfilm->display_pass_components = pass.components;
+ kfilm->use_display_exposure = pass.exposure && (kfilm->exposure != 1.0f);
+ }
+ else if (pass.type == PASS_DIFFUSE_COLOR || pass.type == PASS_TRANSMISSION_COLOR ||
+ pass.type == PASS_GLOSSY_COLOR || pass.type == PASS_SUBSURFACE_COLOR) {
+ kfilm->display_divide_pass_stride = kfilm->pass_stride;
+ }
+
+ kfilm->pass_stride += pass.components;
+ }
+
+ kfilm->pass_denoising_data = 0;
+ kfilm->pass_denoising_clean = 0;
+ kfilm->denoising_flags = 0;
+ if (denoising_data_pass) {
+ kfilm->pass_denoising_data = kfilm->pass_stride;
+ kfilm->pass_stride += DENOISING_PASS_SIZE_BASE;
+ kfilm->denoising_flags = denoising_flags;
+ if (denoising_clean_pass) {
+ kfilm->pass_denoising_clean = kfilm->pass_stride;
+ kfilm->pass_stride += DENOISING_PASS_SIZE_CLEAN;
+ kfilm->use_light_pass = 1;
+ }
+ if (denoising_prefiltered_pass) {
+ kfilm->pass_stride += DENOISING_PASS_SIZE_PREFILTERED;
+ }
+ }
+
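+  /* Keep the total stride 4-aligned, presumably so float4 reads in the
+   * kernel stay on aligned addresses. */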
+ kfilm->pass_stride = align_up(kfilm->pass_stride, 4);
+
+  /* When displaying the normal/uv pass in the viewport we need to disable
+   * transparency.
+   *
+   * We also don't need to perform light accumulation; later this can be
+   * optimized to skip the light calculations entirely. */
+ if (display_pass == PASS_NORMAL || display_pass == PASS_UV) {
+ kfilm->use_light_pass = 0;
+ }
+ else {
+ kfilm->pass_alpha_threshold = pass_alpha_threshold;
+ }
+
+ /* update filter table */
+ vector<float> table = filter_table(filter_type, filter_width);
+ scene->lookup_tables->remove_table(&filter_table_offset);
+ filter_table_offset = scene->lookup_tables->add_table(dscene, table);
+ kfilm->filter_table_offset = (int)filter_table_offset;
+
+ /* mist pass parameters */
+ kfilm->mist_start = mist_start;
+ kfilm->mist_inv_depth = (mist_depth > 0.0f) ? 1.0f / mist_depth : 0.0f;
+ kfilm->mist_falloff = mist_falloff;
+
+ kfilm->cryptomatte_passes = cryptomatte_passes;
+ kfilm->cryptomatte_depth = cryptomatte_depth;
+
+ pass_stride = kfilm->pass_stride;
+ denoising_data_offset = kfilm->pass_denoising_data;
+ denoising_clean_offset = kfilm->pass_denoising_clean;
+
+ need_update = false;
+}
+
+void Film::device_free(Device * /*device*/, DeviceScene * /*dscene*/, Scene *scene)
+{
+ scene->lookup_tables->remove_table(&filter_table_offset);
+}
+
+bool Film::modified(const Film &film)
+{
+ return !Node::equals(film) || !Pass::equals(passes, film.passes);
+}
+
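+/* Adding or removing some passes invalidates other device data: UV and
+ * motion passes are generated from mesh attributes, and an AO pass changes
+ * the integrator setup. */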
+void Film::tag_passes_update(Scene *scene, const vector<Pass> &passes_, bool update_passes)
+{
+ if (Pass::contains(passes, PASS_UV) != Pass::contains(passes_, PASS_UV)) {
+ scene->mesh_manager->tag_update(scene);
+
+ foreach (Shader *shader, scene->shaders)
+ shader->need_update_mesh = true;
+ }
+ else if (Pass::contains(passes, PASS_MOTION) != Pass::contains(passes_, PASS_MOTION)) {
+ scene->mesh_manager->tag_update(scene);
+ }
+ else if (Pass::contains(passes, PASS_AO) != Pass::contains(passes_, PASS_AO)) {
+ scene->integrator->tag_update(scene);
+ }
+
+ if (update_passes) {
+ passes = passes_;
+ }
+}
+
+void Film::tag_update(Scene * /*scene*/)
+{
+ need_update = true;
+}
+
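+/* Return the index of the named AOV among the AOVs of its own type (color
+ * and value AOVs are numbered independently), or -1 if it is not found. */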
+int Film::get_aov_offset(string name, bool &is_color)
+{
+ int num_color = 0, num_value = 0;
+ foreach (const Pass &pass, passes) {
+ if (pass.type == PASS_AOV_COLOR) {
+ num_color++;
+ }
+ else if (pass.type == PASS_AOV_VALUE) {
+ num_value++;
+ }
+ else {
+ continue;
+ }
+
+ if (pass.name == name) {
+ is_color = (pass.type == PASS_AOV_COLOR);
+ return (is_color ? num_color : num_value) - 1;
+ }
+ }
+
+ return -1;
+}
+
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/render/film.h b/intern/cycles/render/film.h
--- a/intern/cycles/render/film.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/film.h 2020-01-10 20:42:43.470923389 +0300
@@ -81,6 +81,8 @@
CryptomatteType cryptomatte_passes;
int cryptomatte_depth;
+ bool use_adaptive_sampling;
+
bool need_update;
Film();
diff -Naur a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
--- a/intern/cycles/render/integrator.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/integrator.cpp 2020-01-10 20:42:43.470923389 +0300
@@ -18,12 +18,14 @@
#include "render/background.h"
#include "render/integrator.h"
#include "render/film.h"
+#include "render/jitter.h"
#include "render/light.h"
#include "render/scene.h"
#include "render/shader.h"
#include "render/sobol.h"
#include "util/util_foreach.h"
+#include "util/util_logging.h"
#include "util/util_hash.h"
CCL_NAMESPACE_BEGIN
@@ -66,6 +68,9 @@
SOCKET_INT(volume_samples, "Volume Samples", 1);
SOCKET_INT(start_sample, "Start Sample", 0);
+ SOCKET_FLOAT(adaptive_threshold, "Adaptive Threshold", 0.0f);
+ SOCKET_INT(adaptive_min_samples, "Adaptive Min Samples", 0);
+
SOCKET_BOOLEAN(sample_all_lights_direct, "Sample All Lights Direct", true);
SOCKET_BOOLEAN(sample_all_lights_indirect, "Sample All Lights Indirect", true);
SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.05f);
@@ -78,6 +83,7 @@
static NodeEnum sampling_pattern_enum;
sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL);
sampling_pattern_enum.insert("cmj", SAMPLING_PATTERN_CMJ);
+ sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ);
SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL);
return type;
@@ -174,6 +180,22 @@
kintegrator->sampling_pattern = sampling_pattern;
kintegrator->aa_samples = aa_samples;
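+  /* Heuristic defaults for adaptive sampling: refine each pixel for at
+   * least sqrt(total AA samples) samples, and tighten the noise threshold
+   * as the total sample count grows. */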
+ if (aa_samples > 0 && adaptive_min_samples == 0) {
+ kintegrator->adaptive_min_samples = max(4, (int)sqrtf(aa_samples));
+ VLOG(1) << "Cycles adaptive sampling: automatic min samples = "
+ << kintegrator->adaptive_min_samples;
+ }
+ else {
+ kintegrator->adaptive_min_samples = max(4, adaptive_min_samples);
+ }
+ if (aa_samples > 0 && adaptive_threshold == 0.0f) {
+ kintegrator->adaptive_threshold = max(0.001f, 1.0f / (float)aa_samples);
+ VLOG(1) << "Cycles adaptive sampling: automatic threshold = "
+ << kintegrator->adaptive_threshold;
+ }
+ else {
+ kintegrator->adaptive_threshold = adaptive_threshold;
+ }
if (light_sampling_threshold > 0.0f) {
kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold;
@@ -203,18 +225,34 @@
int dimensions = PRNG_BASE_NUM + max_samples * PRNG_BOUNCE_NUM;
dimensions = min(dimensions, SOBOL_MAX_DIMENSIONS);
- uint *directions = dscene->sobol_directions.alloc(SOBOL_BITS * dimensions);
+ if (sampling_pattern == SAMPLING_PATTERN_SOBOL) {
+ uint *directions = dscene->sample_pattern_lut.alloc(SOBOL_BITS * dimensions);
- sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions);
+ sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions);
- dscene->sobol_directions.copy_to_device();
+ dscene->sample_pattern_lut.copy_to_device();
+ }
+ else {
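+    /* For PMJ the same LUT holds 48 independent pre-generated sequences of
+     * 64 * 64 2D samples each, stored back to back; every float2 occupies
+     * two uint slots of sample_pattern_lut. */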
+ constexpr int sequence_size = 64 * 64;
+ constexpr int num_sequences = 48;
+ float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences *
+ 2);
+ TaskPool pool;
+ for (int j = 0; j < num_sequences; ++j) {
+ float2 *sequence = directions + j * sequence_size;
+ pool.push(
+ function_bind(&progressive_multi_jitter_02_generate_2D, sequence, sequence_size, j));
+ }
+ pool.wait_work();
+ dscene->sample_pattern_lut.copy_to_device();
+ }
need_update = false;
}
void Integrator::device_free(Device *, DeviceScene *dscene)
{
- dscene->sobol_directions.free();
+ dscene->sample_pattern_lut.free();
}
bool Integrator::modified(const Integrator &integrator)
diff -Naur a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h
--- a/intern/cycles/render/integrator.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/integrator.h 2020-01-10 20:42:43.470923389 +0300
@@ -75,6 +75,9 @@
bool sample_all_lights_indirect;
float light_sampling_threshold;
+ int adaptive_min_samples;
+ float adaptive_threshold;
+
enum Method {
BRANCHED_PATH = 0,
PATH = 1,
diff -Naur a/intern/cycles/render/jitter.cpp b/intern/cycles/render/jitter.cpp
--- a/intern/cycles/render/jitter.cpp 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/render/jitter.cpp 2020-01-10 20:42:43.470923389 +0300
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file is based on "Progressive Multi-Jittered Sample Sequences"
+ * by Per Christensen, Andrew Kensler and Charlie Kilpatrick.
+ * http://graphics.pixar.com/library/ProgressiveMultiJitteredSampling/paper.pdf
+ *
+ * Performance can be improved in the future by implementing the new
+ * algorithm from Matt Pharr in http://jcgt.org/published/0008/01/04/
+ * "Efficient Generation of Points that Satisfy Two-Dimensional Elementary Intervals"
+ */
+
+#include "render/jitter.h"
+
+#include <math.h>
+#include <vector>
+
+CCL_NAMESPACE_BEGIN
+
+static uint cmj_hash(uint i, uint p)
+{
+ i ^= p;
+ i ^= i >> 17;
+ i ^= i >> 10;
+ i *= 0xb36534e5;
+ i ^= i >> 12;
+ i ^= i >> 21;
+ i *= 0x93fc4795;
+ i ^= 0xdf6e307f;
+ i ^= i >> 17;
+ i *= 1 | p >> 18;
+
+ return i;
+}
+
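+/* Map a hash to [0, 1); the divisor is slightly larger than 2^32 so the
+ * result stays strictly below 1.0 after float rounding. */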
+static float cmj_randfloat(uint i, uint p)
+{
+ return cmj_hash(i, p) * (1.0f / 4294967808.0f);
+}
+
+class PMJ_Generator {
+ public:
+ static void generate_2D(float2 points[], int size, int rng_seed_in)
+ {
+ PMJ_Generator g(rng_seed_in);
+ points[0].x = g.rnd();
+ points[0].y = g.rnd();
+ int N = 1;
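+    /* Each loop iteration quadruples the sample count: the even pass fills
+     * the subquadrant diagonally opposite each existing point, then the odd
+     * pass fills the two remaining subquadrants. */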
+ while (N < size) {
+ g.extend_sequence_even(points, N);
+ g.extend_sequence_odd(points, 2 * N);
+ N = 4 * N;
+ }
+ }
+
+ protected:
+ PMJ_Generator(int rnd_seed_in) : num_samples(1), rnd_index(2), rnd_seed(rnd_seed_in)
+ {
+ }
+
+ float rnd()
+ {
+ return cmj_randfloat(++rnd_index, rnd_seed);
+ }
+
+ virtual void mark_occupied_strata(float2 points[], int N)
+ {
+ int NN = 2 * N;
+ for (int s = 0; s < NN; ++s) {
+ occupied1Dx[s] = occupied1Dy[s] = false;
+ }
+ for (int s = 0; s < N; ++s) {
+ int xstratum = (int)(NN * points[s].x);
+ int ystratum = (int)(NN * points[s].y);
+ occupied1Dx[xstratum] = true;
+ occupied1Dy[ystratum] = true;
+ }
+ }
+
+ virtual void generate_sample_point(
+ float2 points[], float i, float j, float xhalf, float yhalf, int n, int N)
+ {
+ int NN = 2 * N;
+ float2 pt;
+ int xstratum, ystratum;
+ do {
+ pt.x = (i + 0.5f * (xhalf + rnd())) / n;
+ xstratum = (int)(NN * pt.x);
+ } while (occupied1Dx[xstratum]);
+ do {
+ pt.y = (j + 0.5f * (yhalf + rnd())) / n;
+ ystratum = (int)(NN * pt.y);
+ } while (occupied1Dy[ystratum]);
+ occupied1Dx[xstratum] = true;
+ occupied1Dy[ystratum] = true;
+ points[num_samples] = pt;
+ ++num_samples;
+ }
+
+ void extend_sequence_even(float2 points[], int N)
+ {
+ int n = (int)sqrtf(N);
+ occupied1Dx.resize(2 * N);
+ occupied1Dy.resize(2 * N);
+ mark_occupied_strata(points, N);
+ for (int s = 0; s < N; ++s) {
+ float2 oldpt = points[s];
+ float i = floorf(n * oldpt.x);
+ float j = floorf(n * oldpt.y);
+ float xhalf = floorf(2.0f * (n * oldpt.x - i));
+ float yhalf = floorf(2.0f * (n * oldpt.y - j));
+ xhalf = 1.0f - xhalf;
+ yhalf = 1.0f - yhalf;
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+ }
+ }
+
+ void extend_sequence_odd(float2 points[], int N)
+ {
+ int n = (int)sqrtf(N / 2);
+ occupied1Dx.resize(2 * N);
+ occupied1Dy.resize(2 * N);
+ mark_occupied_strata(points, N);
+ std::vector<float> xhalves(N / 2);
+ std::vector<float> yhalves(N / 2);
+ for (int s = 0; s < N / 2; ++s) {
+ float2 oldpt = points[s];
+ float i = floorf(n * oldpt.x);
+ float j = floorf(n * oldpt.y);
+ float xhalf = floorf(2.0f * (n * oldpt.x - i));
+ float yhalf = floorf(2.0f * (n * oldpt.y - j));
+ if (rnd() > 0.5f) {
+ xhalf = 1.0f - xhalf;
+ }
+ else {
+ yhalf = 1.0f - yhalf;
+ }
+ xhalves[s] = xhalf;
+ yhalves[s] = yhalf;
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+ }
+ for (int s = 0; s < N / 2; ++s) {
+ float2 oldpt = points[s];
+ float i = floorf(n * oldpt.x);
+ float j = floorf(n * oldpt.y);
+ float xhalf = 1.0f - xhalves[s];
+ float yhalf = 1.0f - yhalves[s];
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+ }
+ }
+
+ std::vector<bool> occupied1Dx, occupied1Dy;
+ int num_samples;
+ int rnd_index, rnd_seed;
+};
+
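+/* The (0,2) variant additionally keeps every power-of-two elementary
+ * interval occupied by at most one point, which is what gives the sequence
+ * its low-discrepancy properties. */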
+class PMJ02_Generator : public PMJ_Generator {
+ protected:
+ void generate_sample_point(
+ float2 points[], float i, float j, float xhalf, float yhalf, int n, int N) override
+ {
+ int NN = 2 * N;
+ float2 pt;
+ do {
+ pt.x = (i + 0.5f * (xhalf + rnd())) / n;
+ pt.y = (j + 0.5f * (yhalf + rnd())) / n;
+ } while (is_occupied(pt, NN));
+ mark_occupied_strata1(pt, NN);
+ points[num_samples] = pt;
+ ++num_samples;
+ }
+
+ void mark_occupied_strata(float2 points[], int N) override
+ {
+ int NN = 2 * N;
+ int num_shapes = (int)log2f(NN) + 1;
+ occupiedStrata.resize(num_shapes);
+ for (int shape = 0; shape < num_shapes; ++shape) {
+ occupiedStrata[shape].resize(NN);
+ for (int n = 0; n < NN; ++n) {
+ occupiedStrata[shape][n] = false;
+ }
+ }
+ for (int s = 0; s < N; ++s) {
+ mark_occupied_strata1(points[s], NN);
+ }
+ }
+
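+  /* Walk all log2(NN) + 1 stratifications, from NN x 1 columns down to
+   * 1 x NN rows, and flag the cell containing the point in each of them. */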
+ void mark_occupied_strata1(float2 pt, int NN)
+ {
+ int shape = 0;
+ int xdivs = NN;
+ int ydivs = 1;
+ do {
+ int xstratum = (int)(xdivs * pt.x);
+ int ystratum = (int)(ydivs * pt.y);
+ size_t index = ystratum * xdivs + xstratum;
+ assert(index < NN);
+ occupiedStrata[shape][index] = true;
+ shape = shape + 1;
+ xdivs = xdivs / 2;
+ ydivs = ydivs * 2;
+ } while (xdivs > 0);
+ }
+
+ bool is_occupied(float2 pt, int NN)
+ {
+ int shape = 0;
+ int xdivs = NN;
+ int ydivs = 1;
+ do {
+ int xstratum = (int)(xdivs * pt.x);
+ int ystratum = (int)(ydivs * pt.y);
+ size_t index = ystratum * xdivs + xstratum;
+ assert(index < NN);
+ if (occupiedStrata[shape][index]) {
+ return true;
+ }
+ shape = shape + 1;
+ xdivs = xdivs / 2;
+ ydivs = ydivs * 2;
+ } while (xdivs > 0);
+ return false;
+ }
+
+ private:
+ std::vector<std::vector<bool>> occupiedStrata;
+};
+
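+/* Seeded Fisher-Yates shuffle applied separately to two fixed interleaved
+ * index sets within every block of 16 points; sequence 0 is left
+ * unshuffled. */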
+static void shuffle(float2 points[], int size, int rng_seed)
+{
+ /* Offset samples by 1.0 for faster scrambling in kernel_random.h */
+ for (int i = 0; i < size; ++i) {
+ points[i].x += 1.0f;
+ points[i].y += 1.0f;
+ }
+
+ if (rng_seed == 0) {
+ return;
+ }
+
+ constexpr int odd[8] = {0, 1, 4, 5, 10, 11, 14, 15};
+ constexpr int even[8] = {2, 3, 6, 7, 8, 9, 12, 13};
+
+ int rng_index = 0;
+ for (int yy = 0; yy < size / 16; ++yy) {
+ for (int xx = 0; xx < 8; ++xx) {
+ int other = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - xx) + xx);
+ float2 tmp = points[odd[other] + yy * 16];
+ points[odd[other] + yy * 16] = points[odd[xx] + yy * 16];
+ points[odd[xx] + yy * 16] = tmp;
+ }
+ for (int xx = 0; xx < 8; ++xx) {
+ int other = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - xx) + xx);
+ float2 tmp = points[even[other] + yy * 16];
+ points[even[other] + yy * 16] = points[even[xx] + yy * 16];
+ points[even[xx] + yy * 16] = tmp;
+ }
+ }
+}
+
+void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed)
+{
+ PMJ_Generator::generate_2D(points, size, rng_seed);
+ shuffle(points, size, rng_seed);
+}
+
+void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed)
+{
+ PMJ02_Generator::generate_2D(points, size, rng_seed);
+ shuffle(points, size, rng_seed);
+}
+
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/render/jitter.h b/intern/cycles/render/jitter.h
--- a/intern/cycles/render/jitter.h 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/render/jitter.h 2020-01-10 20:42:43.470923389 +0300
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __JITTER_H__
+#define __JITTER_H__
+
+#include "util/util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
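+/* Both generators fill a caller-allocated array of "size" points, where
+ * size is expected to be a power of four (the integrator uses 64 * 64),
+ * for example:
+ *
+ *   float2 points[64 * 64];
+ *   progressive_multi_jitter_02_generate_2D(points, 64 * 64, seed);
+ */
+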
+void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed);
+void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed);
+
+CCL_NAMESPACE_END
+
+#endif /* __JITTER_H__ */
diff -Naur a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp
--- a/intern/cycles/render/scene.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/scene.cpp 2020-01-10 20:42:43.474256721 +0300
@@ -77,7 +77,7 @@
svm_nodes(device, "__svm_nodes", MEM_TEXTURE),
shaders(device, "__shaders", MEM_TEXTURE),
lookup_table(device, "__lookup_table", MEM_TEXTURE),
- sobol_directions(device, "__sobol_directions", MEM_TEXTURE),
+ sample_pattern_lut(device, "__sample_pattern_lut", MEM_TEXTURE),
ies_lights(device, "__ies", MEM_TEXTURE)
{
memset((void *)&data, 0, sizeof(data));
diff -Naur a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
--- a/intern/cycles/render/scene.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/scene.h 2020-01-10 20:42:43.474256721 +0300
@@ -119,7 +119,7 @@
device_vector<float> lookup_table;
/* integrator */
- device_vector<uint> sobol_directions;
+ device_vector<uint> sample_pattern_lut;
/* ies lights */
device_vector<float> ies_lights;
diff -Naur a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
--- a/intern/cycles/render/session.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/session.cpp 2020-01-10 20:42:43.474256721 +0300
@@ -900,7 +900,7 @@
Integrator *integrator = scene->integrator;
BakeManager *bake_manager = scene->bake_manager;
- if (integrator->sampling_pattern == SAMPLING_PATTERN_CMJ || bake_manager->get_baking()) {
+ if (integrator->sampling_pattern != SAMPLING_PATTERN_SOBOL || bake_manager->get_baking()) {
int aa_samples = tile_manager.num_samples;
if (aa_samples != integrator->aa_samples) {
@@ -1022,6 +1022,7 @@
task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
task.need_finish_queue = params.progressive_refine;
task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
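+  /* In this patch adaptive sampling is tied to the PMJ pattern, so the
+   * device task can key off the sampling pattern alone. */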
+ task.integrator_adaptive = scene->integrator->sampling_pattern == SAMPLING_PATTERN_PMJ;
task.requested_tile_size = params.tile_size;
task.passes_size = tile_manager.params.get_passes_size();
diff -Naur a/intern/cycles/render/session.cpp.orig b/intern/cycles/render/session.cpp.orig
--- a/intern/cycles/render/session.cpp.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/render/session.cpp.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,1163 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <limits.h>
+
+#include "render/buffers.h"
+#include "render/camera.h"
+#include "device/device.h"
+#include "render/graph.h"
+#include "render/integrator.h"
+#include "render/light.h"
+#include "render/mesh.h"
+#include "render/object.h"
+#include "render/scene.h"
+#include "render/session.h"
+#include "render/bake.h"
+
+#include "util/util_foreach.h"
+#include "util/util_function.h"
+#include "util/util_logging.h"
+#include "util/util_math.h"
+#include "util/util_opengl.h"
+#include "util/util_task.h"
+#include "util/util_time.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Note about the preserve_tile_device option for the tile manager:
+ * progressive refine and viewport rendering require tiles to always be
+ * allocated for the same device.
+ */
+Session::Session(const SessionParams &params_)
+ : params(params_),
+ tile_manager(params.progressive,
+ params.samples,
+ params.tile_size,
+ params.start_resolution,
+ params.background == false || params.progressive_refine,
+ params.background,
+ params.tile_order,
+ max(params.device.multi_devices.size(), 1),
+ params.pixel_size),
+ stats(),
+ profiler()
+{
+ device_use_gl = ((params.device.type != DEVICE_CPU) && !params.background);
+
+ TaskScheduler::init(params.threads);
+
+ device = Device::create(params.device, stats, profiler, params.background);
+
+ if (params.background && !params.write_render_cb) {
+ buffers = NULL;
+ display = NULL;
+ }
+ else {
+ buffers = new RenderBuffers(device);
+ display = new DisplayBuffer(device, params.display_buffer_linear);
+ }
+
+ session_thread = NULL;
+ scene = NULL;
+
+ reset_time = 0.0;
+ last_update_time = 0.0;
+
+ delayed_reset.do_reset = false;
+ delayed_reset.samples = 0;
+
+ display_outdated = false;
+ gpu_draw_ready = false;
+ gpu_need_display_buffer_update = false;
+ pause = false;
+ kernels_loaded = false;
+
+  /* TODO(sergey): Check if it's indeed an optimal value for the split kernel. */
+ max_closure_global = 1;
+}
+
+Session::~Session()
+{
+ if (session_thread) {
+ /* wait for session thread to end */
+ progress.set_cancel("Exiting");
+
+ gpu_need_display_buffer_update = false;
+ gpu_need_display_buffer_update_cond.notify_all();
+
+ {
+ thread_scoped_lock pause_lock(pause_mutex);
+ pause = false;
+ }
+ pause_cond.notify_all();
+
+ wait();
+ }
+
+ if (params.write_render_cb) {
+ /* Copy to display buffer and write out image if requested */
+ delete display;
+
+ display = new DisplayBuffer(device, false);
+ display->reset(buffers->params);
+ copy_to_display_buffer(params.samples);
+
+ int w = display->draw_width;
+ int h = display->draw_height;
+ uchar4 *pixels = display->rgba_byte.copy_from_device(0, w, h);
+ params.write_render_cb((uchar *)pixels, w, h, 4);
+ }
+
+ /* clean up */
+ tile_manager.device_free();
+
+ delete buffers;
+ delete display;
+ delete scene;
+ delete device;
+
+ TaskScheduler::exit();
+}
+
+void Session::start()
+{
+ if (!session_thread) {
+ session_thread = new thread(function_bind(&Session::run, this));
+ }
+}
+
+bool Session::ready_to_reset()
+{
+ double dt = time_dt() - reset_time;
+
+ if (!display_outdated)
+ return (dt > params.reset_timeout);
+ else
+ return (dt > params.cancel_timeout);
+}
+
+/* GPU Session */
+
+void Session::reset_gpu(BufferParams &buffer_params, int samples)
+{
+ thread_scoped_lock pause_lock(pause_mutex);
+
+ /* block for buffer access and reset immediately. we can't do this
+ * in the thread, because we need to allocate an OpenGL buffer, and
+ * that only works in the main thread */
+ thread_scoped_lock display_lock(display_mutex);
+ thread_scoped_lock buffers_lock(buffers_mutex);
+
+ display_outdated = true;
+ reset_time = time_dt();
+
+ reset_(buffer_params, samples);
+
+ gpu_need_display_buffer_update = false;
+ gpu_need_display_buffer_update_cond.notify_all();
+
+ pause_cond.notify_all();
+}
+
+bool Session::draw_gpu(BufferParams &buffer_params, DeviceDrawParams &draw_params)
+{
+ /* block for buffer access */
+ thread_scoped_lock display_lock(display_mutex);
+
+  /* first check that we already rendered something */
+ if (gpu_draw_ready) {
+ /* then verify the buffers have the expected size, so we don't
+ * draw previous results in a resized window */
+ if (!buffer_params.modified(display->params)) {
+ /* for CUDA we need to do tone-mapping still, since we can
+ * only access GL buffers from the main thread. */
+ if (gpu_need_display_buffer_update) {
+ thread_scoped_lock buffers_lock(buffers_mutex);
+ copy_to_display_buffer(tile_manager.state.sample);
+ gpu_need_display_buffer_update = false;
+ gpu_need_display_buffer_update_cond.notify_all();
+ }
+
+ display->draw(device, draw_params);
+
+ if (display_outdated && (time_dt() - reset_time) > params.text_timeout)
+ return false;
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void Session::run_gpu()
+{
+ bool tiles_written = false;
+
+ reset_time = time_dt();
+ last_update_time = time_dt();
+
+ progress.set_render_start_time();
+
+ while (!progress.get_cancel()) {
+ /* advance to next tile */
+ bool no_tiles = !tile_manager.next();
+
+ DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN;
+ if (no_tiles) {
+ kernel_state = device->get_active_kernel_switch_state();
+ }
+
+ if (params.background) {
+ /* if no work left and in background mode, we can stop immediately */
+ if (no_tiles) {
+ progress.set_status("Finished");
+ break;
+ }
+ }
+
+    /* Don't go into pause mode when the image was rendered with preview
+     * kernels. When feature kernels become available the session will be
+     * reset. */
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+ time_sleep(0.1);
+ }
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
+ reset_gpu(tile_manager.params, params.samples);
+ }
+
+ else {
+ /* if in interactive mode, and we are either paused or done for now,
+ * wait for pause condition notify to wake up again */
+ thread_scoped_lock pause_lock(pause_mutex);
+
+ if (!pause && !tile_manager.done()) {
+        /* A reset could have happened after no_tiles was set, before this
+         * lock. In this case we shall not wait for the pause condition. */
+ }
+ else if (pause || no_tiles) {
+ update_status_time(pause, no_tiles);
+
+ while (1) {
+ scoped_timer pause_timer;
+ pause_cond.wait(pause_lock);
+ if (pause) {
+ progress.add_skip_time(pause_timer, params.background);
+ }
+
+ update_status_time(pause, no_tiles);
+ progress.set_update();
+
+ if (!pause)
+ break;
+ }
+ }
+
+ if (progress.get_cancel())
+ break;
+ }
+
+ if (!no_tiles) {
+ /* update scene */
+ scoped_timer update_timer;
+ if (update_scene()) {
+ profiler.reset(scene->shaders.size(), scene->objects.size());
+ }
+ progress.add_skip_time(update_timer, params.background);
+
+ if (!device->error_message().empty())
+ progress.set_error(device->error_message());
+
+ if (progress.get_cancel())
+ break;
+
+ /* buffers mutex is locked entirely while rendering each
+ * sample, and released/reacquired on each iteration to allow
+ * reset and draw in between */
+ thread_scoped_lock buffers_lock(buffers_mutex);
+
+ /* update status and timing */
+ update_status_time();
+
+ /* render */
+ render();
+
+ device->task_wait();
+
+ if (!device->error_message().empty())
+ progress.set_cancel(device->error_message());
+
+ /* update status and timing */
+ update_status_time();
+
+ gpu_need_display_buffer_update = true;
+ gpu_draw_ready = true;
+ progress.set_update();
+
+      /* wait until the display buffer is updated */
+ if (!params.background) {
+ while (gpu_need_display_buffer_update) {
+ if (progress.get_cancel())
+ break;
+
+ gpu_need_display_buffer_update_cond.wait(buffers_lock);
+ }
+ }
+
+ if (!device->error_message().empty())
+ progress.set_error(device->error_message());
+
+ tiles_written = update_progressive_refine(progress.get_cancel());
+
+ if (progress.get_cancel())
+ break;
+ }
+ }
+
+ if (!tiles_written)
+ update_progressive_refine(true);
+}
+
+/* CPU Session */
+
+void Session::reset_cpu(BufferParams &buffer_params, int samples)
+{
+ thread_scoped_lock reset_lock(delayed_reset.mutex);
+ thread_scoped_lock pause_lock(pause_mutex);
+
+ display_outdated = true;
+ reset_time = time_dt();
+
+ delayed_reset.params = buffer_params;
+ delayed_reset.samples = samples;
+ delayed_reset.do_reset = true;
+ device->task_cancel();
+
+ pause_cond.notify_all();
+}
+
+bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_params)
+{
+ thread_scoped_lock display_lock(display_mutex);
+
+  /* first check that we already rendered something */
+ if (display->draw_ready()) {
+ /* then verify the buffers have the expected size, so we don't
+ * draw previous results in a resized window */
+ if (!buffer_params.modified(display->params)) {
+ display->draw(device, draw_params);
+
+ if (display_outdated && (time_dt() - reset_time) > params.text_timeout)
+ return false;
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
+{
+ if (progress.get_cancel()) {
+ if (params.progressive_refine == false) {
+ /* for progressive refine current sample should be finished for all tiles */
+ return false;
+ }
+ }
+
+ thread_scoped_lock tile_lock(tile_mutex);
+
+ /* get next tile from manager */
+ Tile *tile;
+ int device_num = device->device_number(tile_device);
+
+ if (!tile_manager.next_tile(tile, device_num))
+ return false;
+
+ /* fill render tile */
+ rtile.x = tile_manager.state.buffer.full_x + tile->x;
+ rtile.y = tile_manager.state.buffer.full_y + tile->y;
+ rtile.w = tile->w;
+ rtile.h = tile->h;
+ rtile.start_sample = tile_manager.state.sample;
+ rtile.num_samples = tile_manager.state.num_samples;
+ rtile.resolution = tile_manager.state.resolution_divider;
+ rtile.tile_index = tile->index;
+ rtile.task = (tile->state == Tile::DENOISE) ? RenderTile::DENOISE : RenderTile::PATH_TRACE;
+
+ tile_lock.unlock();
+
+ /* in case of a permanent buffer, return it, otherwise we will allocate
+ * a new temporary buffer */
+ if (buffers) {
+ tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride);
+
+ rtile.buffer = buffers->buffer.device_pointer;
+ rtile.buffers = buffers;
+
+ device->map_tile(tile_device, rtile);
+
+ return true;
+ }
+
+ if (tile->buffers == NULL) {
+ /* fill buffer parameters */
+ BufferParams buffer_params = tile_manager.params;
+ buffer_params.full_x = rtile.x;
+ buffer_params.full_y = rtile.y;
+ buffer_params.width = rtile.w;
+ buffer_params.height = rtile.h;
+
+ /* allocate buffers */
+ tile->buffers = new RenderBuffers(tile_device);
+ tile->buffers->reset(buffer_params);
+ }
+
+ tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride);
+
+ rtile.buffer = tile->buffers->buffer.device_pointer;
+ rtile.buffers = tile->buffers;
+ rtile.sample = tile_manager.state.sample;
+
+  /* This will tag the tile as IN PROGRESS in the Blender-side render
+   * pipeline, which is needed to highlight the currently rendering tile
+   * before its first sample has been processed.
+   */
+ update_tile_sample(rtile);
+
+ return true;
+}
+
+void Session::update_tile_sample(RenderTile &rtile)
+{
+ thread_scoped_lock tile_lock(tile_mutex);
+
+ if (update_render_tile_cb) {
+ if (params.progressive_refine == false) {
+ /* todo: optimize this by making it thread safe and removing lock */
+
+ update_render_tile_cb(rtile, true);
+ }
+ }
+
+ update_status_time();
+}
+
+void Session::release_tile(RenderTile &rtile)
+{
+ thread_scoped_lock tile_lock(tile_mutex);
+
+ progress.add_finished_tile(rtile.task == RenderTile::DENOISE);
+
+ bool delete_tile;
+
+ if (tile_manager.finish_tile(rtile.tile_index, delete_tile)) {
+ if (write_render_tile_cb && params.progressive_refine == false) {
+ write_render_tile_cb(rtile);
+ }
+
+ if (delete_tile) {
+ delete rtile.buffers;
+ tile_manager.state.tiles[rtile.tile_index].buffers = NULL;
+ }
+ }
+ else {
+ if (update_render_tile_cb && params.progressive_refine == false) {
+ update_render_tile_cb(rtile, false);
+ }
+ }
+
+ update_status_time();
+}
+
+void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
+{
+ thread_scoped_lock tile_lock(tile_mutex);
+
+ int center_idx = tiles[4].tile_index;
+ assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
+ BufferParams buffer_params = tile_manager.params;
+ int4 image_region = make_int4(buffer_params.full_x,
+ buffer_params.full_y,
+ buffer_params.full_x + buffer_params.width,
+ buffer_params.full_y + buffer_params.height);
+
+ for (int dy = -1, i = 0; dy <= 1; dy++) {
+ for (int dx = -1; dx <= 1; dx++, i++) {
+ int px = tiles[4].x + dx * params.tile_size.x;
+ int py = tiles[4].y + dy * params.tile_size.y;
+ if (px >= image_region.x && py >= image_region.y && px < image_region.z &&
+ py < image_region.w) {
+ int tile_index = center_idx + dy * tile_manager.state.tile_stride + dx;
+ Tile *tile = &tile_manager.state.tiles[tile_index];
+ assert(tile->buffers);
+
+ tiles[i].buffer = tile->buffers->buffer.device_pointer;
+ tiles[i].x = tile_manager.state.buffer.full_x + tile->x;
+ tiles[i].y = tile_manager.state.buffer.full_y + tile->y;
+ tiles[i].w = tile->w;
+ tiles[i].h = tile->h;
+ tiles[i].buffers = tile->buffers;
+
+ tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride);
+ }
+ else {
+ tiles[i].buffer = (device_ptr)NULL;
+ tiles[i].buffers = NULL;
+ tiles[i].x = clamp(px, image_region.x, image_region.z);
+ tiles[i].y = clamp(py, image_region.y, image_region.w);
+ tiles[i].w = tiles[i].h = 0;
+ }
+ }
+ }
+
+ assert(tiles[4].buffers);
+ device->map_neighbor_tiles(tile_device, tiles);
+
+ /* The denoised result is written back to the original tile. */
+ tiles[9] = tiles[4];
+}
+
+void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device)
+{
+ thread_scoped_lock tile_lock(tile_mutex);
+ device->unmap_neighbor_tiles(tile_device, tiles);
+}
+
+void Session::run_cpu()
+{
+ bool tiles_written = false;
+
+ last_update_time = time_dt();
+
+ {
+ /* reset once to start */
+ thread_scoped_lock reset_lock(delayed_reset.mutex);
+ thread_scoped_lock buffers_lock(buffers_mutex);
+ thread_scoped_lock display_lock(display_mutex);
+
+ reset_(delayed_reset.params, delayed_reset.samples);
+ delayed_reset.do_reset = false;
+ }
+
+ while (!progress.get_cancel()) {
+ /* advance to next tile */
+ bool no_tiles = !tile_manager.next();
+ bool need_copy_to_display_buffer = false;
+
+ DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN;
+ if (no_tiles) {
+ kernel_state = device->get_active_kernel_switch_state();
+ }
+
+ if (params.background) {
+ /* if no work left and in background mode, we can stop immediately */
+ if (no_tiles) {
+ progress.set_status("Finished");
+ break;
+ }
+ }
+
+    /* Don't go into pause mode when preview kernels are used. When feature
+     * kernels become available the session will be reset. */
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+ time_sleep(0.1);
+ }
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
+ reset_cpu(tile_manager.params, params.samples);
+ }
+
+ else {
+ /* if in interactive mode, and we are either paused or done for now,
+ * wait for pause condition notify to wake up again */
+ thread_scoped_lock pause_lock(pause_mutex);
+
+ if (!pause && delayed_reset.do_reset) {
+ /* reset once to start */
+ thread_scoped_lock reset_lock(delayed_reset.mutex);
+ thread_scoped_lock buffers_lock(buffers_mutex);
+ thread_scoped_lock display_lock(display_mutex);
+
+ reset_(delayed_reset.params, delayed_reset.samples);
+ delayed_reset.do_reset = false;
+ }
+ else if (pause || no_tiles) {
+ update_status_time(pause, no_tiles);
+
+ while (1) {
+ scoped_timer pause_timer;
+ pause_cond.wait(pause_lock);
+ if (pause) {
+ progress.add_skip_time(pause_timer, params.background);
+ }
+
+ update_status_time(pause, no_tiles);
+ progress.set_update();
+
+ if (!pause)
+ break;
+ }
+ }
+
+ if (progress.get_cancel())
+ break;
+ }
+
+ if (!no_tiles) {
+ /* buffers mutex is locked entirely while rendering each
+ * sample, and released/reacquired on each iteration to allow
+ * reset and draw in between */
+ thread_scoped_lock buffers_lock(buffers_mutex);
+
+ /* update scene */
+ scoped_timer update_timer;
+ if (update_scene()) {
+ profiler.reset(scene->shaders.size(), scene->objects.size());
+ }
+ progress.add_skip_time(update_timer, params.background);
+
+ if (!device->error_message().empty())
+ progress.set_error(device->error_message());
+
+ if (progress.get_cancel())
+ break;
+
+ /* update status and timing */
+ update_status_time();
+
+ /* render */
+ render();
+
+ /* update status and timing */
+ update_status_time();
+
+ if (!params.background)
+ need_copy_to_display_buffer = true;
+
+ if (!device->error_message().empty())
+ progress.set_error(device->error_message());
+ }
+
+ device->task_wait();
+
+ {
+ thread_scoped_lock reset_lock(delayed_reset.mutex);
+ thread_scoped_lock buffers_lock(buffers_mutex);
+ thread_scoped_lock display_lock(display_mutex);
+
+ if (delayed_reset.do_reset) {
+ /* reset rendering if request from main thread */
+ delayed_reset.do_reset = false;
+ reset_(delayed_reset.params, delayed_reset.samples);
+ }
+ else if (need_copy_to_display_buffer) {
+        /* Only copy to display_buffer if we do not reset; we don't want to
+         * show the result of an incomplete sample. */
+ copy_to_display_buffer(tile_manager.state.sample);
+ }
+
+ if (!device->error_message().empty())
+ progress.set_error(device->error_message());
+
+ tiles_written = update_progressive_refine(progress.get_cancel());
+ }
+
+ progress.set_update();
+ }
+
+ if (!tiles_written)
+ update_progressive_refine(true);
+}
+
+DeviceRequestedFeatures Session::get_requested_device_features()
+{
+ /* TODO(sergey): Consider moving this to the Scene level. */
+ DeviceRequestedFeatures requested_features;
+ requested_features.experimental = params.experimental;
+
+ scene->shader_manager->get_requested_features(scene, &requested_features);
+
+  /* These features are not tweaked as often as shaders, so selective
+   * handling could be done for the viewport as well.
+   */
+ bool use_motion = scene->need_motion() == Scene::MotionType::MOTION_BLUR;
+ requested_features.use_hair = false;
+ requested_features.use_object_motion = false;
+ requested_features.use_camera_motion = use_motion && scene->camera->use_motion();
+ foreach (Object *object, scene->objects) {
+ Mesh *mesh = object->mesh;
+ if (mesh->num_curves()) {
+ requested_features.use_hair = true;
+ }
+ if (use_motion) {
+ requested_features.use_object_motion |= object->use_motion() | mesh->use_motion_blur;
+ requested_features.use_camera_motion |= mesh->use_motion_blur;
+ }
+#ifdef WITH_OPENSUBDIV
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
+ requested_features.use_patch_evaluation = true;
+ }
+#endif
+ if (object->is_shadow_catcher) {
+ requested_features.use_shadow_tricks = true;
+ }
+ requested_features.use_true_displacement |= mesh->has_true_displacement();
+ }
+
+ requested_features.use_background_light = scene->light_manager->has_background_light(scene);
+
+ BakeManager *bake_manager = scene->bake_manager;
+ requested_features.use_baking = bake_manager->get_baking();
+ requested_features.use_integrator_branched = (scene->integrator->method ==
+ Integrator::BRANCHED_PATH);
+ if (params.run_denoising) {
+ requested_features.use_denoising = true;
+ requested_features.use_shadow_tricks = true;
+ }
+
+ return requested_features;
+}
+
+bool Session::load_kernels(bool lock_scene)
+{
+ thread_scoped_lock scene_lock;
+ if (lock_scene) {
+ scene_lock = thread_scoped_lock(scene->mutex);
+ }
+
+ DeviceRequestedFeatures requested_features = get_requested_device_features();
+
+ if (!kernels_loaded || loaded_kernel_features.modified(requested_features)) {
+ progress.set_status("Loading render kernels (may take a few minutes the first time)");
+
+ scoped_timer timer;
+
+ VLOG(2) << "Requested features:\n" << requested_features;
+ if (!device->load_kernels(requested_features)) {
+ string message = device->error_message();
+ if (message.empty())
+ message = "Failed loading render kernel, see console for errors";
+
+ progress.set_error(message);
+ progress.set_status("Error", message);
+ progress.set_update();
+ return false;
+ }
+
+ progress.add_skip_time(timer, false);
+ VLOG(1) << "Total time spent loading kernels: " << time_dt() - timer.get_start();
+
+ kernels_loaded = true;
+ loaded_kernel_features = requested_features;
+ return true;
+ }
+ return false;
+}
+
+void Session::run()
+{
+ if (params.use_profiling && (params.device.type == DEVICE_CPU)) {
+ profiler.start();
+ }
+
+ /* session thread loop */
+ progress.set_status("Waiting for render to start");
+
+ /* run */
+ if (!progress.get_cancel()) {
+ /* reset number of rendered samples */
+ progress.reset_sample();
+
+ if (device_use_gl)
+ run_gpu();
+ else
+ run_cpu();
+ }
+
+ profiler.stop();
+
+ /* progress update */
+ if (progress.get_cancel())
+ progress.set_status("Cancel", progress.get_cancel_message());
+ else
+ progress.set_update();
+}
+
+bool Session::draw(BufferParams &buffer_params, DeviceDrawParams &draw_params)
+{
+ if (device_use_gl)
+ return draw_gpu(buffer_params, draw_params);
+ else
+ return draw_cpu(buffer_params, draw_params);
+}
+
+void Session::reset_(BufferParams &buffer_params, int samples)
+{
+ if (buffers && buffer_params.modified(tile_manager.params)) {
+ gpu_draw_ready = false;
+ buffers->reset(buffer_params);
+ if (display) {
+ display->reset(buffer_params);
+ }
+ }
+
+ tile_manager.reset(buffer_params, samples);
+ progress.reset_sample();
+
+ bool show_progress = params.background || tile_manager.get_num_effective_samples() != INT_MAX;
+ progress.set_total_pixel_samples(show_progress ? tile_manager.state.total_pixel_samples : 0);
+
+ if (!params.background)
+ progress.set_start_time();
+ progress.set_render_start_time();
+}
+
+void Session::reset(BufferParams &buffer_params, int samples)
+{
+ if (device_use_gl)
+ reset_gpu(buffer_params, samples);
+ else
+ reset_cpu(buffer_params, samples);
+}
+
+void Session::set_samples(int samples)
+{
+ if (samples != params.samples) {
+ params.samples = samples;
+ tile_manager.set_samples(samples);
+
+ {
+ thread_scoped_lock pause_lock(pause_mutex);
+ }
+ pause_cond.notify_all();
+ }
+}
+
+void Session::set_pause(bool pause_)
+{
+ bool notify = false;
+
+ {
+ thread_scoped_lock pause_lock(pause_mutex);
+
+ if (pause != pause_) {
+ pause = pause_;
+ notify = true;
+ }
+ }
+
+ if (notify)
+ pause_cond.notify_all();
+}
+
+void Session::wait()
+{
+ if (session_thread) {
+ session_thread->join();
+ delete session_thread;
+ }
+
+ session_thread = NULL;
+}
+
+bool Session::update_scene()
+{
+ thread_scoped_lock scene_lock(scene->mutex);
+
+ /* update camera if dimensions changed for progressive render. the camera
+ * knows nothing about progressive or cropped rendering, it just gets the
+ * image dimensions passed in */
+ Camera *cam = scene->camera;
+ int width = tile_manager.state.buffer.full_width;
+ int height = tile_manager.state.buffer.full_height;
+ int resolution = tile_manager.state.resolution_divider;
+
+ if (width != cam->width || height != cam->height) {
+ cam->width = width;
+ cam->height = height;
+ cam->resolution = resolution;
+ cam->tag_update();
+ }
+
+  /* the number of samples is needed by the multi-jittered
+   * sampling pattern and by baking */
+ Integrator *integrator = scene->integrator;
+ BakeManager *bake_manager = scene->bake_manager;
+
+ if (integrator->sampling_pattern == SAMPLING_PATTERN_CMJ || bake_manager->get_baking()) {
+ int aa_samples = tile_manager.num_samples;
+
+ if (aa_samples != integrator->aa_samples) {
+ integrator->aa_samples = aa_samples;
+ integrator->tag_update(scene);
+ }
+ }
+
+ /* update scene */
+ if (scene->need_update()) {
+ bool new_kernels_needed = load_kernels(false);
+
+ /* Update max_closures. */
+ KernelIntegrator *kintegrator = &scene->dscene.data.integrator;
+ if (params.background) {
+ kintegrator->max_closures = get_max_closure_count();
+ }
+ else {
+ /* Currently viewport render is faster with higher max_closures, needs investigating. */
+ kintegrator->max_closures = MAX_CLOSURE;
+ }
+
+ progress.set_status("Updating Scene");
+ MEM_GUARDED_CALL(&progress, scene->device_update, device, progress);
+
+ DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state();
+ bool kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE ||
+ kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
+ if (kernel_switch_status == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+ progress.set_kernel_status("Compiling render kernels");
+ }
+ if (new_kernels_needed || kernel_switch_needed) {
+ progress.set_kernel_status("Compiling render kernels");
+ device->wait_for_availability(loaded_kernel_features);
+ progress.set_kernel_status("");
+ }
+
+ if (kernel_switch_needed) {
+ reset(tile_manager.params, params.samples);
+ }
+ return true;
+ }
+ return false;
+}
+
+void Session::update_status_time(bool show_pause, bool show_done)
+{
+ int progressive_sample = tile_manager.state.sample;
+ int num_samples = tile_manager.get_num_effective_samples();
+
+ int tile = progress.get_rendered_tiles();
+ int num_tiles = tile_manager.state.num_tiles;
+
+ /* update status */
+ string status, substatus;
+
+ if (!params.progressive) {
+ const bool is_cpu = params.device.type == DEVICE_CPU;
+ const bool rendering_finished = (tile == num_tiles);
+ const bool is_last_tile = (tile + 1) == num_tiles;
+
+ substatus = string_printf("Rendered %d/%d Tiles", tile, num_tiles);
+
+ if (!rendering_finished && (device->show_samples() || (is_cpu && is_last_tile))) {
+ /* Some devices automatically support showing the sample number:
+ * - CUDADevice
+ * - OpenCLDevice when using the megakernel (the split kernel renders multiple
+ * samples at the same time, so the current sample isn't really defined)
+ * - CPUDevice when using one thread
+ * For these devices, the current sample is always shown.
+ *
+ * The other option is when the last tile is currently being rendered by the CPU.
+ */
+ substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples);
+ }
+ if (params.full_denoising || params.optix_denoising) {
+ substatus += string_printf(", Denoised %d tiles", progress.get_denoised_tiles());
+ }
+ else if (params.run_denoising) {
+ substatus += string_printf(", Prefiltered %d tiles", progress.get_denoised_tiles());
+ }
+ }
+ else if (tile_manager.num_samples == Integrator::MAX_SAMPLES)
+ substatus = string_printf("Path Tracing Sample %d", progressive_sample + 1);
+ else
+ substatus = string_printf("Path Tracing Sample %d/%d", progressive_sample + 1, num_samples);
+
+ if (show_pause) {
+ status = "Rendering Paused";
+ }
+ else if (show_done) {
+ status = "Rendering Done";
+ progress.set_end_time(); /* Save end time so that further calls to get_time are accurate. */
+ }
+ else {
+ status = substatus;
+ substatus.clear();
+ }
+
+ progress.set_status(status, substatus);
+}
+
+void Session::render()
+{
+ /* Clear buffers. */
+ if (buffers && tile_manager.state.sample == tile_manager.range_start_sample) {
+ buffers->zero();
+ }
+
+ /* Add path trace task. */
+ DeviceTask task(DeviceTask::RENDER);
+
+ task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2);
+ task.release_tile = function_bind(&Session::release_tile, this, _1);
+ task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2);
+ task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2);
+ task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
+ task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
+ task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
+ task.need_finish_queue = params.progressive_refine;
+ task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
+ task.requested_tile_size = params.tile_size;
+ task.passes_size = tile_manager.params.get_passes_size();
+
+ if (params.run_denoising) {
+ task.denoising = params.denoising;
+
+ assert(!scene->film->need_update);
+ task.pass_stride = scene->film->pass_stride;
+ task.target_pass_stride = task.pass_stride;
+ task.pass_denoising_data = scene->film->denoising_data_offset;
+ task.pass_denoising_clean = scene->film->denoising_clean_offset;
+
+ task.denoising_from_render = true;
+ task.denoising_do_filter = params.full_denoising;
+ task.denoising_use_optix = params.optix_denoising;
+ task.denoising_write_passes = params.write_denoising_passes;
+ }
+
+ device->task_add(task);
+}
+
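+/* function_bind is Cycles' thin wrapper around std::bind (see
+ * util/util_function.h), so task.acquire_tile above resolves to
+ * this->acquire_tile(tile_device, tile) whenever a device worker thread
+ * requests a new tile. */
+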
+void Session::copy_to_display_buffer(int sample)
+{
+ /* add film conversion task */
+ DeviceTask task(DeviceTask::FILM_CONVERT);
+
+ task.x = tile_manager.state.buffer.full_x;
+ task.y = tile_manager.state.buffer.full_y;
+ task.w = tile_manager.state.buffer.width;
+ task.h = tile_manager.state.buffer.height;
+ task.rgba_byte = display->rgba_byte.device_pointer;
+ task.rgba_half = display->rgba_half.device_pointer;
+ task.buffer = buffers->buffer.device_pointer;
+ task.sample = sample;
+ tile_manager.state.buffer.get_offset_stride(task.offset, task.stride);
+
+ if (task.w > 0 && task.h > 0) {
+ device->task_add(task);
+ device->task_wait();
+
+ /* set display to new size */
+ display->draw_set(task.w, task.h);
+ }
+
+ display_outdated = false;
+}
+
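+/* The film convert task above is synchronous: task_wait() blocks until the
+ * conversion finishes, so draw_set() only ever sees a fully converted
+ * buffer, and zero-sized viewports skip the device round-trip entirely. */
+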
+bool Session::update_progressive_refine(bool cancel)
+{
+ int sample = tile_manager.state.sample + 1;
+ bool write = sample == tile_manager.num_samples || cancel;
+
+ double current_time = time_dt();
+
+ if (current_time - last_update_time < params.progressive_update_timeout) {
+    /* Within the timeout, skip the update unless this is the final write or the very first sample. */
+ if (!write && sample != 1)
+ return false;
+ }
+
+ if (params.progressive_refine) {
+ foreach (Tile &tile, tile_manager.state.tiles) {
+ if (!tile.buffers) {
+ continue;
+ }
+
+ RenderTile rtile;
+ rtile.x = tile_manager.state.buffer.full_x + tile.x;
+ rtile.y = tile_manager.state.buffer.full_y + tile.y;
+ rtile.w = tile.w;
+ rtile.h = tile.h;
+ rtile.sample = sample;
+ rtile.buffers = tile.buffers;
+
+ if (write) {
+ if (write_render_tile_cb)
+ write_render_tile_cb(rtile);
+ }
+ else {
+ if (update_render_tile_cb)
+ update_render_tile_cb(rtile, true);
+ }
+ }
+ }
+
+ last_update_time = current_time;
+
+ return write;
+}
+
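+/* Timing sketch: with the default progressive_update_timeout of 1.0 (see
+ * SessionParams in session.h), intermediate tile updates are throttled to
+ * roughly one per second, while the first sample and the final write always
+ * go through. */
+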
+void Session::device_free()
+{
+ scene->device_free();
+
+ tile_manager.device_free();
+
+ /* used from background render only, so no need to
+ * re-create render/display buffers here
+ */
+}
+
+void Session::collect_statistics(RenderStats *render_stats)
+{
+ scene->collect_statistics(render_stats);
+ if (params.use_profiling && (params.device.type == DEVICE_CPU)) {
+ render_stats->collect_profiling(scene, profiler);
+ }
+}
+
+int Session::get_max_closure_count()
+{
+ if (scene->shader_manager->use_osl()) {
+ /* OSL always needs the maximum as we can't predict the
+ * number of closures a shader might generate. */
+ return MAX_CLOSURE;
+ }
+
+ int max_closures = 0;
+ for (int i = 0; i < scene->shaders.size(); i++) {
+ int num_closures = scene->shaders[i]->graph->get_num_closures();
+ max_closures = max(max_closures, num_closures);
+ }
+ max_closure_global = max(max_closure_global, max_closures);
+
+ if (max_closure_global > MAX_CLOSURE) {
+    /* This is usually harmless as more complex shaders tend to get many
+ * closures discarded due to mixing or low weights. We need to limit
+ * to MAX_CLOSURE as this is hardcoded in CPU/mega kernels, and it
+ * avoids excessive memory usage for split kernels. */
+ VLOG(2) << "Maximum number of closures exceeded: " << max_closure_global << " > "
+ << MAX_CLOSURE;
+
+ max_closure_global = MAX_CLOSURE;
+ }
+
+ return max_closure_global;
+}
+
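+/* Worked example (hypothetical shader set): graphs yielding 3, 7 and 5
+ * closures give max_closure_global = 7, so kernels are sized for 7 closures;
+ * with OSL the count cannot be predicted statically, hence MAX_CLOSURE is
+ * always returned. */
+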
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/render/session.h b/intern/cycles/render/session.h
--- a/intern/cycles/render/session.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/session.h 2020-01-10 20:42:43.474256721 +0300
@@ -55,6 +55,7 @@
int start_resolution;
int pixel_size;
int threads;
+ bool adaptive_sampling;
bool use_profiling;
@@ -87,6 +88,7 @@
start_resolution = INT_MAX;
pixel_size = 1;
threads = 0;
+ adaptive_sampling = false;
use_profiling = false;
@@ -114,6 +116,7 @@
&& progressive == params.progressive && experimental == params.experimental &&
tile_size == params.tile_size && start_resolution == params.start_resolution &&
pixel_size == params.pixel_size && threads == params.threads &&
+ adaptive_sampling == params.adaptive_sampling &&
use_profiling == params.use_profiling &&
display_buffer_linear == params.display_buffer_linear &&
cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout &&
diff -Naur a/intern/cycles/render/session.h.orig b/intern/cycles/render/session.h.orig
--- a/intern/cycles/render/session.h.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/render/session.h.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SESSION_H__
+#define __SESSION_H__
+
+#include "render/buffers.h"
+#include "device/device.h"
+#include "render/shader.h"
+#include "render/stats.h"
+#include "render/tile.h"
+
+#include "util/util_progress.h"
+#include "util/util_stats.h"
+#include "util/util_thread.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BufferParams;
+class Device;
+class DeviceScene;
+class DeviceRequestedFeatures;
+class DisplayBuffer;
+class Progress;
+class RenderBuffers;
+class Scene;
+
+/* Session Parameters */
+
+class SessionParams {
+ public:
+ DeviceInfo device;
+ bool background;
+ bool progressive_refine;
+
+ bool progressive;
+ bool experimental;
+ int samples;
+ int2 tile_size;
+ TileOrder tile_order;
+ int start_resolution;
+ int pixel_size;
+ int threads;
+
+ bool use_profiling;
+
+ bool display_buffer_linear;
+
+ bool run_denoising;
+ bool write_denoising_passes;
+ bool full_denoising;
+ bool optix_denoising;
+ DenoiseParams denoising;
+
+ double cancel_timeout;
+ double reset_timeout;
+ double text_timeout;
+ double progressive_update_timeout;
+
+ ShadingSystem shadingsystem;
+
+ function<bool(const uchar *pixels, int width, int height, int channels)> write_render_cb;
+
+ SessionParams()
+ {
+ background = false;
+ progressive_refine = false;
+
+ progressive = false;
+ experimental = false;
+ samples = 1024;
+ tile_size = make_int2(64, 64);
+ start_resolution = INT_MAX;
+ pixel_size = 1;
+ threads = 0;
+
+ use_profiling = false;
+
+ run_denoising = false;
+ write_denoising_passes = false;
+ full_denoising = false;
+ optix_denoising = false;
+
+ display_buffer_linear = false;
+
+ cancel_timeout = 0.1;
+ reset_timeout = 0.1;
+ text_timeout = 1.0;
+ progressive_update_timeout = 1.0;
+
+ shadingsystem = SHADINGSYSTEM_SVM;
+ tile_order = TILE_CENTER;
+ }
+
+ bool modified(const SessionParams &params)
+ {
+ return !(device == params.device && background == params.background &&
+ progressive_refine == params.progressive_refine
+ /* && samples == params.samples */
+ && progressive == params.progressive && experimental == params.experimental &&
+ tile_size == params.tile_size && start_resolution == params.start_resolution &&
+ pixel_size == params.pixel_size && threads == params.threads &&
+ use_profiling == params.use_profiling &&
+ display_buffer_linear == params.display_buffer_linear &&
+ cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout &&
+ text_timeout == params.text_timeout &&
+ progressive_update_timeout == params.progressive_update_timeout &&
+ tile_order == params.tile_order && shadingsystem == params.shadingsystem);
+ }
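+
+  /* Usage sketch (illustrative, not part of the original header): the host
+   * application typically recreates the session only when the parameters
+   * actually changed:
+   *
+   *   if (session->params.modified(new_params)) {
+   *     delete session;
+   *     session = new Session(new_params);
+   *   }
+   */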
+};
+
+/* Session
+ *
+ * This is the class that contains the session thread, running the render
+ * control loop and dispatching tasks. */
+
+class Session {
+ public:
+ Device *device;
+ Scene *scene;
+ RenderBuffers *buffers;
+ DisplayBuffer *display;
+ Progress progress;
+ SessionParams params;
+ TileManager tile_manager;
+ Stats stats;
+ Profiler profiler;
+
+ function<void(RenderTile &)> write_render_tile_cb;
+ function<void(RenderTile &, bool)> update_render_tile_cb;
+
+ explicit Session(const SessionParams &params);
+ ~Session();
+
+ void start();
+ bool draw(BufferParams &params, DeviceDrawParams &draw_params);
+ void wait();
+
+ bool ready_to_reset();
+ void reset(BufferParams &params, int samples);
+ void set_samples(int samples);
+ void set_pause(bool pause);
+
+ bool update_scene();
+ bool load_kernels(bool lock_scene = true);
+
+ void device_free();
+
+ /* Returns the rendering progress or 0 if no progress can be determined
+ * (for example, when rendering with unlimited samples). */
+ float get_progress();
+
+ void collect_statistics(RenderStats *stats);
+
+ protected:
+ struct DelayedReset {
+ thread_mutex mutex;
+ bool do_reset;
+ BufferParams params;
+ int samples;
+ } delayed_reset;
+
+ void run();
+
+ void update_status_time(bool show_pause = false, bool show_done = false);
+
+ void copy_to_display_buffer(int sample);
+ void render();
+ void reset_(BufferParams &params, int samples);
+
+ void run_cpu();
+ bool draw_cpu(BufferParams &params, DeviceDrawParams &draw_params);
+ void reset_cpu(BufferParams &params, int samples);
+
+ void run_gpu();
+ bool draw_gpu(BufferParams &params, DeviceDrawParams &draw_params);
+ void reset_gpu(BufferParams &params, int samples);
+
+ bool acquire_tile(Device *tile_device, RenderTile &tile);
+ void update_tile_sample(RenderTile &tile);
+ void release_tile(RenderTile &tile);
+
+ void map_neighbor_tiles(RenderTile *tiles, Device *tile_device);
+ void unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device);
+
+ bool device_use_gl;
+
+ thread *session_thread;
+
+ volatile bool display_outdated;
+
+ volatile bool gpu_draw_ready;
+ volatile bool gpu_need_display_buffer_update;
+ thread_condition_variable gpu_need_display_buffer_update_cond;
+
+ bool pause;
+ thread_condition_variable pause_cond;
+ thread_mutex pause_mutex;
+ thread_mutex tile_mutex;
+ thread_mutex buffers_mutex;
+ thread_mutex display_mutex;
+
+ bool kernels_loaded;
+ DeviceRequestedFeatures loaded_kernel_features;
+
+ double reset_time;
+
+ /* progressive refine */
+ double last_update_time;
+ bool update_progressive_refine(bool cancel);
+
+ DeviceRequestedFeatures get_requested_device_features();
+
+ /* ** Split kernel routines ** */
+
+  /* Maximum number of closures during the session lifetime. */
+ int max_closure_global;
+
+ /* Get maximum number of closures to be used in kernel. */
+ int get_max_closure_count();
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __SESSION_H__ */
diff -Naur a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
--- a/intern/cycles/util/util_atomic.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/util/util_atomic.h 2020-01-10 20:42:43.474256721 +0300
@@ -77,6 +77,7 @@
# define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x))
# define atomic_fetch_and_inc_uint32(p) atomic_inc((p))
# define atomic_fetch_and_dec_uint32(p) atomic_dec((p))
+# define atomic_fetch_and_or_uint32(p, x) atomic_or((p), (x))
# define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE
# define ccl_barrier(flags) barrier(flags)
@@ -91,6 +92,7 @@
# define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x))
# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1)
+# define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x))
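+/* This OR variant is introduced by this patch, presumably so the adaptive
+ * sampling kernels can set shared flag bits without races. Usage sketch with
+ * an illustrative flags word:
+ *
+ *   atomic_fetch_and_or_uint32(&flags, 1u << bit);
+ */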
ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest,
const float old_val,