@brothermechanic
Created January 10, 2020 18:20
D4686_master.diff
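
This diff (a rebased copy of Blender differential D4686) adds adaptive sampling to Cycles: a use_adaptive_sampling toggle with adaptive_threshold and adaptive_min_samples scene properties, a PROGRESSIVE_MUTI_JITTER sampling pattern, and a per-pixel Debug Sample Count render pass. A minimal sketch of driving the new settings from Blender's Python console, assuming a build with this patch applied (property names are taken from the diff below):

    import bpy

    scene = bpy.context.scene
    scene.cycles.use_adaptive_sampling = True  # per-pixel sample count driven by variance estimation
    scene.cycles.adaptive_threshold = 0.0      # 0.0 = derive threshold automatically from AA samples
    scene.cycles.adaptive_min_samples = 0      # 0 = derive minimum automatically from AA samples

    # Expose the per-pixel sample count as a debug pass on the active view layer.
    bpy.context.view_layer.cycles.pass_debug_sample_count = True

Note how the ui.py hunk below greys out the Pattern selector while adaptive sampling is enabled, and conversely enables the two adaptive controls only when it is on.
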
diff -Naur a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
--- a/intern/cycles/blender/addon/engine.py 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/addon/engine.py 2020-01-10 21:01:30.634277071 +0300
@@ -258,6 +258,7 @@
     if crl.pass_debug_bvh_traversed_instances: yield ("Debug BVH Traversed Instances", "X", 'VALUE')
     if crl.pass_debug_bvh_intersections: yield ("Debug BVH Intersections", "X", 'VALUE')
     if crl.pass_debug_ray_bounces: yield ("Debug Ray Bounces", "X", 'VALUE')
+    if crl.pass_debug_sample_count: yield ("Debug Sample Count", "X", 'VALUE')
     if crl.use_pass_volume_direct: yield ("VolumeDir", "RGB", 'COLOR')
     if crl.use_pass_volume_indirect: yield ("VolumeInd", "RGB", 'COLOR')
diff -Naur a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
--- a/intern/cycles/blender/addon/properties.py 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/addon/properties.py 2020-01-10 21:06:42.827616043 +0300
@@ -112,6 +112,7 @@
 enum_sampling_pattern = (
     ('SOBOL', "Sobol", "Use Sobol random sampling pattern"),
     ('CORRELATED_MUTI_JITTER', "Correlated Multi-Jitter", "Use Correlated Multi-Jitter random sampling pattern"),
+    ('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern"),
 )
 
 enum_integrator = (
@@ -357,6 +358,26 @@
         default=0,
     )
 
+    adaptive_threshold: FloatProperty(
+        name="Adaptive Sampling Threshold",
+        description="Zero for automatic setting based on AA samples",
+        min=0.0, max=1.0,
+        default=0.0,
+    )
+
+    adaptive_min_samples: IntProperty(
+        name="Adaptive Min Samples",
+        description="Minimum AA samples for adaptive sampling. Zero for automatic setting based on AA samples",
+        min=0, max=4096,
+        default=0,
+    )
+
+    use_adaptive_sampling: BoolProperty(
+        name="Use adaptive sampling",
+        description="Automatically determine the number of samples per pixel based on a variance estimation",
+        default=False,
+    )
+
     caustics_reflective: BoolProperty(
         name="Reflective Caustics",
         description="Use reflective caustics, resulting in a brighter image (more noise but added realism)",
@@ -1285,6 +1306,12 @@
         default=False,
         update=update_render_passes,
     )
+    pass_debug_sample_count: BoolProperty(
+        name="Debug Sample Count",
+        description="Number of samples/camera rays per pixel",
+        default=False,
+        update=update_render_passes,
+    )
     use_pass_volume_direct: BoolProperty(
         name="Volume Direct",
diff -Naur a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
--- a/intern/cycles/blender/addon/ui.py 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/addon/ui.py 2020-01-10 20:42:43.454256722 +0300
@@ -188,6 +188,8 @@
col.prop(cscene, "aa_samples", text="Render")
col.prop(cscene, "preview_aa_samples", text="Viewport")
+ col.prop(cscene, "use_adaptive_sampling", text="Adaptive Sampling")
+
class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
bl_label = "Sub Samples"
@@ -239,7 +241,13 @@
row.prop(cscene, "seed")
row.prop(cscene, "use_animated_seed", text="", icon='TIME')
- layout.prop(cscene, "sampling_pattern", text="Pattern")
+ col = layout.column(align=True)
+ col.active = not(cscene.use_adaptive_sampling)
+ col.prop(cscene, "sampling_pattern", text="Pattern")
+ col = layout.column(align=True)
+ col.active = cscene.use_adaptive_sampling
+ col.prop(cscene, "adaptive_min_samples", text="Adaptive Min Samples")
+ col.prop(cscene, "adaptive_threshold", text="Adaptive Threshold")
layout.prop(cscene, "use_square_samples")
@@ -803,6 +811,8 @@
col.prop(cycles_view_layer, "denoising_store_passes", text="Denoising Data")
col = flow.column()
col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
+ col = flow.column()
+ col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")
layout.separator()
diff -Naur a/intern/cycles/blender/addon/ui.py.orig b/intern/cycles/blender/addon/ui.py.orig
--- a/intern/cycles/blender/addon/ui.py.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/blender/addon/ui.py.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,2356 @@
+#
+# Copyright 2011-2013 Blender Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# <pep8 compliant>
+
+import bpy
+from bpy_extras.node_utils import find_node_input
+from bl_ui.utils import PresetPanel
+
+from bpy.types import Panel
+
+from bl_ui.properties_grease_pencil_common import GreasePencilSimplifyPanel
+
+
+class CYCLES_PT_sampling_presets(PresetPanel, Panel):
+ bl_label = "Sampling Presets"
+ preset_subdir = "cycles/sampling"
+ preset_operator = "script.execute_preset"
+ preset_add_operator = "render.cycles_sampling_preset_add"
+ COMPAT_ENGINES = {'CYCLES'}
+
+
+class CYCLES_PT_integrator_presets(PresetPanel, Panel):
+ bl_label = "Integrator Presets"
+ preset_subdir = "cycles/integrator"
+ preset_operator = "script.execute_preset"
+ preset_add_operator = "render.cycles_integrator_preset_add"
+ COMPAT_ENGINES = {'CYCLES'}
+
+
+class CyclesButtonsPanel:
+ bl_space_type = "PROPERTIES"
+ bl_region_type = "WINDOW"
+ bl_context = "render"
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ return context.engine in cls.COMPAT_ENGINES
+
+
+# Adapt properties editor panel to display in node editor. We have to
+# copy the class rather than inherit due to the way bpy registration works.
+def node_panel(cls):
+ node_cls = type('NODE_' + cls.__name__, cls.__bases__, dict(cls.__dict__))
+
+ node_cls.bl_space_type = 'NODE_EDITOR'
+ node_cls.bl_region_type = 'UI'
+ node_cls.bl_category = "Options"
+ if hasattr(node_cls, 'bl_parent_id'):
+ node_cls.bl_parent_id = 'NODE_' + node_cls.bl_parent_id
+
+ return node_cls
+
+
+def get_device_type(context):
+ return context.preferences.addons[__package__].preferences.compute_device_type
+
+
+def use_cpu(context):
+ cscene = context.scene.cycles
+
+ return (get_device_type(context) == 'NONE' or cscene.device == 'CPU')
+
+
+def use_opencl(context):
+ cscene = context.scene.cycles
+
+ return (get_device_type(context) == 'OPENCL' and cscene.device == 'GPU')
+
+
+def use_cuda(context):
+ cscene = context.scene.cycles
+
+ return (get_device_type(context) == 'CUDA' and cscene.device == 'GPU')
+
+
+def use_optix(context):
+ cscene = context.scene.cycles
+
+ return (get_device_type(context) == 'OPTIX' and cscene.device == 'GPU')
+
+
+def use_branched_path(context):
+ cscene = context.scene.cycles
+
+ return (cscene.progressive == 'BRANCHED_PATH' and not use_optix(context))
+
+
+def use_sample_all_lights(context):
+ cscene = context.scene.cycles
+
+ return cscene.sample_all_lights_direct or cscene.sample_all_lights_indirect
+
+
+def show_device_active(context):
+ cscene = context.scene.cycles
+ if cscene.device != 'GPU':
+ return True
+ return context.preferences.addons[__package__].preferences.has_active_device()
+
+
+def draw_samples_info(layout, context):
+ cscene = context.scene.cycles
+ integrator = cscene.progressive
+
+ # Calculate sample values
+ if integrator == 'PATH':
+ aa = cscene.samples
+ if cscene.use_square_samples:
+ aa = aa * aa
+ else:
+ aa = cscene.aa_samples
+ d = cscene.diffuse_samples
+ g = cscene.glossy_samples
+ t = cscene.transmission_samples
+ ao = cscene.ao_samples
+ ml = cscene.mesh_light_samples
+ sss = cscene.subsurface_samples
+ vol = cscene.volume_samples
+
+ if cscene.use_square_samples:
+ aa = aa * aa
+ d = d * d
+ g = g * g
+ t = t * t
+ ao = ao * ao
+ ml = ml * ml
+ sss = sss * sss
+ vol = vol * vol
+
+ # Draw interface
+ # Do not draw for progressive, when Square Samples are disabled
+ if use_branched_path(context) or (cscene.use_square_samples and integrator == 'PATH'):
+ col = layout.column(align=True)
+ col.scale_y = 0.6
+ col.label(text="Total Samples:")
+ col.separator()
+ if integrator == 'PATH':
+ col.label(text="%s AA" % aa)
+ else:
+ col.label(text="%s AA, %s Diffuse, %s Glossy, %s Transmission" %
+ (aa, d * aa, g * aa, t * aa))
+ col.separator()
+ col.label(text="%s AO, %s Mesh Light, %s Subsurface, %s Volume" %
+ (ao * aa, ml * aa, sss * aa, vol * aa))
+
+
+class CYCLES_RENDER_PT_sampling(CyclesButtonsPanel, Panel):
+ bl_label = "Sampling"
+
+ def draw_header_preset(self, context):
+ CYCLES_PT_sampling_presets.draw_panel_header(self.layout)
+
+ def draw(self, context):
+ layout = self.layout
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ if not use_optix(context):
+ layout.prop(cscene, "progressive")
+
+ if cscene.progressive == 'PATH' or use_branched_path(context) is False:
+ col = layout.column(align=True)
+ col.prop(cscene, "samples", text="Render")
+ col.prop(cscene, "preview_samples", text="Viewport")
+
+ draw_samples_info(layout, context)
+ else:
+ col = layout.column(align=True)
+ col.prop(cscene, "aa_samples", text="Render")
+ col.prop(cscene, "preview_aa_samples", text="Viewport")
+
+
+class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
+ bl_label = "Sub Samples"
+ bl_parent_id = "CYCLES_RENDER_PT_sampling"
+
+ @classmethod
+ def poll(cls, context):
+ scene = context.scene
+ cscene = scene.cycles
+ return cscene.progressive != 'PATH' and use_branched_path(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column(align=True)
+ col.prop(cscene, "diffuse_samples", text="Diffuse")
+ col.prop(cscene, "glossy_samples", text="Glossy")
+ col.prop(cscene, "transmission_samples", text="Transmission")
+ col.prop(cscene, "ao_samples", text="AO")
+
+ sub = col.row(align=True)
+ sub.active = use_sample_all_lights(context)
+ sub.prop(cscene, "mesh_light_samples", text="Mesh Light")
+ col.prop(cscene, "subsurface_samples", text="Subsurface")
+ col.prop(cscene, "volume_samples", text="Volume")
+
+ draw_samples_info(layout, context)
+
+
+class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
+ bl_label = "Advanced"
+ bl_parent_id = "CYCLES_RENDER_PT_sampling"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ row = layout.row(align=True)
+ row.prop(cscene, "seed")
+ row.prop(cscene, "use_animated_seed", text="", icon='TIME')
+
+ layout.prop(cscene, "sampling_pattern", text="Pattern")
+
+ layout.prop(cscene, "use_square_samples")
+
+ layout.separator()
+
+ col = layout.column(align=True)
+ col.prop(cscene, "min_light_bounces")
+ col.prop(cscene, "min_transparent_bounces")
+ col.prop(cscene, "light_sampling_threshold", text="Light Threshold")
+
+ if cscene.progressive != 'PATH' and use_branched_path(context):
+ col = layout.column(align=True)
+ col.prop(cscene, "sample_all_lights_direct")
+ col.prop(cscene, "sample_all_lights_indirect")
+
+ for view_layer in scene.view_layers:
+ if view_layer.samples > 0:
+ layout.separator()
+ layout.row().prop(cscene, "use_layer_samples")
+ break
+
+
+class CYCLES_RENDER_PT_sampling_total(CyclesButtonsPanel, Panel):
+ bl_label = "Total Samples"
+ bl_parent_id = "CYCLES_RENDER_PT_sampling"
+
+ @classmethod
+ def poll(cls, context):
+ scene = context.scene
+ cscene = scene.cycles
+
+ if cscene.use_square_samples:
+ return True
+
+ return cscene.progressive != 'PATH' and use_branched_path(context)
+
+ def draw(self, context):
+ layout = self.layout
+ cscene = context.scene.cycles
+ integrator = cscene.progressive
+
+ # Calculate sample values
+ if integrator == 'PATH':
+ aa = cscene.samples
+ if cscene.use_square_samples:
+ aa = aa * aa
+ else:
+ aa = cscene.aa_samples
+ d = cscene.diffuse_samples
+ g = cscene.glossy_samples
+ t = cscene.transmission_samples
+ ao = cscene.ao_samples
+ ml = cscene.mesh_light_samples
+ sss = cscene.subsurface_samples
+ vol = cscene.volume_samples
+
+ if cscene.use_square_samples:
+ aa = aa * aa
+ d = d * d
+ g = g * g
+ t = t * t
+ ao = ao * ao
+ ml = ml * ml
+ sss = sss * sss
+ vol = vol * vol
+
+ col = layout.column(align=True)
+ col.scale_y = 0.6
+ if integrator == 'PATH':
+ col.label(text="%s AA" % aa)
+ else:
+ col.label(text="%s AA, %s Diffuse, %s Glossy, %s Transmission" %
+ (aa, d * aa, g * aa, t * aa))
+ col.separator()
+ col.label(text="%s AO, %s Mesh Light, %s Subsurface, %s Volume" %
+ (ao * aa, ml * aa, sss * aa, vol * aa))
+
+
+class CYCLES_RENDER_PT_subdivision(CyclesButtonsPanel, Panel):
+ bl_label = "Subdivision"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return (context.scene.render.engine == 'CYCLES') and (context.scene.cycles.feature_set == 'EXPERIMENTAL')
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+ sub = col.column(align=True)
+ sub.prop(cscene, "dicing_rate", text="Dicing Rate Render")
+ sub.prop(cscene, "preview_dicing_rate", text="Preview")
+
+ col.separator()
+
+ col.prop(cscene, "offscreen_dicing_scale", text="Offscreen Scale")
+ col.prop(cscene, "max_subdivisions")
+
+ col.prop(cscene, "dicing_camera")
+
+
+class CYCLES_RENDER_PT_hair(CyclesButtonsPanel, Panel):
+ bl_label = "Hair"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw_header(self, context):
+ layout = self.layout
+ scene = context.scene
+ ccscene = scene.cycles_curves
+
+ layout.prop(ccscene, "use_curves", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ ccscene = scene.cycles_curves
+
+ layout.active = ccscene.use_curves
+
+ col = layout.column()
+ col.prop(ccscene, "shape", text="Shape")
+ if not (ccscene.primitive in {'CURVE_SEGMENTS', 'LINE_SEGMENTS'} and ccscene.shape == 'RIBBONS'):
+ col.prop(ccscene, "cull_backfacing", text="Cull back-faces")
+ col.prop(ccscene, "primitive", text="Primitive")
+
+ if ccscene.primitive == 'TRIANGLES' and ccscene.shape == 'THICK':
+ col.prop(ccscene, "resolution", text="Resolution")
+ elif ccscene.primitive == 'CURVE_SEGMENTS':
+ col.prop(ccscene, "subdivisions", text="Curve subdivisions")
+
+
+class CYCLES_RENDER_PT_volumes(CyclesButtonsPanel, Panel):
+ bl_label = "Volumes"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+ col.prop(cscene, "volume_step_size", text="Step Size")
+ col.prop(cscene, "volume_max_steps", text="Max Steps")
+
+
+class CYCLES_RENDER_PT_light_paths(CyclesButtonsPanel, Panel):
+ bl_label = "Light Paths"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw_header_preset(self, context):
+ CYCLES_PT_integrator_presets.draw_panel_header(self.layout)
+
+ def draw(self, context):
+ pass
+
+
+class CYCLES_RENDER_PT_light_paths_max_bounces(CyclesButtonsPanel, Panel):
+ bl_label = "Max Bounces"
+ bl_parent_id = "CYCLES_RENDER_PT_light_paths"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column(align=True)
+ col.prop(cscene, "max_bounces", text="Total")
+
+ col = layout.column(align=True)
+ col.prop(cscene, "diffuse_bounces", text="Diffuse")
+ col.prop(cscene, "glossy_bounces", text="Glossy")
+ col.prop(cscene, "transparent_max_bounces", text="Transparency")
+ col.prop(cscene, "transmission_bounces", text="Transmission")
+ col.prop(cscene, "volume_bounces", text="Volume")
+
+
+class CYCLES_RENDER_PT_light_paths_clamping(CyclesButtonsPanel, Panel):
+ bl_label = "Clamping"
+ bl_parent_id = "CYCLES_RENDER_PT_light_paths"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column(align=True)
+ col.prop(cscene, "sample_clamp_direct", text="Direct Light")
+ col.prop(cscene, "sample_clamp_indirect", text="Indirect Light")
+
+
+class CYCLES_RENDER_PT_light_paths_caustics(CyclesButtonsPanel, Panel):
+ bl_label = "Caustics"
+ bl_parent_id = "CYCLES_RENDER_PT_light_paths"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+ col.prop(cscene, "blur_glossy")
+ col.prop(cscene, "caustics_reflective")
+ col.prop(cscene, "caustics_refractive")
+
+
+class CYCLES_RENDER_PT_motion_blur(CyclesButtonsPanel, Panel):
+ bl_label = "Motion Blur"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw_header(self, context):
+ rd = context.scene.render
+
+ self.layout.prop(rd, "use_motion_blur", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+ rd = scene.render
+ layout.active = rd.use_motion_blur
+
+ col = layout.column()
+ col.prop(cscene, "motion_blur_position", text="Position")
+ col.prop(rd, "motion_blur_shutter")
+ col.separator()
+ col.prop(cscene, "rolling_shutter_type", text="Rolling Shutter")
+ sub = col.column()
+ sub.active = cscene.rolling_shutter_type != 'NONE'
+ sub.prop(cscene, "rolling_shutter_duration")
+
+
+class CYCLES_RENDER_PT_motion_blur_curve(CyclesButtonsPanel, Panel):
+ bl_label = "Shutter Curve"
+ bl_parent_id = "CYCLES_RENDER_PT_motion_blur"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ layout.active = rd.use_motion_blur
+
+ col = layout.column()
+
+ col.template_curve_mapping(rd, "motion_blur_shutter_curve")
+
+ col = layout.column(align=True)
+ row = col.row(align=True)
+ row.operator("render.shutter_curve_preset", icon='SMOOTHCURVE', text="").shape = 'SMOOTH'
+ row.operator("render.shutter_curve_preset", icon='SPHERECURVE', text="").shape = 'ROUND'
+ row.operator("render.shutter_curve_preset", icon='ROOTCURVE', text="").shape = 'ROOT'
+ row.operator("render.shutter_curve_preset", icon='SHARPCURVE', text="").shape = 'SHARP'
+ row.operator("render.shutter_curve_preset", icon='LINCURVE', text="").shape = 'LINE'
+ row.operator("render.shutter_curve_preset", icon='NOCURVE', text="").shape = 'MAX'
+
+
+class CYCLES_RENDER_PT_film(CyclesButtonsPanel, Panel):
+ bl_label = "Film"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+ col.prop(cscene, "film_exposure")
+
+
+class CYCLES_RENDER_PT_film_transparency(CyclesButtonsPanel, Panel):
+ bl_label = "Transparent"
+ bl_parent_id = "CYCLES_RENDER_PT_film"
+
+ def draw_header(self, context):
+ layout = self.layout
+
+ scene = context.scene
+ rd = scene.render
+
+ layout.prop(rd, "film_transparent", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+ scene = context.scene
+ rd = scene.render
+ cscene = scene.cycles
+
+ layout.active = rd.film_transparent
+
+ col = layout.column()
+ col.prop(cscene, "film_transparent_glass", text="Transparent Glass")
+
+ sub = col.column()
+ sub.active = rd.film_transparent and cscene.film_transparent_glass
+ sub.prop(cscene, "film_transparent_roughness", text="Roughness Threshold")
+
+
+class CYCLES_RENDER_PT_film_pixel_filter(CyclesButtonsPanel, Panel):
+ bl_label = "Pixel Filter"
+ bl_parent_id = "CYCLES_RENDER_PT_film"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+ col.prop(cscene, "pixel_filter_type", text="Type")
+ if cscene.pixel_filter_type != 'BOX':
+ col.prop(cscene, "filter_width", text="Width")
+
+
+class CYCLES_RENDER_PT_performance(CyclesButtonsPanel, Panel):
+ bl_label = "Performance"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw(self, context):
+ pass
+
+
+class CYCLES_RENDER_PT_performance_threads(CyclesButtonsPanel, Panel):
+ bl_label = "Threads"
+ bl_parent_id = "CYCLES_RENDER_PT_performance"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+
+ col = layout.column()
+
+ col.prop(rd, "threads_mode")
+ sub = col.column(align=True)
+ sub.enabled = rd.threads_mode == 'FIXED'
+ sub.prop(rd, "threads")
+
+
+class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel):
+ bl_label = "Tiles"
+ bl_parent_id = "CYCLES_RENDER_PT_performance"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ cscene = scene.cycles
+
+ col = layout.column()
+
+ sub = col.column(align=True)
+ sub.prop(rd, "tile_x", text="Tiles X")
+ sub.prop(rd, "tile_y", text="Y")
+ col.prop(cscene, "tile_order", text="Order")
+
+ sub = col.column()
+ sub.active = not rd.use_save_buffers
+ for view_layer in scene.view_layers:
+ if view_layer.cycles.use_denoising:
+ sub.active = False
+ sub.prop(cscene, "use_progressive_refine")
+
+
+class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Panel):
+ bl_label = "Acceleration Structure"
+ bl_parent_id = "CYCLES_RENDER_PT_performance"
+
+ def draw(self, context):
+ import _cycles
+
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+
+ if _cycles.with_embree:
+ row = col.row()
+ row.active = use_cpu(context)
+ row.prop(cscene, "use_bvh_embree")
+ col.prop(cscene, "debug_use_spatial_splits")
+ sub = col.column()
+ sub.active = not cscene.use_bvh_embree or not _cycles.with_embree
+ sub.prop(cscene, "debug_use_hair_bvh")
+ sub = col.column()
+ sub.active = not cscene.debug_use_spatial_splits and not cscene.use_bvh_embree
+ sub.prop(cscene, "debug_bvh_time_steps")
+
+
+class CYCLES_RENDER_PT_performance_final_render(CyclesButtonsPanel, Panel):
+ bl_label = "Final Render"
+ bl_parent_id = "CYCLES_RENDER_PT_performance"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+
+ col = layout.column()
+
+ col.prop(rd, "use_save_buffers")
+ col.prop(rd, "use_persistent_data", text="Persistent Images")
+
+
+class CYCLES_RENDER_PT_performance_viewport(CyclesButtonsPanel, Panel):
+ bl_label = "Viewport"
+ bl_parent_id = "CYCLES_RENDER_PT_performance"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ cscene = scene.cycles
+
+ col = layout.column()
+ col.prop(rd, "preview_pixel_size", text="Pixel Size")
+ col.prop(cscene, "preview_start_resolution", text="Start Pixels")
+
+
+class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel):
+ bl_label = "Filter"
+ bl_options = {'DEFAULT_CLOSED'}
+ bl_context = "view_layer"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ with_freestyle = bpy.app.build_options.freestyle
+
+ scene = context.scene
+ rd = scene.render
+ view_layer = context.view_layer
+
+ flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
+
+ col = flow.column()
+ col.prop(view_layer, "use_sky", text="Environment")
+ col = flow.column()
+ col.prop(view_layer, "use_ao", text="Ambient Occlusion")
+ col = flow.column()
+ col.prop(view_layer, "use_solid", text="Surfaces")
+ col = flow.column()
+ col.prop(view_layer, "use_strand", text="Hair")
+ if with_freestyle:
+ col = flow.column()
+ col.prop(view_layer, "use_freestyle", text="Freestyle")
+ col.active = rd.use_freestyle
+
+
+class CYCLES_RENDER_PT_override(CyclesButtonsPanel, Panel):
+ bl_label = "Override"
+ bl_options = {'DEFAULT_CLOSED'}
+ bl_context = "view_layer"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ view_layer = context.view_layer
+
+ layout.prop(view_layer, "material_override")
+ layout.prop(view_layer, "samples")
+
+
+class CYCLES_RENDER_PT_passes(CyclesButtonsPanel, Panel):
+ bl_label = "Passes"
+ bl_context = "view_layer"
+
+ def draw(self, context):
+ pass
+
+
+class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
+ bl_label = "Data"
+ bl_context = "view_layer"
+ bl_parent_id = "CYCLES_RENDER_PT_passes"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ view_layer = context.view_layer
+ cycles_view_layer = view_layer.cycles
+
+ flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
+ col = flow.column()
+ col.prop(view_layer, "use_pass_combined")
+ col = flow.column()
+ col.prop(view_layer, "use_pass_z")
+ col = flow.column()
+ col.prop(view_layer, "use_pass_mist")
+ col = flow.column()
+ col.prop(view_layer, "use_pass_normal")
+ col = flow.column()
+ col.prop(view_layer, "use_pass_vector")
+ col.active = not rd.use_motion_blur
+ col = flow.column()
+ col.prop(view_layer, "use_pass_uv")
+ col = flow.column()
+ col.prop(view_layer, "use_pass_object_index")
+ col = flow.column()
+ col.prop(view_layer, "use_pass_material_index")
+
+ layout.separator()
+
+ flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
+ col = flow.column()
+ col.prop(cycles_view_layer, "denoising_store_passes", text="Denoising Data")
+ col = flow.column()
+ col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
+
+ layout.separator()
+
+ layout.prop(view_layer, "pass_alpha_threshold")
+
+
+class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel):
+ bl_label = "Light"
+ bl_context = "view_layer"
+ bl_parent_id = "CYCLES_RENDER_PT_passes"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ view_layer = context.view_layer
+ cycles_view_layer = view_layer.cycles
+
+ split = layout.split(factor=0.35)
+ split.use_property_split = False
+ split.label(text="Diffuse")
+ row = split.row(align=True)
+ row.prop(view_layer, "use_pass_diffuse_direct", text="Direct", toggle=True)
+ row.prop(view_layer, "use_pass_diffuse_indirect", text="Indirect", toggle=True)
+ row.prop(view_layer, "use_pass_diffuse_color", text="Color", toggle=True)
+
+ split = layout.split(factor=0.35)
+ split.use_property_split = False
+ split.label(text="Glossy")
+ row = split.row(align=True)
+ row.prop(view_layer, "use_pass_glossy_direct", text="Direct", toggle=True)
+ row.prop(view_layer, "use_pass_glossy_indirect", text="Indirect", toggle=True)
+ row.prop(view_layer, "use_pass_glossy_color", text="Color", toggle=True)
+
+ split = layout.split(factor=0.35)
+ split.use_property_split = False
+ split.label(text="Transmission")
+ row = split.row(align=True)
+ row.prop(view_layer, "use_pass_transmission_direct", text="Direct", toggle=True)
+ row.prop(view_layer, "use_pass_transmission_indirect", text="Indirect", toggle=True)
+ row.prop(view_layer, "use_pass_transmission_color", text="Color", toggle=True)
+
+ split = layout.split(factor=0.35)
+ split.use_property_split = False
+ split.label(text="Subsurface")
+ row = split.row(align=True)
+ row.prop(view_layer, "use_pass_subsurface_direct", text="Direct", toggle=True)
+ row.prop(view_layer, "use_pass_subsurface_indirect", text="Indirect", toggle=True)
+ row.prop(view_layer, "use_pass_subsurface_color", text="Color", toggle=True)
+
+ split = layout.split(factor=0.35)
+ split.use_property_split = False
+ split.label(text="Volume")
+ row = split.row(align=True)
+ row.prop(cycles_view_layer, "use_pass_volume_direct", text="Direct", toggle=True)
+ row.prop(cycles_view_layer, "use_pass_volume_indirect", text="Indirect", toggle=True)
+
+ col = layout.column(align=True)
+ col.prop(view_layer, "use_pass_emit", text="Emission")
+ col.prop(view_layer, "use_pass_environment")
+ col.prop(view_layer, "use_pass_shadow")
+ col.prop(view_layer, "use_pass_ambient_occlusion", text="Ambient Occlusion")
+
+
+class CYCLES_RENDER_PT_passes_crypto(CyclesButtonsPanel, Panel):
+ bl_label = "Cryptomatte"
+ bl_context = "view_layer"
+ bl_parent_id = "CYCLES_RENDER_PT_passes"
+
+ def draw(self, context):
+ import _cycles
+
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ cycles_view_layer = context.view_layer.cycles
+
+ row = layout.row(align=True)
+ row.use_property_split = False
+ row.prop(cycles_view_layer, "use_pass_crypto_object", text="Object", toggle=True)
+ row.prop(cycles_view_layer, "use_pass_crypto_material", text="Material", toggle=True)
+ row.prop(cycles_view_layer, "use_pass_crypto_asset", text="Asset", toggle=True)
+
+ layout.prop(cycles_view_layer, "pass_crypto_depth", text="Levels")
+
+ row = layout.row(align=True)
+ row.active = use_cpu(context)
+ row.prop(cycles_view_layer, "pass_crypto_accurate", text="Accurate Mode")
+
+
+class CYCLES_RENDER_PT_passes_debug(CyclesButtonsPanel, Panel):
+ bl_label = "Debug"
+ bl_context = "view_layer"
+ bl_parent_id = "CYCLES_RENDER_PT_passes"
+
+ @classmethod
+ def poll(cls, context):
+ import _cycles
+ return _cycles.with_cycles_debug
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ cycles_view_layer = context.view_layer.cycles
+
+ layout.prop(cycles_view_layer, "pass_debug_bvh_traversed_nodes")
+ layout.prop(cycles_view_layer, "pass_debug_bvh_traversed_instances")
+ layout.prop(cycles_view_layer, "pass_debug_bvh_intersections")
+ layout.prop(cycles_view_layer, "pass_debug_ray_bounces")
+
+
+class CYCLES_RENDER_UL_aov(bpy.types.UIList):
+ def draw_item(self, context, layout, data, item, icon, active_data, active_propname):
+ row = layout.row()
+ split = row.split(factor=0.65)
+ icon = 'ERROR' if item.conflict else 'NONE'
+ split.row().prop(item, "name", text="", icon=icon, emboss=False)
+ split.row().prop(item, "type", text="", emboss=False)
+
+
+class CYCLES_RENDER_PT_passes_aov(CyclesButtonsPanel, Panel):
+ bl_label = "Shader AOV"
+ bl_context = "view_layer"
+ bl_parent_id = "CYCLES_RENDER_PT_passes"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ cycles_view_layer = context.view_layer.cycles
+
+ row = layout.row()
+ col = row.column()
+ col.template_list("CYCLES_RENDER_UL_aov", "aovs", cycles_view_layer, "aovs", cycles_view_layer, "active_aov", rows=2)
+
+ col = row.column()
+ sub = col.column(align=True)
+ sub.operator("cycles.add_aov", icon='ADD', text="")
+ sub.operator("cycles.remove_aov", icon='REMOVE', text="")
+
+ if cycles_view_layer.active_aov < len(cycles_view_layer.aovs):
+ active_aov = cycles_view_layer.aovs[cycles_view_layer.active_aov]
+ if active_aov.conflict:
+ layout.label(text=active_aov.conflict, icon='ERROR')
+
+
+class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
+ bl_label = "Denoising"
+ bl_context = "view_layer"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ def draw_header(self, context):
+ scene = context.scene
+ view_layer = context.view_layer
+ cycles_view_layer = view_layer.cycles
+ layout = self.layout
+
+ layout.prop(cycles_view_layer, "use_denoising", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ view_layer = context.view_layer
+ cycles_view_layer = view_layer.cycles
+
+ split = layout.split()
+ split.active = cycles_view_layer.use_denoising
+
+ col = split.column(align=True)
+
+ if use_optix(context):
+ col.prop(cycles_view_layer, "use_optix_denoising", text="OptiX AI Denoising")
+
+ if cycles_view_layer.use_optix_denoising:
+ col.prop(cycles_view_layer, "denoising_optix_input_passes")
+ return
+
+ col.separator(factor=2.0)
+
+ col.prop(cycles_view_layer, "denoising_radius", text="Radius")
+ col.prop(cycles_view_layer, "denoising_strength", slider=True, text="Strength")
+ col.prop(cycles_view_layer, "denoising_feature_strength", slider=True, text="Feature Strength")
+ col.prop(cycles_view_layer, "denoising_relative_pca")
+
+ layout.separator()
+
+ split = layout.split(factor=0.5)
+ split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
+
+ col = split.column()
+ col.alignment = 'RIGHT'
+ col.label(text="Diffuse")
+
+ row = split.row(align=True)
+ row.use_property_split = False
+ row.prop(cycles_view_layer, "denoising_diffuse_direct", text="Direct", toggle=True)
+ row.prop(cycles_view_layer, "denoising_diffuse_indirect", text="Indirect", toggle=True)
+
+ split = layout.split(factor=0.5)
+ split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
+
+ col = split.column()
+ col.alignment = 'RIGHT'
+ col.label(text="Glossy")
+
+ row = split.row(align=True)
+ row.use_property_split = False
+ row.prop(cycles_view_layer, "denoising_glossy_direct", text="Direct", toggle=True)
+ row.prop(cycles_view_layer, "denoising_glossy_indirect", text="Indirect", toggle=True)
+
+ split = layout.split(factor=0.5)
+ split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
+
+ col = split.column()
+ col.alignment = 'RIGHT'
+ col.label(text="Transmission")
+
+ row = split.row(align=True)
+ row.use_property_split = False
+ row.prop(cycles_view_layer, "denoising_transmission_direct", text="Direct", toggle=True)
+ row.prop(cycles_view_layer, "denoising_transmission_indirect", text="Indirect", toggle=True)
+
+ split = layout.split(factor=0.5)
+ split.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
+
+ col = split.column()
+ col.alignment = 'RIGHT'
+ col.label(text="Subsurface")
+
+ row = split.row(align=True)
+ row.use_property_split = False
+ row.prop(cycles_view_layer, "denoising_subsurface_direct", text="Direct", toggle=True)
+ row.prop(cycles_view_layer, "denoising_subsurface_indirect", text="Indirect", toggle=True)
+
+
+class CYCLES_PT_post_processing(CyclesButtonsPanel, Panel):
+ bl_label = "Post Processing"
+ bl_options = {'DEFAULT_CLOSED'}
+ bl_context = "output"
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ rd = context.scene.render
+
+ col = layout.column(align=True)
+ col.prop(rd, "use_compositing")
+ col.prop(rd, "use_sequencer")
+
+ layout.prop(rd, "dither_intensity", text="Dither", slider=True)
+
+
+class CYCLES_CAMERA_PT_dof(CyclesButtonsPanel, Panel):
+ bl_label = "Depth of Field"
+ bl_context = "data"
+
+ @classmethod
+ def poll(cls, context):
+ return context.camera and CyclesButtonsPanel.poll(context)
+
+ def draw_header(self, context):
+ cam = context.camera
+ dof = cam.dof
+ self.layout.prop(dof, "use_dof", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+
+ cam = context.camera
+ dof = cam.dof
+ layout.active = dof.use_dof
+
+ split = layout.split()
+
+ col = split.column()
+ col.prop(dof, "focus_object", text="Focus Object")
+
+ sub = col.row()
+ sub.active = dof.focus_object is None
+ sub.prop(dof, "focus_distance", text="Distance")
+
+
+class CYCLES_CAMERA_PT_dof_aperture(CyclesButtonsPanel, Panel):
+ bl_label = "Aperture"
+ bl_parent_id = "CYCLES_CAMERA_PT_dof"
+
+ @classmethod
+ def poll(cls, context):
+ return context.camera and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+
+ cam = context.camera
+ dof = cam.dof
+ layout.active = dof.use_dof
+ flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
+
+ col = flow.column()
+ col.prop(dof, "aperture_fstop")
+ col.prop(dof, "aperture_blades")
+ col.prop(dof, "aperture_rotation")
+ col.prop(dof, "aperture_ratio")
+
+
+class CYCLES_PT_context_material(CyclesButtonsPanel, Panel):
+ bl_label = ""
+ bl_context = "material"
+ bl_options = {'HIDE_HEADER'}
+
+ @classmethod
+ def poll(cls, context):
+ if context.active_object and context.active_object.type == 'GPENCIL':
+ return False
+ else:
+ return (context.material or context.object) and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ mat = context.material
+ ob = context.object
+ slot = context.material_slot
+ space = context.space_data
+
+ if ob:
+ is_sortable = len(ob.material_slots) > 1
+ rows = 1
+ if (is_sortable):
+ rows = 4
+
+ row = layout.row()
+
+ row.template_list("MATERIAL_UL_matslots", "", ob, "material_slots", ob, "active_material_index", rows=rows)
+
+ col = row.column(align=True)
+ col.operator("object.material_slot_add", icon='ADD', text="")
+ col.operator("object.material_slot_remove", icon='REMOVE', text="")
+
+ col.menu("MATERIAL_MT_context_menu", icon='DOWNARROW_HLT', text="")
+
+ if is_sortable:
+ col.separator()
+
+ col.operator("object.material_slot_move", icon='TRIA_UP', text="").direction = 'UP'
+ col.operator("object.material_slot_move", icon='TRIA_DOWN', text="").direction = 'DOWN'
+
+ if ob.mode == 'EDIT':
+ row = layout.row(align=True)
+ row.operator("object.material_slot_assign", text="Assign")
+ row.operator("object.material_slot_select", text="Select")
+ row.operator("object.material_slot_deselect", text="Deselect")
+
+ split = layout.split(factor=0.65)
+
+ if ob:
+ split.template_ID(ob, "active_material", new="material.new")
+ row = split.row()
+
+ if slot:
+ row.prop(slot, "link", text="")
+ else:
+ row.label()
+ elif mat:
+ split.template_ID(space, "pin_id")
+ split.separator()
+
+
+class CYCLES_OBJECT_PT_motion_blur(CyclesButtonsPanel, Panel):
+ bl_label = "Motion Blur"
+ bl_context = "object"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ ob = context.object
+ if CyclesButtonsPanel.poll(context) and ob:
+ if ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META', 'CAMERA'}:
+ return True
+ if ob.instance_type == 'COLLECTION' and ob.instance_collection:
+ return True
+ # TODO(sergey): More duplicator types here?
+ return False
+
+ def draw_header(self, context):
+ layout = self.layout
+
+ rd = context.scene.render
+ # scene = context.scene
+
+ layout.active = rd.use_motion_blur
+
+ ob = context.object
+ cob = ob.cycles
+
+ layout.prop(cob, "use_motion_blur", text="")
+
+ def draw(self, context):
+ layout = self.layout
+
+ rd = context.scene.render
+ # scene = context.scene
+
+ ob = context.object
+ cob = ob.cycles
+
+ layout.active = (rd.use_motion_blur and cob.use_motion_blur)
+
+ row = layout.row()
+ if ob.type != 'CAMERA':
+ row.prop(cob, "use_deform_motion", text="Deformation")
+ row.prop(cob, "motion_steps", text="Steps")
+
+
+def has_geometry_visibility(ob):
+ return ob and ((ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LIGHT'}) or
+ (ob.instance_type == 'COLLECTION' and ob.instance_collection))
+
+
+class CYCLES_OBJECT_PT_visibility(CyclesButtonsPanel, Panel):
+ bl_label = "Visibility"
+ bl_context = "object"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return CyclesButtonsPanel.poll(context) and (context.object)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+
+ flow = layout.grid_flow(row_major=False, columns=0, even_columns=True, even_rows=False, align=False)
+ layout = self.layout
+ ob = context.object
+
+ col = flow.column()
+ col.prop(ob, "hide_viewport", text="Show in Viewports", invert_checkbox=True, toggle=False)
+ col = flow.column()
+ col.prop(ob, "hide_render", text="Show in Renders", invert_checkbox=True, toggle=False)
+ col = flow.column()
+ col.prop(ob, "hide_select", text="Selectable", invert_checkbox=True, toggle=False)
+
+ if has_geometry_visibility(ob):
+ cob = ob.cycles
+ col = flow.column()
+ col.prop(cob, "is_shadow_catcher")
+ col = flow.column()
+ col.prop(cob, "is_holdout")
+
+
+class CYCLES_OBJECT_PT_visibility_ray_visibility(CyclesButtonsPanel, Panel):
+ bl_label = "Ray Visibility"
+ bl_parent_id = "CYCLES_OBJECT_PT_visibility"
+ bl_context = "object"
+
+ @classmethod
+ def poll(cls, context):
+ ob = context.object
+ return CyclesButtonsPanel.poll(context) and has_geometry_visibility(ob)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ ob = context.object
+ cob = ob.cycles
+ visibility = ob.cycles_visibility
+
+ flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
+
+ col = flow.column()
+ col.prop(visibility, "camera")
+ col = flow.column()
+ col.prop(visibility, "diffuse")
+ col = flow.column()
+ col.prop(visibility, "glossy")
+ col = flow.column()
+ col.prop(visibility, "transmission")
+ col = flow.column()
+ col.prop(visibility, "scatter")
+
+ if ob.type != 'LIGHT':
+ col = flow.column()
+ col.prop(visibility, "shadow")
+
+ layout.separator()
+
+
+class CYCLES_OBJECT_PT_visibility_culling(CyclesButtonsPanel, Panel):
+ bl_label = "Culling"
+ bl_parent_id = "CYCLES_OBJECT_PT_visibility"
+ bl_context = "object"
+
+ @classmethod
+ def poll(cls, context):
+ ob = context.object
+ return CyclesButtonsPanel.poll(context) and has_geometry_visibility(ob)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ cscene = scene.cycles
+ ob = context.object
+ cob = ob.cycles
+
+ flow = layout.grid_flow(row_major=True, columns=0, even_columns=True, even_rows=False, align=False)
+
+ col = flow.column()
+ col.active = scene.render.use_simplify and cscene.use_camera_cull
+ col.prop(cob, "use_camera_cull")
+
+ col = flow.column()
+ col.active = scene.render.use_simplify and cscene.use_distance_cull
+ col.prop(cob, "use_distance_cull")
+
+
+def panel_node_draw(layout, id_data, output_type, input_name):
+ if not id_data.use_nodes:
+ layout.operator("cycles.use_shading_nodes", icon='NODETREE')
+ return False
+
+ ntree = id_data.node_tree
+
+ node = ntree.get_output_node('CYCLES')
+ if node:
+ input = find_node_input(node, input_name)
+ if input:
+ layout.template_node_view(ntree, node, input)
+ else:
+ layout.label(text="Incompatible output node")
+ else:
+ layout.label(text="No output node")
+
+ return True
+
+
+class CYCLES_LIGHT_PT_preview(CyclesButtonsPanel, Panel):
+ bl_label = "Preview"
+ bl_context = "data"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return (
+ context.light and
+ not (
+ context.light.type == 'AREA' and
+ context.light.cycles.is_portal
+ ) and
+ CyclesButtonsPanel.poll(context)
+ )
+
+ def draw(self, context):
+ self.layout.template_preview(context.light)
+
+
+class CYCLES_LIGHT_PT_light(CyclesButtonsPanel, Panel):
+ bl_label = "Light"
+ bl_context = "data"
+
+ @classmethod
+ def poll(cls, context):
+ return context.light and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ light = context.light
+ clamp = light.cycles
+
+ layout.use_property_decorate = False
+
+ if self.bl_space_type == 'PROPERTIES':
+ layout.row().prop(light, "type", expand=True)
+ layout.use_property_split = True
+ else:
+ layout.use_property_split = True
+ layout.row().prop(light, "type")
+
+ col = layout.column()
+
+ col.prop(light, "color")
+ col.prop(light, "energy")
+ col.separator()
+
+ if light.type in {'POINT', 'SPOT'}:
+ col.prop(light, "shadow_soft_size", text="Size")
+ elif light.type == 'SUN':
+ col.prop(light, "angle")
+ elif light.type == 'AREA':
+ col.prop(light, "shape", text="Shape")
+ sub = col.column(align=True)
+
+ if light.shape in {'SQUARE', 'DISK'}:
+ sub.prop(light, "size")
+ elif light.shape in {'RECTANGLE', 'ELLIPSE'}:
+ sub.prop(light, "size", text="Size X")
+ sub.prop(light, "size_y", text="Y")
+
+ if not (light.type == 'AREA' and clamp.is_portal):
+ sub = col.column()
+ if use_branched_path(context):
+ subsub = sub.row(align=True)
+ subsub.active = use_sample_all_lights(context)
+ subsub.prop(clamp, "samples")
+ sub.prop(clamp, "max_bounces")
+
+ sub = col.column(align=True)
+ sub.active = not (light.type == 'AREA' and clamp.is_portal)
+ sub.prop(clamp, "cast_shadow")
+ sub.prop(clamp, "use_multiple_importance_sampling", text="Multiple Importance")
+
+ if light.type == 'AREA':
+ col.prop(clamp, "is_portal", text="Portal")
+
+
+class CYCLES_LIGHT_PT_nodes(CyclesButtonsPanel, Panel):
+ bl_label = "Nodes"
+ bl_context = "data"
+
+ @classmethod
+ def poll(cls, context):
+ return context.light and not (context.light.type == 'AREA' and
+ context.light.cycles.is_portal) and \
+ CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ light = context.light
+ panel_node_draw(layout, light, 'OUTPUT_LIGHT', 'Surface')
+
+
+class CYCLES_LIGHT_PT_spot(CyclesButtonsPanel, Panel):
+ bl_label = "Spot Shape"
+ bl_context = "data"
+
+ @classmethod
+ def poll(cls, context):
+ light = context.light
+ return (light and light.type == 'SPOT') and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+ light = context.light
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ col = layout.column()
+ col.prop(light, "spot_size", text="Size")
+ col.prop(light, "spot_blend", text="Blend", slider=True)
+ col.prop(light, "show_cone")
+
+
+class CYCLES_WORLD_PT_preview(CyclesButtonsPanel, Panel):
+ bl_label = "Preview"
+ bl_context = "world"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return context.world and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ self.layout.template_preview(context.world)
+
+
+class CYCLES_WORLD_PT_surface(CyclesButtonsPanel, Panel):
+ bl_label = "Surface"
+ bl_context = "world"
+
+ @classmethod
+ def poll(cls, context):
+ return context.world and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ world = context.world
+
+ if not panel_node_draw(layout, world, 'OUTPUT_WORLD', 'Surface'):
+ layout.prop(world, "color")
+
+
+class CYCLES_WORLD_PT_volume(CyclesButtonsPanel, Panel):
+ bl_label = "Volume"
+ bl_context = "world"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ world = context.world
+ return world and world.node_tree and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ world = context.world
+ panel_node_draw(layout, world, 'OUTPUT_WORLD', 'Volume')
+
+
+class CYCLES_WORLD_PT_ambient_occlusion(CyclesButtonsPanel, Panel):
+ bl_label = "Ambient Occlusion"
+ bl_context = "world"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return context.world and CyclesButtonsPanel.poll(context)
+
+ def draw_header(self, context):
+ light = context.world.light_settings
+ self.layout.prop(light, "use_ambient_occlusion", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ light = context.world.light_settings
+ scene = context.scene
+
+ col = layout.column()
+ sub = col.column()
+ sub.active = light.use_ambient_occlusion or scene.render.use_simplify
+ sub.prop(light, "ao_factor", text="Factor")
+ col.prop(light, "distance", text="Distance")
+
+
+class CYCLES_WORLD_PT_mist(CyclesButtonsPanel, Panel):
+ bl_label = "Mist Pass"
+ bl_context = "world"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ if CyclesButtonsPanel.poll(context):
+ if context.world:
+ for view_layer in context.scene.view_layers:
+ if view_layer.use_pass_mist:
+ return True
+
+ return False
+
+ def draw(self, context):
+ layout = self.layout
+
+ world = context.world
+
+ split = layout.split(align=True)
+ split.prop(world.mist_settings, "start")
+ split.prop(world.mist_settings, "depth")
+
+ layout.prop(world.mist_settings, "falloff")
+
+
+class CYCLES_WORLD_PT_ray_visibility(CyclesButtonsPanel, Panel):
+ bl_label = "Ray Visibility"
+ bl_context = "world"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return CyclesButtonsPanel.poll(context) and context.world
+
+ def draw(self, context):
+ layout = self.layout
+
+ world = context.world
+ visibility = world.cycles_visibility
+
+ flow = layout.column_flow()
+
+ flow.prop(visibility, "camera")
+ flow.prop(visibility, "diffuse")
+ flow.prop(visibility, "glossy")
+ flow.prop(visibility, "transmission")
+ flow.prop(visibility, "scatter")
+
+
+class CYCLES_WORLD_PT_settings(CyclesButtonsPanel, Panel):
+ bl_label = "Settings"
+ bl_context = "world"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ return context.world and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ layout.column()
+
+
+class CYCLES_WORLD_PT_settings_surface(CyclesButtonsPanel, Panel):
+ bl_label = "Surface"
+ bl_parent_id = "CYCLES_WORLD_PT_settings"
+ bl_context = "world"
+
+ @classmethod
+ def poll(cls, context):
+ return context.world and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ world = context.world
+ cworld = world.cycles
+
+ col = layout.column()
+ col.prop(cworld, "sampling_method", text="Sampling")
+
+ sub = col.column()
+ sub.active = cworld.sampling_method != 'NONE'
+ subsub = sub.row(align=True)
+ subsub.active = cworld.sampling_method == 'MANUAL'
+ subsub.prop(cworld, "sample_map_resolution")
+ if use_branched_path(context):
+ subsub = sub.column(align=True)
+ subsub.active = use_sample_all_lights(context)
+ subsub.prop(cworld, "samples")
+ sub.prop(cworld, "max_bounces")
+
+
+class CYCLES_WORLD_PT_settings_volume(CyclesButtonsPanel, Panel):
+ bl_label = "Volume"
+ bl_parent_id = "CYCLES_WORLD_PT_settings"
+ bl_context = "world"
+
+ @classmethod
+ def poll(cls, context):
+ return context.world and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ world = context.world
+ cworld = world.cycles
+
+ col = layout.column()
+
+ sub = col.column()
+ sub.active = use_cpu(context)
+ sub.prop(cworld, "volume_sampling", text="Sampling")
+ col.prop(cworld, "volume_interpolation", text="Interpolation")
+ col.prop(cworld, "homogeneous_volume", text="Homogeneous")
+
+
+class CYCLES_MATERIAL_PT_preview(CyclesButtonsPanel, Panel):
+ bl_label = "Preview"
+ bl_context = "material"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ mat = context.material
+ return mat and (not mat.grease_pencil) and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ self.layout.template_preview(context.material)
+
+
+class CYCLES_MATERIAL_PT_surface(CyclesButtonsPanel, Panel):
+ bl_label = "Surface"
+ bl_context = "material"
+
+ @classmethod
+ def poll(cls, context):
+ mat = context.material
+ return mat and (not mat.grease_pencil) and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ mat = context.material
+ if not panel_node_draw(layout, mat, 'OUTPUT_MATERIAL', 'Surface'):
+ layout.prop(mat, "diffuse_color")
+
+
+class CYCLES_MATERIAL_PT_volume(CyclesButtonsPanel, Panel):
+ bl_label = "Volume"
+ bl_context = "material"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ mat = context.material
+ return mat and (not mat.grease_pencil) and mat.node_tree and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ mat = context.material
+ # cmat = mat.cycles
+
+ panel_node_draw(layout, mat, 'OUTPUT_MATERIAL', 'Volume')
+
+
+class CYCLES_MATERIAL_PT_displacement(CyclesButtonsPanel, Panel):
+ bl_label = "Displacement"
+ bl_context = "material"
+
+ @classmethod
+ def poll(cls, context):
+ mat = context.material
+ return mat and (not mat.grease_pencil) and mat.node_tree and CyclesButtonsPanel.poll(context)
+
+ def draw(self, context):
+ layout = self.layout
+
+ mat = context.material
+ panel_node_draw(layout, mat, 'OUTPUT_MATERIAL', 'Displacement')
+
+
+class CYCLES_MATERIAL_PT_settings(CyclesButtonsPanel, Panel):
+ bl_label = "Settings"
+ bl_context = "material"
+ bl_options = {'DEFAULT_CLOSED'}
+
+ @classmethod
+ def poll(cls, context):
+ mat = context.material
+ return mat and (not mat.grease_pencil) and CyclesButtonsPanel.poll(context)
+
+ @staticmethod
+ def draw_shared(self, mat):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ layout.prop(mat, "pass_index")
+
+ def draw(self, context):
+ self.draw_shared(self, context.material)
+
+
+class CYCLES_MATERIAL_PT_settings_surface(CyclesButtonsPanel, Panel):
+ bl_label = "Surface"
+ bl_parent_id = "CYCLES_MATERIAL_PT_settings"
+ bl_context = "material"
+
+ @staticmethod
+ def draw_shared(self, mat):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ cmat = mat.cycles
+
+ col = layout.column()
+ col.prop(cmat, "sample_as_light", text="Multiple Importance")
+ col.prop(cmat, "use_transparent_shadow")
+ col.prop(cmat, "displacement_method", text="Displacement")
+
+ def draw(self, context):
+ self.draw_shared(self, context.material)
+
+
+class CYCLES_MATERIAL_PT_settings_volume(CyclesButtonsPanel, Panel):
+ bl_label = "Volume"
+ bl_parent_id = "CYCLES_MATERIAL_PT_settings"
+ bl_context = "material"
+
+ @staticmethod
+ def draw_shared(self, context, mat):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ cmat = mat.cycles
+
+ col = layout.column()
+ sub = col.column()
+ sub.active = use_cpu(context)
+ sub.prop(cmat, "volume_sampling", text="Sampling")
+ col.prop(cmat, "volume_interpolation", text="Interpolation")
+ col.prop(cmat, "homogeneous_volume", text="Homogeneous")
+
+ def draw(self, context):
+ self.draw_shared(self, context, context.material)
+
+
+class CYCLES_RENDER_PT_bake(CyclesButtonsPanel, Panel):
+ bl_label = "Bake"
+ bl_context = "render"
+ bl_options = {'DEFAULT_CLOSED'}
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ return CyclesButtonsPanel.poll(context) and not use_optix(context)
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False # No animation.
+
+ scene = context.scene
+ cscene = scene.cycles
+ cbk = scene.render.bake
+ rd = scene.render
+
+ if rd.use_bake_multires:
+ layout.operator("object.bake_image", icon='RENDER_STILL')
+ layout.prop(rd, "use_bake_multires")
+ layout.prop(rd, "bake_type")
+
+ else:
+ layout.operator("object.bake", icon='RENDER_STILL').type = cscene.bake_type
+ layout.prop(rd, "use_bake_multires")
+ layout.prop(cscene, "bake_type")
+
+
+class CYCLES_RENDER_PT_bake_influence(CyclesButtonsPanel, Panel):
+ bl_label = "Influence"
+ bl_context = "render"
+ bl_parent_id = "CYCLES_RENDER_PT_bake"
+ COMPAT_ENGINES = {'CYCLES'}
+ @classmethod
+ def poll(cls, context):
+ scene = context.scene
+ cscene = scene.cycles
+ rd = scene.render
+ if rd.use_bake_multires == False and cscene.bake_type in {
+ 'NORMAL', 'COMBINED', 'DIFFUSE', 'GLOSSY', 'TRANSMISSION', 'SUBSURFACE'}:
+ return True
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False # No animation.
+
+ scene = context.scene
+ cscene = scene.cycles
+ cbk = scene.render.bake
+ rd = scene.render
+
+ col = layout.column()
+
+ if cscene.bake_type == 'NORMAL':
+ col.prop(cbk, "normal_space", text="Space")
+
+ sub = col.column(align=True)
+ sub.prop(cbk, "normal_r", text="Swizzle R")
+ sub.prop(cbk, "normal_g", text="G")
+ sub.prop(cbk, "normal_b", text="B")
+
+ elif cscene.bake_type == 'COMBINED':
+ row = col.row(align=True)
+ row.use_property_split = False
+ row.prop(cbk, "use_pass_direct", toggle=True)
+ row.prop(cbk, "use_pass_indirect", toggle=True)
+
+ flow = col.grid_flow(row_major=False, columns=0, even_columns=False, even_rows=False, align=True)
+
+ flow.active = cbk.use_pass_direct or cbk.use_pass_indirect
+ flow.prop(cbk, "use_pass_diffuse")
+ flow.prop(cbk, "use_pass_glossy")
+ flow.prop(cbk, "use_pass_transmission")
+ flow.prop(cbk, "use_pass_subsurface")
+ flow.prop(cbk, "use_pass_ambient_occlusion")
+ flow.prop(cbk, "use_pass_emit")
+
+ elif cscene.bake_type in {'DIFFUSE', 'GLOSSY', 'TRANSMISSION', 'SUBSURFACE'}:
+ row = col.row(align=True)
+ row.use_property_split = False
+ row.prop(cbk, "use_pass_direct", toggle=True)
+ row.prop(cbk, "use_pass_indirect", toggle=True)
+ row.prop(cbk, "use_pass_color", toggle=True)
+
+
+class CYCLES_RENDER_PT_bake_selected_to_active(CyclesButtonsPanel, Panel):
+ bl_label = "Selected to Active"
+ bl_context = "render"
+ bl_parent_id = "CYCLES_RENDER_PT_bake"
+ bl_options = {'DEFAULT_CLOSED'}
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ scene = context.scene
+ rd = scene.render
+ return not rd.use_bake_multires
+
+ def draw_header(self, context):
+ scene = context.scene
+ cbk = scene.render.bake
+ self.layout.prop(cbk, "use_selected_to_active", text="")
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False # No animation.
+
+ scene = context.scene
+ cscene = scene.cycles
+ cbk = scene.render.bake
+ rd = scene.render
+
+ layout.active = cbk.use_selected_to_active
+ col = layout.column()
+
+ col.prop(cbk, "use_cage", text="Cage")
+ if cbk.use_cage:
+ col.prop(cbk, "cage_extrusion", text="Extrusion")
+ col.prop(cbk, "cage_object", text="Cage Object")
+ else:
+ col.prop(cbk, "cage_extrusion", text="Ray Distance")
+
+
+class CYCLES_RENDER_PT_bake_output(CyclesButtonsPanel, Panel):
+ bl_label = "Output"
+ bl_context = "render"
+ bl_parent_id = "CYCLES_RENDER_PT_bake"
+ COMPAT_ENGINES = {'CYCLES'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False # No animation.
+
+ scene = context.scene
+ cscene = scene.cycles
+ cbk = scene.render.bake
+ rd = scene.render
+
+ if rd.use_bake_multires:
+ layout.prop(rd, "bake_margin")
+ layout.prop(rd, "use_bake_clear", text="Clear Image")
+
+ if rd.bake_type == 'DISPLACEMENT':
+ layout.prop(rd, "use_bake_lores_mesh")
+ else:
+ layout.prop(cbk, "margin")
+ layout.prop(cbk, "use_clear", text="Clear Image")
+
+
+class CYCLES_RENDER_PT_debug(CyclesButtonsPanel, Panel):
+ bl_label = "Debug"
+ bl_context = "render"
+ bl_options = {'DEFAULT_CLOSED'}
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ return CyclesButtonsPanel.poll(context) and bpy.app.debug_value == 256
+
+ def draw(self, context):
+ layout = self.layout
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ col = layout.column()
+
+ col.label(text="CPU Flags:")
+ row = col.row(align=True)
+ row.prop(cscene, "debug_use_cpu_sse2", toggle=True)
+ row.prop(cscene, "debug_use_cpu_sse3", toggle=True)
+ row.prop(cscene, "debug_use_cpu_sse41", toggle=True)
+ row.prop(cscene, "debug_use_cpu_avx", toggle=True)
+ row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
+ col.prop(cscene, "debug_bvh_layout")
+ col.prop(cscene, "debug_use_cpu_split_kernel")
+
+ col.separator()
+
+ col = layout.column()
+ col.label(text="CUDA Flags:")
+ col.prop(cscene, "debug_use_cuda_adaptive_compile")
+ col.prop(cscene, "debug_use_cuda_split_kernel")
+
+ col.separator()
+
+ col = layout.column()
+ col.label(text="OptiX Flags:")
+ col.prop(cscene, "debug_optix_cuda_streams")
+
+ col.separator()
+
+ col = layout.column()
+ col.label(text="OpenCL Flags:")
+ col.prop(cscene, "debug_opencl_device_type", text="Device")
+ col.prop(cscene, "debug_use_opencl_debug", text="Debug")
+ col.prop(cscene, "debug_opencl_mem_limit")
+
+ col.separator()
+
+ col = layout.column()
+ col.prop(cscene, "debug_bvh_type")
+
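+# The Debug panel above is hidden unless Blender runs with a debug value of
+# 256, e.g. started as `blender --debug-value 256`, or after setting
+# bpy.app.debug_value = 256 from the Python console.
+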
+
+class CYCLES_RENDER_PT_simplify(CyclesButtonsPanel, Panel):
+ bl_label = "Simplify"
+ bl_context = "render"
+ bl_options = {'DEFAULT_CLOSED'}
+ COMPAT_ENGINES = {'CYCLES'}
+
+ def draw_header(self, context):
+ rd = context.scene.render
+ self.layout.prop(rd, "use_simplify", text="")
+
+ def draw(self, context):
+ pass
+
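+# CYCLES_RENDER_PT_simplify is header-only: the enable checkbox lives in
+# draw_header(), and the actual options are provided by the sub-panels below,
+# which attach themselves via bl_parent_id = "CYCLES_RENDER_PT_simplify" and
+# grey themselves out through layout.active = rd.use_simplify.
+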
+
+class CYCLES_RENDER_PT_simplify_viewport(CyclesButtonsPanel, Panel):
+ bl_label = "Viewport"
+ bl_context = "render"
+ bl_parent_id = "CYCLES_RENDER_PT_simplify"
+ COMPAT_ENGINES = {'CYCLES'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ cscene = scene.cycles
+
+ layout.active = rd.use_simplify
+
+ col = layout.column()
+ col.prop(rd, "simplify_subdivision", text="Max Subdivision")
+ col.prop(rd, "simplify_child_particles", text="Child Particles")
+ col.prop(cscene, "texture_limit", text="Texture Limit")
+ col.prop(cscene, "ao_bounces", text="AO Bounces")
+ col.prop(rd, "use_simplify_smoke_highres")
+
+
+class CYCLES_RENDER_PT_simplify_render(CyclesButtonsPanel, Panel):
+ bl_label = "Render"
+ bl_context = "render"
+ bl_parent_id = "CYCLES_RENDER_PT_simplify"
+ COMPAT_ENGINES = {'CYCLES'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ cscene = scene.cycles
+
+ layout.active = rd.use_simplify
+
+ col = layout.column()
+
+ col.prop(rd, "simplify_subdivision_render", text="Max Subdivision")
+ col.prop(rd, "simplify_child_particles_render", text="Child Particles")
+ col.prop(cscene, "texture_limit_render", text="Texture Limit")
+ col.prop(cscene, "ao_bounces_render", text="AO Bounces")
+
+
+class CYCLES_RENDER_PT_simplify_culling(CyclesButtonsPanel, Panel):
+ bl_label = "Culling"
+ bl_context = "render"
+ bl_parent_id = "CYCLES_RENDER_PT_simplify"
+ bl_options = {'DEFAULT_CLOSED'}
+ COMPAT_ENGINES = {'CYCLES'}
+
+ def draw(self, context):
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ scene = context.scene
+ rd = scene.render
+ cscene = scene.cycles
+
+ layout.active = rd.use_simplify
+
+ col = layout.column()
+ col.prop(cscene, "use_camera_cull")
+ sub = col.column()
+ sub.active = cscene.use_camera_cull
+ sub.prop(cscene, "camera_cull_margin")
+
+ col = layout.column()
+ col.prop(cscene, "use_distance_cull")
+ sub = col.column()
+ sub.active = cscene.use_distance_cull
+ sub.prop(cscene, "distance_cull_margin", text="Distance")
+
+
+class CYCLES_VIEW3D_PT_shading_render_pass(Panel):
+ bl_space_type = 'VIEW_3D'
+ bl_region_type = 'HEADER'
+ bl_label = "Render Pass"
+ bl_parent_id = 'VIEW3D_PT_shading'
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ return (context.engine in cls.COMPAT_ENGINES
+ and context.space_data.shading.type == 'RENDERED')
+
+ def draw(self, context):
+ shading = context.space_data.shading
+
+ layout = self.layout
+ layout.prop(shading.cycles, "render_pass", text="")
+
+
+class CYCLES_VIEW3D_PT_shading_lighting(Panel):
+ bl_space_type = 'VIEW_3D'
+ bl_region_type = 'HEADER'
+ bl_label = "Lighting"
+ bl_parent_id = 'VIEW3D_PT_shading'
+ COMPAT_ENGINES = {'CYCLES'}
+
+ @classmethod
+ def poll(cls, context):
+ return (context.engine in cls.COMPAT_ENGINES
+ and context.space_data.shading.type == 'RENDERED')
+
+ def draw(self, context):
+ layout = self.layout
+ col = layout.column()
+
+ shading = context.space_data.shading
+ col.prop(shading, "use_scene_lights_render")
+ col.prop(shading, "use_scene_world_render")
+
+ if not shading.use_scene_world_render:
+ col = layout.column()
+ split = col.split(factor=0.9)
+
+ col = split.column()
+ sub = col.row()
+ sub.scale_y = 0.6
+ sub.template_icon_view(shading, "studio_light", scale_popup=3)
+
+ col = split.column()
+ col.operator("preferences.studiolight_show", emboss=False, text="", icon='PREFERENCES')
+
+ split = layout.split(factor=0.9)
+ col = split.column()
+ col.prop(shading, "studiolight_rotate_z", text="Rotation")
+ col.prop(shading, "studiolight_intensity")
+ col.prop(shading, "studiolight_background_alpha")
+
+
+class CYCLES_VIEW3D_PT_simplify_greasepencil(CyclesButtonsPanel, Panel, GreasePencilSimplifyPanel):
+ bl_label = "Grease Pencil"
+ bl_parent_id = "CYCLES_RENDER_PT_simplify"
+ COMPAT_ENGINES = {'CYCLES'}
+ bl_options = {'DEFAULT_CLOSED'}
+
+
+def draw_device(self, context):
+ scene = context.scene
+ layout = self.layout
+ layout.use_property_split = True
+ layout.use_property_decorate = False
+
+ if context.engine == 'CYCLES':
+ from . import engine
+ cscene = scene.cycles
+
+ col = layout.column()
+ col.prop(cscene, "feature_set")
+
+ col = layout.column()
+ col.active = show_device_active(context)
+ col.prop(cscene, "device")
+
+ if engine.with_osl() and use_cpu(context):
+ col.prop(cscene, "shading_system")
+
+
+def draw_pause(self, context):
+ layout = self.layout
+ scene = context.scene
+
+ if context.engine == "CYCLES":
+ view = context.space_data
+
+ if view.shading.type == 'RENDERED':
+ cscene = scene.cycles
+ layout.prop(cscene, "preview_pause", icon='PLAY' if cscene.preview_pause else 'PAUSE', text="")
+
+
+def get_panels():
+ exclude_panels = {
+ 'DATA_PT_area',
+ 'DATA_PT_camera_dof',
+ 'DATA_PT_falloff_curve',
+ 'DATA_PT_light',
+ 'DATA_PT_preview',
+ 'DATA_PT_spot',
+ 'MATERIAL_PT_context_material',
+ 'MATERIAL_PT_preview',
+ 'NODE_DATA_PT_light',
+ 'NODE_DATA_PT_spot',
+ 'OBJECT_PT_visibility',
+ 'VIEWLAYER_PT_filter',
+ 'VIEWLAYER_PT_layer_passes',
+ 'RENDER_PT_post_processing',
+ 'RENDER_PT_simplify',
+ }
+
+ panels = []
+ for panel in bpy.types.Panel.__subclasses__():
+ if hasattr(panel, 'COMPAT_ENGINES') and 'BLENDER_RENDER' in panel.COMPAT_ENGINES:
+ if panel.__name__ not in exclude_panels:
+ panels.append(panel)
+
+ return panels
+
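+# get_panels() collects every built-in panel that declares itself compatible
+# with BLENDER_RENDER, minus the ones Cycles replaces with its own versions
+# above; register() below then adds 'CYCLES' to their COMPAT_ENGINES so they
+# remain visible while Cycles is the active render engine.
+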
+
+classes = (
+ CYCLES_PT_sampling_presets,
+ CYCLES_PT_integrator_presets,
+ CYCLES_RENDER_PT_sampling,
+ CYCLES_RENDER_PT_sampling_sub_samples,
+ CYCLES_RENDER_PT_sampling_advanced,
+ CYCLES_RENDER_PT_light_paths,
+ CYCLES_RENDER_PT_light_paths_max_bounces,
+ CYCLES_RENDER_PT_light_paths_clamping,
+ CYCLES_RENDER_PT_light_paths_caustics,
+ CYCLES_RENDER_PT_volumes,
+ CYCLES_RENDER_PT_subdivision,
+ CYCLES_RENDER_PT_hair,
+ CYCLES_RENDER_PT_simplify,
+ CYCLES_RENDER_PT_simplify_viewport,
+ CYCLES_RENDER_PT_simplify_render,
+ CYCLES_RENDER_PT_simplify_culling,
+ CYCLES_VIEW3D_PT_simplify_greasepencil,
+ CYCLES_VIEW3D_PT_shading_lighting,
+ CYCLES_VIEW3D_PT_shading_render_pass,
+ CYCLES_RENDER_PT_motion_blur,
+ CYCLES_RENDER_PT_motion_blur_curve,
+ CYCLES_RENDER_PT_film,
+ CYCLES_RENDER_PT_film_pixel_filter,
+ CYCLES_RENDER_PT_film_transparency,
+ CYCLES_RENDER_PT_performance,
+ CYCLES_RENDER_PT_performance_threads,
+ CYCLES_RENDER_PT_performance_tiles,
+ CYCLES_RENDER_PT_performance_acceleration_structure,
+ CYCLES_RENDER_PT_performance_final_render,
+ CYCLES_RENDER_PT_performance_viewport,
+ CYCLES_RENDER_PT_passes,
+ CYCLES_RENDER_PT_passes_data,
+ CYCLES_RENDER_PT_passes_light,
+ CYCLES_RENDER_PT_passes_crypto,
+ CYCLES_RENDER_PT_passes_debug,
+ CYCLES_RENDER_UL_aov,
+ CYCLES_RENDER_PT_passes_aov,
+ CYCLES_RENDER_PT_filter,
+ CYCLES_RENDER_PT_override,
+ CYCLES_RENDER_PT_denoising,
+ CYCLES_PT_post_processing,
+ CYCLES_CAMERA_PT_dof,
+ CYCLES_CAMERA_PT_dof_aperture,
+ CYCLES_PT_context_material,
+ CYCLES_OBJECT_PT_motion_blur,
+ CYCLES_OBJECT_PT_visibility,
+ CYCLES_OBJECT_PT_visibility_ray_visibility,
+ CYCLES_OBJECT_PT_visibility_culling,
+ CYCLES_LIGHT_PT_preview,
+ CYCLES_LIGHT_PT_light,
+ CYCLES_LIGHT_PT_nodes,
+ CYCLES_LIGHT_PT_spot,
+ CYCLES_WORLD_PT_preview,
+ CYCLES_WORLD_PT_surface,
+ CYCLES_WORLD_PT_volume,
+ CYCLES_WORLD_PT_ambient_occlusion,
+ CYCLES_WORLD_PT_mist,
+ CYCLES_WORLD_PT_ray_visibility,
+ CYCLES_WORLD_PT_settings,
+ CYCLES_WORLD_PT_settings_surface,
+ CYCLES_WORLD_PT_settings_volume,
+ CYCLES_MATERIAL_PT_preview,
+ CYCLES_MATERIAL_PT_surface,
+ CYCLES_MATERIAL_PT_volume,
+ CYCLES_MATERIAL_PT_displacement,
+ CYCLES_MATERIAL_PT_settings,
+ CYCLES_MATERIAL_PT_settings_surface,
+ CYCLES_MATERIAL_PT_settings_volume,
+ CYCLES_RENDER_PT_bake,
+ CYCLES_RENDER_PT_bake_influence,
+ CYCLES_RENDER_PT_bake_selected_to_active,
+ CYCLES_RENDER_PT_bake_output,
+ CYCLES_RENDER_PT_debug,
+ node_panel(CYCLES_MATERIAL_PT_settings),
+ node_panel(CYCLES_MATERIAL_PT_settings_surface),
+ node_panel(CYCLES_MATERIAL_PT_settings_volume),
+ node_panel(CYCLES_WORLD_PT_ray_visibility),
+ node_panel(CYCLES_WORLD_PT_settings),
+ node_panel(CYCLES_WORLD_PT_settings_surface),
+ node_panel(CYCLES_WORLD_PT_settings_volume),
+ node_panel(CYCLES_LIGHT_PT_light),
+ node_panel(CYCLES_LIGHT_PT_spot),
+)
+
+
+def register():
+ from bpy.utils import register_class
+
+ bpy.types.RENDER_PT_context.append(draw_device)
+ bpy.types.VIEW3D_HT_header.append(draw_pause)
+
+ for panel in get_panels():
+ panel.COMPAT_ENGINES.add('CYCLES')
+
+ for cls in classes:
+ register_class(cls)
+
+
+def unregister():
+ from bpy.utils import unregister_class
+
+ bpy.types.RENDER_PT_context.remove(draw_device)
+ bpy.types.VIEW3D_HT_header.remove(draw_pause)
+
+ for panel in get_panels():
+ if 'CYCLES' in panel.COMPAT_ENGINES:
+ panel.COMPAT_ENGINES.remove('CYCLES')
+
+ for cls in classes:
+ unregister_class(cls)
diff -Naur a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
--- a/intern/cycles/blender/blender_session.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/blender_session.cpp 2020-01-10 20:42:43.457590054 +0300
@@ -474,7 +474,8 @@
b_rlay_name = b_view_layer.name();
/* add passes */
- vector<Pass> passes = sync->sync_render_passes(b_rlay, b_view_layer);
+ vector<Pass> passes = sync->sync_render_passes(
+ b_rlay, b_view_layer, session_params.adaptive_sampling);
buffer_params.passes = passes;
PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
diff -Naur a/intern/cycles/blender/blender_session.cpp.orig b/intern/cycles/blender/blender_session.cpp.orig
--- a/intern/cycles/blender/blender_session.cpp.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/blender/blender_session.cpp.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,1513 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+
+#include "device/device.h"
+#include "render/background.h"
+#include "render/buffers.h"
+#include "render/camera.h"
+#include "render/colorspace.h"
+#include "render/film.h"
+#include "render/integrator.h"
+#include "render/light.h"
+#include "render/mesh.h"
+#include "render/object.h"
+#include "render/scene.h"
+#include "render/session.h"
+#include "render/shader.h"
+#include "render/stats.h"
+
+#include "util/util_algorithm.h"
+#include "util/util_color.h"
+#include "util/util_foreach.h"
+#include "util/util_function.h"
+#include "util/util_hash.h"
+#include "util/util_logging.h"
+#include "util/util_murmurhash.h"
+#include "util/util_progress.h"
+#include "util/util_time.h"
+
+#include "blender/blender_sync.h"
+#include "blender/blender_session.h"
+#include "blender/blender_util.h"
+
+CCL_NAMESPACE_BEGIN
+
+bool BlenderSession::headless = false;
+int BlenderSession::num_resumable_chunks = 0;
+int BlenderSession::current_resumable_chunk = 0;
+int BlenderSession::start_resumable_chunk = 0;
+int BlenderSession::end_resumable_chunk = 0;
+bool BlenderSession::print_render_stats = false;
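+
+/* These chunk settings are normally filled in from the add-on's Python side
+ * before rendering, e.g. via the command line arguments
+ * `--cycles-resumable-num-chunks N` and `--cycles-resumable-current-chunk I`
+ * (names as handled by the Cycles add-on), which restrict the render to the
+ * I-th of N sample ranges; see update_resumable_tile_manager() below. */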
+
+BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
+ BL::Preferences &b_userpref,
+ BL::BlendData &b_data,
+ bool preview_osl)
+ : session(NULL),
+ sync(NULL),
+ b_engine(b_engine),
+ b_userpref(b_userpref),
+ b_data(b_data),
+ b_render(b_engine.render()),
+ b_depsgraph(PointerRNA_NULL),
+ b_scene(PointerRNA_NULL),
+ b_v3d(PointerRNA_NULL),
+ b_rv3d(PointerRNA_NULL),
+ width(0),
+ height(0),
+ preview_osl(preview_osl),
+ python_thread_state(NULL)
+{
+ /* offline render */
+ background = true;
+ last_redraw_time = 0.0;
+ start_resize_time = 0.0;
+ last_status_time = 0.0;
+}
+
+BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
+ BL::Preferences &b_userpref,
+ BL::BlendData &b_data,
+ BL::SpaceView3D &b_v3d,
+ BL::RegionView3D &b_rv3d,
+ int width,
+ int height)
+ : session(NULL),
+ sync(NULL),
+ b_engine(b_engine),
+ b_userpref(b_userpref),
+ b_data(b_data),
+ b_render(b_engine.render()),
+ b_depsgraph(PointerRNA_NULL),
+ b_scene(PointerRNA_NULL),
+ b_v3d(b_v3d),
+ b_rv3d(b_rv3d),
+ width(width),
+ height(height),
+ preview_osl(false),
+ python_thread_state(NULL)
+{
+ /* 3d view render */
+ background = false;
+ last_redraw_time = 0.0;
+ start_resize_time = 0.0;
+ last_status_time = 0.0;
+}
+
+BlenderSession::~BlenderSession()
+{
+ free_session();
+}
+
+void BlenderSession::create_session()
+{
+ SessionParams session_params = BlenderSync::get_session_params(
+ b_engine, b_userpref, b_scene, background);
+ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
+ bool session_pause = BlenderSync::get_session_pause(b_scene, background);
+
+ /* reset status/progress */
+ last_status = "";
+ last_error = "";
+ last_progress = -1.0f;
+ start_resize_time = 0.0;
+
+ /* create session */
+ session = new Session(session_params);
+ session->scene = scene;
+ session->progress.set_update_callback(function_bind(&BlenderSession::tag_redraw, this));
+ session->progress.set_cancel_callback(function_bind(&BlenderSession::test_cancel, this));
+ session->set_pause(session_pause);
+
+ /* create scene */
+ scene = new Scene(scene_params, session->device);
+ scene->name = b_scene.name();
+
+ /* setup callbacks for builtin image support */
+ scene->image_manager->builtin_image_info_cb = function_bind(
+ &BlenderSession::builtin_image_info, this, _1, _2, _3);
+ scene->image_manager->builtin_image_pixels_cb = function_bind(
+ &BlenderSession::builtin_image_pixels, this, _1, _2, _3, _4, _5, _6, _7);
+ scene->image_manager->builtin_image_float_pixels_cb = function_bind(
+ &BlenderSession::builtin_image_float_pixels, this, _1, _2, _3, _4, _5, _6, _7);
+
+ session->scene = scene;
+
+ /* There is no single depsgraph to use for the entire render.
+ * So we need to handle this differently.
+ *
+ * We could loop over the final render result's render layers in the pipeline
+ * and keep Cycles unaware of multiple layers, or perhaps move syncing further
+ * down in the pipeline.
+ */
+ /* create sync */
+ sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress);
+ BL::Object b_camera_override(b_engine.camera_override());
+ if (b_v3d) {
+ sync->sync_view(b_v3d, b_rv3d, width, height);
+ }
+ else {
+ sync->sync_camera(b_render, b_camera_override, width, height, "");
+ }
+
+ /* set buffer parameters */
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ session->reset(buffer_params, session_params.samples);
+
+ b_engine.use_highlight_tiles(session_params.progressive_refine == false);
+
+ update_resumable_tile_manager(session_params.samples);
+}
+
+void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph)
+{
+ this->b_data = b_data;
+ this->b_depsgraph = b_depsgraph;
+ this->b_scene = b_depsgraph.scene_eval();
+
+ if (preview_osl) {
+ PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
+ RNA_boolean_set(&cscene, "shading_system", preview_osl);
+ }
+
+ if (b_v3d) {
+ this->b_render = b_scene.render();
+ }
+ else {
+ this->b_render = b_engine.render();
+ width = render_resolution_x(b_render);
+ height = render_resolution_y(b_render);
+ }
+
+ bool is_new_session = (session == NULL);
+ if (is_new_session) {
+ /* Initialize the session and remember that it was just created, so that
+ * it is not re-created below.
+ */
+ create_session();
+ }
+
+ if (b_v3d) {
+ /* NOTE: We need to create the session, but running the code below would
+ * make the viewport render get stuck during initialization.
+ */
+ return;
+ }
+
+ SessionParams session_params = BlenderSync::get_session_params(
+ b_engine, b_userpref, b_scene, background);
+ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
+
+ if (scene->params.modified(scene_params) || session->params.modified(session_params) ||
+ !scene_params.persistent_data) {
+ /* if scene or session parameters changed, it's easier to simply re-create
+ * them rather than trying to distinguish which settings need to be updated
+ */
+ if (!is_new_session) {
+ free_session();
+ create_session();
+ }
+ return;
+ }
+
+ session->progress.reset();
+ scene->reset();
+
+ session->tile_manager.set_tile_order(session_params.tile_order);
+
+ /* peak memory usage should show current render peak, not peak for all renders
+ * made by this render session
+ */
+ session->stats.mem_peak = session->stats.mem_used;
+
+ /* There is no single depsgraph to use for the entire render.
+ * See note on create_session().
+ */
+ /* sync object should be re-created */
+ sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress);
+
+ BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL);
+ BL::RegionView3D b_null_region_view3d(PointerRNA_NULL);
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
+ session->reset(buffer_params, session_params.samples);
+
+ b_engine.use_highlight_tiles(session_params.progressive_refine == false);
+
+ /* reset time */
+ start_resize_time = 0.0;
+}
+
+void BlenderSession::free_session()
+{
+ if (sync)
+ delete sync;
+
+ delete session;
+}
+
+static ShaderEvalType get_shader_type(const string &pass_type)
+{
+ const char *shader_type = pass_type.c_str();
+
+ /* data passes */
+ if (strcmp(shader_type, "NORMAL") == 0)
+ return SHADER_EVAL_NORMAL;
+ else if (strcmp(shader_type, "UV") == 0)
+ return SHADER_EVAL_UV;
+ else if (strcmp(shader_type, "ROUGHNESS") == 0)
+ return SHADER_EVAL_ROUGHNESS;
+ else if (strcmp(shader_type, "DIFFUSE_COLOR") == 0)
+ return SHADER_EVAL_DIFFUSE_COLOR;
+ else if (strcmp(shader_type, "GLOSSY_COLOR") == 0)
+ return SHADER_EVAL_GLOSSY_COLOR;
+ else if (strcmp(shader_type, "TRANSMISSION_COLOR") == 0)
+ return SHADER_EVAL_TRANSMISSION_COLOR;
+ else if (strcmp(shader_type, "SUBSURFACE_COLOR") == 0)
+ return SHADER_EVAL_SUBSURFACE_COLOR;
+ else if (strcmp(shader_type, "EMIT") == 0)
+ return SHADER_EVAL_EMISSION;
+
+ /* light passes */
+ else if (strcmp(shader_type, "AO") == 0)
+ return SHADER_EVAL_AO;
+ else if (strcmp(shader_type, "COMBINED") == 0)
+ return SHADER_EVAL_COMBINED;
+ else if (strcmp(shader_type, "SHADOW") == 0)
+ return SHADER_EVAL_SHADOW;
+ else if (strcmp(shader_type, "DIFFUSE") == 0)
+ return SHADER_EVAL_DIFFUSE;
+ else if (strcmp(shader_type, "GLOSSY") == 0)
+ return SHADER_EVAL_GLOSSY;
+ else if (strcmp(shader_type, "TRANSMISSION") == 0)
+ return SHADER_EVAL_TRANSMISSION;
+ else if (strcmp(shader_type, "SUBSURFACE") == 0)
+ return SHADER_EVAL_SUBSURFACE;
+
+ /* extra */
+ else if (strcmp(shader_type, "ENVIRONMENT") == 0)
+ return SHADER_EVAL_ENVIRONMENT;
+
+ else
+ return SHADER_EVAL_BAKE;
+}
+
+static BL::RenderResult begin_render_result(BL::RenderEngine &b_engine,
+ int x,
+ int y,
+ int w,
+ int h,
+ const char *layername,
+ const char *viewname)
+{
+ return b_engine.begin_result(x, y, w, h, layername, viewname);
+}
+
+static void end_render_result(BL::RenderEngine &b_engine,
+ BL::RenderResult &b_rr,
+ bool cancel,
+ bool highlight,
+ bool do_merge_results)
+{
+ b_engine.end_result(b_rr, (int)cancel, (int)highlight, (int)do_merge_results);
+}
+
+void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
+ bool do_update_only,
+ bool highlight)
+{
+ int x = rtile.x - session->tile_manager.params.full_x;
+ int y = rtile.y - session->tile_manager.params.full_y;
+ int w = rtile.w;
+ int h = rtile.h;
+
+ /* get render result */
+ BL::RenderResult b_rr = begin_render_result(
+ b_engine, x, y, w, h, b_rlay_name.c_str(), b_rview_name.c_str());
+
+ /* can happen if the intersected rectangle gives 0 width or height */
+ if (b_rr.ptr.data == NULL) {
+ return;
+ }
+
+ BL::RenderResult::layers_iterator b_single_rlay;
+ b_rr.layers.begin(b_single_rlay);
+
+ /* layer will be missing if it was disabled in the UI */
+ if (b_single_rlay == b_rr.layers.end())
+ return;
+
+ BL::RenderLayer b_rlay = *b_single_rlay;
+
+ if (do_update_only) {
+ /* Sample would be zero at the initial tile update, which is only needed
+ * to tag the tile from the Blender side as IN PROGRESS for proper
+ * highlight; no buffers should be sent to Blender yet. For denoising we
+ * also keep showing the noisy buffers until denoising is done. */
+ bool merge = (rtile.sample != 0) && (rtile.task != RenderTile::DENOISE);
+
+ if (merge) {
+ update_render_result(b_rlay, rtile);
+ }
+
+ end_render_result(b_engine, b_rr, true, highlight, merge);
+ }
+ else {
+ /* Write final render result. */
+ write_render_result(b_rlay, rtile);
+ end_render_result(b_engine, b_rr, false, false, true);
+ }
+}
+
+void BlenderSession::write_render_tile(RenderTile &rtile)
+{
+ do_write_update_render_tile(rtile, false, false);
+}
+
+void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight)
+{
+ /* Use final write for preview renders, otherwise the render result wouldn't
+ * be updated on the Blender side. This would need to be investigated a bit
+ * further, but for now it shall be fine.
+ */
+ if (!b_engine.is_preview())
+ do_write_update_render_tile(rtile, true, highlight);
+ else
+ do_write_update_render_tile(rtile, false, false);
+}
+
+static void add_cryptomatte_layer(BL::RenderResult &b_rr, string name, string manifest)
+{
+ string identifier = string_printf("%08x", util_murmur_hash3(name.c_str(), name.length(), 0));
+ string prefix = "cryptomatte/" + identifier.substr(0, 7) + "/";
+
+ render_add_metadata(b_rr, prefix + "name", name);
+ render_add_metadata(b_rr, prefix + "hash", "MurmurHash3_32");
+ render_add_metadata(b_rr, prefix + "conversion", "uint32_to_float32");
+ render_add_metadata(b_rr, prefix + "manifest", manifest);
+}
+
+void BlenderSession::stamp_view_layer_metadata(Scene *scene, const string &view_layer_name)
+{
+ BL::RenderResult b_rr = b_engine.get_result();
+ string prefix = "cycles." + view_layer_name + ".";
+
+ /* Configured number of samples for the view layer. */
+ b_rr.stamp_data_add_field((prefix + "samples").c_str(),
+ to_string(session->params.samples).c_str());
+
+ /* Store ranged samples information. */
+ if (session->tile_manager.range_num_samples != -1) {
+ b_rr.stamp_data_add_field((prefix + "range_start_sample").c_str(),
+ to_string(session->tile_manager.range_start_sample).c_str());
+ b_rr.stamp_data_add_field((prefix + "range_num_samples").c_str(),
+ to_string(session->tile_manager.range_num_samples).c_str());
+ }
+
+ /* Write cryptomatte metadata. */
+ if (scene->film->cryptomatte_passes & CRYPT_OBJECT) {
+ add_cryptomatte_layer(b_rr,
+ view_layer_name + ".CryptoObject",
+ scene->object_manager->get_cryptomatte_objects(scene));
+ }
+ if (scene->film->cryptomatte_passes & CRYPT_MATERIAL) {
+ add_cryptomatte_layer(b_rr,
+ view_layer_name + ".CryptoMaterial",
+ scene->shader_manager->get_cryptomatte_materials(scene));
+ }
+ if (scene->film->cryptomatte_passes & CRYPT_ASSET) {
+ add_cryptomatte_layer(b_rr,
+ view_layer_name + ".CryptoAsset",
+ scene->object_manager->get_cryptomatte_assets(scene));
+ }
+
+ /* Store synchronization and bare-render times. */
+ double total_time, render_time;
+ session->progress.get_time(total_time, render_time);
+ b_rr.stamp_data_add_field((prefix + "total_time").c_str(),
+ time_human_readable_from_seconds(total_time).c_str());
+ b_rr.stamp_data_add_field((prefix + "render_time").c_str(),
+ time_human_readable_from_seconds(render_time).c_str());
+ b_rr.stamp_data_add_field((prefix + "synchronization_time").c_str(),
+ time_human_readable_from_seconds(total_time - render_time).c_str());
+}
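+
+/* Example: for a view layer named "View Layer", the fields stamped above end
+ * up in the render result metadata as
+ *   cycles.View Layer.samples
+ *   cycles.View Layer.total_time / .render_time / .synchronization_time
+ * plus, when cryptomatte passes are enabled,
+ *   cryptomatte/<first 7 hex digits of MurmurHash3(name)>/{name,hash,conversion,manifest}. */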
+
+void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
+{
+ b_depsgraph = b_depsgraph_;
+
+ /* set callback to write out render results */
+ session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1);
+ session->update_render_tile_cb = function_bind(
+ &BlenderSession::update_render_tile, this, _1, _2);
+
+ /* get buffer parameters */
+ SessionParams session_params = BlenderSync::get_session_params(
+ b_engine, b_userpref, b_scene, background);
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height);
+
+ /* render each layer */
+ BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
+
+ /* temporary render result to find needed passes and views */
+ BL::RenderResult b_rr = begin_render_result(
+ b_engine, 0, 0, 1, 1, b_view_layer.name().c_str(), NULL);
+ BL::RenderResult::layers_iterator b_single_rlay;
+ b_rr.layers.begin(b_single_rlay);
+ BL::RenderLayer b_rlay = *b_single_rlay;
+ b_rlay_name = b_view_layer.name();
+
+ /* add passes */
+ vector<Pass> passes = sync->sync_render_passes(b_rlay, b_view_layer);
+ buffer_params.passes = passes;
+
+ PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
+ bool use_denoising = get_boolean(crl, "use_denoising");
+ bool use_optix_denoising = get_boolean(crl, "use_optix_denoising");
+ bool write_denoising_passes = get_boolean(crl, "denoising_store_passes");
+
+ buffer_params.denoising_data_pass = use_denoising || write_denoising_passes;
+ buffer_params.denoising_clean_pass = (scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES);
+ buffer_params.denoising_prefiltered_pass = write_denoising_passes && !use_optix_denoising;
+
+ session->params.run_denoising = use_denoising || write_denoising_passes;
+ session->params.full_denoising = use_denoising && !use_optix_denoising;
+ session->params.optix_denoising = use_denoising && use_optix_denoising;
+ session->params.write_denoising_passes = write_denoising_passes && !use_optix_denoising;
+ session->params.denoising.radius = get_int(crl, "denoising_radius");
+ session->params.denoising.strength = get_float(crl, "denoising_strength");
+ session->params.denoising.feature_strength = get_float(crl, "denoising_feature_strength");
+ session->params.denoising.relative_pca = get_boolean(crl, "denoising_relative_pca");
+ session->params.denoising.optix_input_passes = get_enum(crl, "denoising_optix_input_passes");
+ session->tile_manager.schedule_denoising = session->params.run_denoising;
+
+ scene->film->denoising_data_pass = buffer_params.denoising_data_pass;
+ scene->film->denoising_clean_pass = buffer_params.denoising_clean_pass;
+ scene->film->denoising_prefiltered_pass = buffer_params.denoising_prefiltered_pass;
+
+ scene->film->pass_alpha_threshold = b_view_layer.pass_alpha_threshold();
+ scene->film->tag_passes_update(scene, passes);
+ scene->film->tag_update(scene);
+ scene->integrator->tag_update(scene);
+
+ BL::RenderResult::views_iterator b_view_iter;
+
+ int num_views = 0;
+ for (b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end(); ++b_view_iter) {
+ num_views++;
+ }
+
+ int view_index = 0;
+ for (b_rr.views.begin(b_view_iter); b_view_iter != b_rr.views.end();
+ ++b_view_iter, ++view_index) {
+ b_rview_name = b_view_iter->name();
+
+ /* set the current view */
+ b_engine.active_view_set(b_rview_name.c_str());
+
+ /* update scene */
+ BL::Object b_camera_override(b_engine.camera_override());
+ sync->sync_camera(b_render, b_camera_override, width, height, b_rview_name.c_str());
+ sync->sync_data(
+ b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);
+ builtin_images_load();
+
+ /* Attempt to free all data which is held by the Blender side, since at this
+ * point we know that we've got everything needed to render the current view
+ * layer.
+ *
+ * At the moment we only free if we are not doing multi-view
+ * (or if we are rendering the last view). See T58142/D4239 for discussion.
+ */
+ if (view_index == num_views - 1) {
+ free_blender_memory_if_possible();
+ }
+
+ /* Make sure all views have different noise patterns. The hardcoded value
+ * is arbitrary, it just needs to vary the seed per view. */
+ if (view_index != 0) {
+ scene->integrator->seed += hash_uint2(scene->integrator->seed,
+ hash_uint2(view_index * 0xdeadbeef, 0));
+ scene->integrator->tag_update(scene);
+ }
+
+ /* Update number of samples per layer. */
+ int samples = sync->get_layer_samples();
+ bool bound_samples = sync->get_layer_bound_samples();
+ int effective_layer_samples;
+
+ if (samples != 0 && (!bound_samples || (samples < session_params.samples)))
+ effective_layer_samples = samples;
+ else
+ effective_layer_samples = session_params.samples;
+
+ /* Update tile manager if we're doing resumable render. */
+ update_resumable_tile_manager(effective_layer_samples);
+
+ /* Update session itself. */
+ session->reset(buffer_params, effective_layer_samples);
+
+ /* render */
+ session->start();
+ session->wait();
+
+ if (!b_engine.is_preview() && background && print_render_stats) {
+ RenderStats stats;
+ session->collect_statistics(&stats);
+ printf("Render statistics:\n%s\n", stats.full_report().c_str());
+ }
+
+ if (session->progress.get_cancel())
+ break;
+ }
+
+ /* add metadata */
+ stamp_view_layer_metadata(scene, b_rlay_name);
+
+ /* free result without merging */
+ end_render_result(b_engine, b_rr, true, true, false);
+
+ double total_time, render_time;
+ session->progress.get_time(total_time, render_time);
+ VLOG(1) << "Total render time: " << total_time;
+ VLOG(1) << "Render time (without synchronization): " << render_time;
+
+ /* clear callback */
+ session->write_render_tile_cb = function_null;
+ session->update_render_tile_cb = function_null;
+
+ /* TODO: find a way to clear this data for persistent data render */
+#if 0
+ /* Free all memory used (host and device), so we don't leave the render
+ * engine with extra memory allocated.
+ */
+
+ session->device_free();
+
+ delete sync;
+ sync = NULL;
+#endif
+}
+
+static void populate_bake_data(BakeData *data,
+ const int object_id,
+ BL::BakePixel &pixel_array,
+ const int num_pixels)
+{
+ BL::BakePixel bp = pixel_array;
+
+ int i;
+ for (i = 0; i < num_pixels; i++) {
+ if (bp.object_id() == object_id) {
+ data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy());
+ }
+ else {
+ data->set_null(i);
+ }
+ bp = bp.next();
+ }
+}
+
+static int bake_pass_filter_get(const int pass_filter)
+{
+ int flag = BAKE_FILTER_NONE;
+
+ if ((pass_filter & BL::BakeSettings::pass_filter_DIRECT) != 0)
+ flag |= BAKE_FILTER_DIRECT;
+ if ((pass_filter & BL::BakeSettings::pass_filter_INDIRECT) != 0)
+ flag |= BAKE_FILTER_INDIRECT;
+ if ((pass_filter & BL::BakeSettings::pass_filter_COLOR) != 0)
+ flag |= BAKE_FILTER_COLOR;
+
+ if ((pass_filter & BL::BakeSettings::pass_filter_DIFFUSE) != 0)
+ flag |= BAKE_FILTER_DIFFUSE;
+ if ((pass_filter & BL::BakeSettings::pass_filter_GLOSSY) != 0)
+ flag |= BAKE_FILTER_GLOSSY;
+ if ((pass_filter & BL::BakeSettings::pass_filter_TRANSMISSION) != 0)
+ flag |= BAKE_FILTER_TRANSMISSION;
+ if ((pass_filter & BL::BakeSettings::pass_filter_SUBSURFACE) != 0)
+ flag |= BAKE_FILTER_SUBSURFACE;
+
+ if ((pass_filter & BL::BakeSettings::pass_filter_EMIT) != 0)
+ flag |= BAKE_FILTER_EMISSION;
+ if ((pass_filter & BL::BakeSettings::pass_filter_AO) != 0)
+ flag |= BAKE_FILTER_AO;
+
+ return flag;
+}
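+
+/* Worked example: baking a DIFFUSE pass with Direct and Indirect enabled but
+ * Color disabled passes pass_filter_DIRECT | pass_filter_INDIRECT |
+ * pass_filter_DIFFUSE here, which maps to BAKE_FILTER_DIRECT |
+ * BAKE_FILTER_INDIRECT | BAKE_FILTER_DIFFUSE. */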
+
+void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
+ BL::Object &b_object,
+ const string &pass_type,
+ const int pass_filter,
+ const int object_id,
+ BL::BakePixel &pixel_array,
+ const size_t num_pixels,
+ const int /*depth*/,
+ float result[])
+{
+ b_depsgraph = b_depsgraph_;
+
+ ShaderEvalType shader_type = get_shader_type(pass_type);
+
+ /* Set baking flag in advance, so kernel loading can check if we need
+ * any baking capabilities.
+ */
+ scene->bake_manager->set_baking(true);
+
+ /* ensure kernels are loaded before we do any scene updates */
+ session->load_kernels();
+
+ if (shader_type == SHADER_EVAL_UV) {
+ /* force UV to be available */
+ Pass::add(PASS_UV, scene->film->passes);
+ }
+
+ int bake_pass_filter = bake_pass_filter_get(pass_filter);
+ bake_pass_filter = BakeManager::shader_type_to_pass_filter(shader_type, bake_pass_filter);
+
+ /* force use_light_pass to be true if we bake more than just colors */
+ if (bake_pass_filter & ~BAKE_FILTER_COLOR) {
+ Pass::add(PASS_LIGHT, scene->film->passes);
+ }
+
+ /* create device and update scene */
+ scene->film->tag_update(scene);
+ scene->integrator->tag_update(scene);
+
+ if (!session->progress.get_cancel()) {
+ /* update scene */
+ BL::Object b_camera_override(b_engine.camera_override());
+ sync->sync_camera(b_render, b_camera_override, width, height, "");
+ sync->sync_data(
+ b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);
+ builtin_images_load();
+ }
+
+ BakeData *bake_data = NULL;
+
+ if (!session->progress.get_cancel()) {
+ /* get buffer parameters */
+ SessionParams session_params = BlenderSync::get_session_params(
+ b_engine, b_userpref, b_scene, background);
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height);
+
+ scene->bake_manager->set_shader_limit((size_t)b_engine.tile_x(), (size_t)b_engine.tile_y());
+
+ /* set number of samples */
+ session->tile_manager.set_samples(session_params.samples);
+ session->reset(buffer_params, session_params.samples);
+ session->update_scene();
+
+ /* Find object index. TODO: this lookup is arbitrary, copied from mesh_displace.cpp. */
+ size_t object_index = OBJECT_NONE;
+ int tri_offset = 0;
+
+ for (size_t i = 0; i < scene->objects.size(); i++) {
+ if (strcmp(scene->objects[i]->name.c_str(), b_object.name().c_str()) == 0) {
+ object_index = i;
+ tri_offset = scene->objects[i]->mesh->tri_offset;
+ break;
+ }
+ }
+
+ /* The object might have been disabled for rendering or excluded in some
+ * other way; in that case Blender will report a warning afterwards. */
+ if (object_index != OBJECT_NONE) {
+ int object = object_index;
+
+ bake_data = scene->bake_manager->init(object, tri_offset, num_pixels);
+ populate_bake_data(bake_data, object_id, pixel_array, num_pixels);
+ }
+
+ /* set number of samples */
+ session->tile_manager.set_samples(session_params.samples);
+ session->reset(buffer_params, session_params.samples);
+ session->update_scene();
+
+ session->progress.set_update_callback(
+ function_bind(&BlenderSession::update_bake_progress, this));
+ }
+
+ /* Perform bake. Check cancel to avoid crash with incomplete scene data. */
+ if (!session->progress.get_cancel() && bake_data) {
+ scene->bake_manager->bake(scene->device,
+ &scene->dscene,
+ scene,
+ session->progress,
+ shader_type,
+ bake_pass_filter,
+ bake_data,
+ result);
+ }
+
+ /* Free all memory used (host and device), so we don't leave the render
+ * engine with extra memory allocated.
+ */
+
+ session->device_free();
+
+ delete sync;
+ sync = NULL;
+}
+
+void BlenderSession::do_write_update_render_result(BL::RenderLayer &b_rlay,
+ RenderTile &rtile,
+ bool do_update_only)
+{
+ RenderBuffers *buffers = rtile.buffers;
+
+ /* copy data from device */
+ if (!buffers->copy_from_device())
+ return;
+
+ float exposure = scene->film->exposure;
+
+ vector<float> pixels(rtile.w * rtile.h * 4);
+
+ /* Adjust absolute sample number to the range. */
+ int sample = rtile.sample;
+ const int range_start_sample = session->tile_manager.range_start_sample;
+ if (range_start_sample != -1) {
+ sample -= range_start_sample;
+ }
+
+ if (!do_update_only) {
+ /* copy each pass */
+ BL::RenderLayer::passes_iterator b_iter;
+
+ for (b_rlay.passes.begin(b_iter); b_iter != b_rlay.passes.end(); ++b_iter) {
+ BL::RenderPass b_pass(*b_iter);
+ int components = b_pass.channels();
+
+ /* Copy pixels from regular render passes. */
+ bool read = buffers->get_pass_rect(b_pass.name(), exposure, sample, components, &pixels[0]);
+
+ /* If not a regular render pass, try the denoising passes. */
+ if (!read) {
+ int denoising_offset = BlenderSync::get_denoising_pass(b_pass);
+ if (denoising_offset >= 0) {
+ read = buffers->get_denoising_pass_rect(
+ denoising_offset, exposure, sample, components, &pixels[0]);
+ }
+ }
+
+ if (!read) {
+ memset(&pixels[0], 0, pixels.size() * sizeof(float));
+ }
+
+ b_pass.rect(&pixels[0]);
+ }
+ }
+ else {
+ /* copy combined pass */
+ BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
+ if (buffers->get_pass_rect("Combined", exposure, sample, 4, &pixels[0]))
+ b_combined_pass.rect(&pixels[0]);
+ }
+}
+
+void BlenderSession::write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile)
+{
+ do_write_update_render_result(b_rlay, rtile, false);
+}
+
+void BlenderSession::update_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile)
+{
+ do_write_update_render_result(b_rlay, rtile, true);
+}
+
+void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
+{
+ /* only used for viewport render */
+ if (!b_v3d)
+ return;
+
+ /* on session/scene parameter changes, we recreate session entirely */
+ SessionParams session_params = BlenderSync::get_session_params(
+ b_engine, b_userpref, b_scene, background);
+ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
+ bool session_pause = BlenderSync::get_session_pause(b_scene, background);
+
+ if (session->params.modified(session_params) || scene->params.modified(scene_params)) {
+ free_session();
+ create_session();
+ return;
+ }
+
+ /* increase samples, but never decrease */
+ session->set_samples(session_params.samples);
+ session->set_pause(session_pause);
+
+ /* copy recalc flags, outside of mutex so we can decide to do the real
+ * synchronization at a later time to not block on running updates */
+ sync->sync_recalc(b_depsgraph_, b_v3d);
+
+ /* don't do synchronization if on pause */
+ if (session_pause) {
+ tag_update();
+ return;
+ }
+
+ /* try to acquire mutex. if we don't want to or can't, come back later */
+ if (!session->ready_to_reset() || !session->scene->mutex.try_lock()) {
+ tag_update();
+ return;
+ }
+
+ /* data and camera synchronize */
+ b_depsgraph = b_depsgraph_;
+
+ BL::Object b_camera_override(b_engine.camera_override());
+ sync->sync_data(
+ b_render, b_depsgraph, b_v3d, b_camera_override, width, height, &python_thread_state);
+
+ if (b_rv3d)
+ sync->sync_view(b_v3d, b_rv3d, width, height);
+ else
+ sync->sync_camera(b_render, b_camera_override, width, height, "");
+
+ /* reset if needed */
+ if (scene->need_reset()) {
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ session->reset(buffer_params, session_params.samples);
+
+ /* After session reset, so device is not accessing image data anymore. */
+ builtin_images_load();
+
+ /* reset time */
+ start_resize_time = 0.0;
+ }
+
+ /* unlock */
+ session->scene->mutex.unlock();
+
+ /* Start rendering thread, if it's not running already. Do this
+ * after all scene data has been synced at least once. */
+ session->start();
+}
+
+bool BlenderSession::draw(int w, int h)
+{
+ /* pause in redraw in case update is not being called due to final render */
+ session->set_pause(BlenderSync::get_session_pause(b_scene, background));
+
+ /* Before drawing we verify camera and viewport size changes, because
+ * we do not get update callbacks for those; we must detect them here. */
+ if (session->ready_to_reset()) {
+ bool reset = false;
+
+ /* if dimensions changed, reset */
+ if (width != w || height != h) {
+ if (start_resize_time == 0.0) {
+ /* don't react immediately to resizes to avoid flickery resizing
+ * of the viewport, and some window managers changing the window
+ * size temporarily on unminimize */
+ start_resize_time = time_dt();
+ tag_redraw();
+ }
+ else if (time_dt() - start_resize_time < 0.2) {
+ tag_redraw();
+ }
+ else {
+ width = w;
+ height = h;
+ reset = true;
+ }
+ }
+
+ /* try to acquire mutex. if we can't, come back later */
+ if (!session->scene->mutex.try_lock()) {
+ tag_update();
+ }
+ else {
+ /* update camera from 3d view */
+
+ sync->sync_view(b_v3d, b_rv3d, width, height);
+
+ if (scene->camera->need_update)
+ reset = true;
+
+ session->scene->mutex.unlock();
+ }
+
+ /* reset if requested */
+ if (reset) {
+ SessionParams session_params = BlenderSync::get_session_params(
+ b_engine, b_userpref, b_scene, background);
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ bool session_pause = BlenderSync::get_session_pause(b_scene, background);
+
+ if (session_pause == false) {
+ session->reset(buffer_params, session_params.samples);
+ start_resize_time = 0.0;
+ }
+ }
+ }
+ else {
+ tag_update();
+ }
+
+ /* update status and progress for 3d view draw */
+ update_status_progress();
+
+ /* draw */
+ BufferParams buffer_params = BlenderSync::get_buffer_params(
+ b_render, b_v3d, b_rv3d, scene->camera, width, height);
+ DeviceDrawParams draw_params;
+
+ if (session->params.display_buffer_linear) {
+ draw_params.bind_display_space_shader_cb = function_bind(
+ &BL::RenderEngine::bind_display_space_shader, &b_engine, b_scene);
+ draw_params.unbind_display_space_shader_cb = function_bind(
+ &BL::RenderEngine::unbind_display_space_shader, &b_engine);
+ }
+
+ return !session->draw(buffer_params, draw_params);
+}
+
+void BlenderSession::get_status(string &status, string &substatus)
+{
+ session->progress.get_status(status, substatus);
+}
+
+void BlenderSession::get_kernel_status(string &kernel_status)
+{
+ session->progress.get_kernel_status(kernel_status);
+}
+
+void BlenderSession::get_progress(float &progress, double &total_time, double &render_time)
+{
+ session->progress.get_time(total_time, render_time);
+ progress = session->progress.get_progress();
+}
+
+void BlenderSession::update_bake_progress()
+{
+ float progress = session->progress.get_progress();
+
+ if (progress != last_progress) {
+ b_engine.update_progress(progress);
+ last_progress = progress;
+ }
+}
+
+void BlenderSession::update_status_progress()
+{
+ string timestatus, status, substatus, kernel_status;
+ string scene_status = "";
+ float progress;
+ double total_time, remaining_time = 0, render_time;
+ float mem_used = (float)session->stats.mem_used / 1024.0f / 1024.0f;
+ float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;
+
+ get_status(status, substatus);
+ get_kernel_status(kernel_status);
+ get_progress(progress, total_time, render_time);
+
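+ /* Estimate the remaining time by linear extrapolation: assume the rest of
+ * the render proceeds at the same average rate as the completed fraction. */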
+ if (progress > 0)
+ remaining_time = (1.0 - (double)progress) * (render_time / (double)progress);
+
+ if (background) {
+ scene_status += " | " + scene->name;
+ if (b_rlay_name != "")
+ scene_status += ", " + b_rlay_name;
+
+ if (b_rview_name != "")
+ scene_status += ", " + b_rview_name;
+
+ if (remaining_time > 0) {
+ timestatus += "Remaining:" + time_human_readable_from_seconds(remaining_time) + " | ";
+ }
+
+ timestatus += string_printf("Mem:%.2fM, Peak:%.2fM", (double)mem_used, (double)mem_peak);
+
+ if (status.size() > 0)
+ status = " | " + status;
+ if (substatus.size() > 0)
+ status += " | " + substatus;
+ if (kernel_status.size() > 0)
+ status += " | " + kernel_status;
+ }
+
+ double current_time = time_dt();
+ /* When rendering in a window, redraw the status at least once per second to keep the elapsed and
+ * remaining time up-to-date. For headless rendering, only report when something significant
+ * changes to keep the console output readable. */
+ if (status != last_status || (!headless && (current_time - last_status_time) > 1.0)) {
+ b_engine.update_stats("", (timestatus + scene_status + status).c_str());
+ b_engine.update_memory_stats(mem_used, mem_peak);
+ last_status = status;
+ last_status_time = current_time;
+ }
+ if (progress != last_progress) {
+ b_engine.update_progress(progress);
+ last_progress = progress;
+ }
+
+ if (session->progress.get_error()) {
+ string error = session->progress.get_error_message();
+ if (error != last_error) {
+ /* TODO(sergey): Currently the C++ RNA API doesn't let us
+ * use a mnemonic name for the variable. Would be nice to
+ * have this figured out.
+ *
+ * Until then, 1 << 5 means RPT_ERROR.
+ */
+ b_engine.report(1 << 5, error.c_str());
+ b_engine.error_set(error.c_str());
+ last_error = error;
+ }
+ }
+}
+
+void BlenderSession::tag_update()
+{
+ /* tell blender that we want to get another update callback */
+ b_engine.tag_update();
+}
+
+void BlenderSession::tag_redraw()
+{
+ if (background) {
+ /* update stats and progress, only for background here because
+ * in 3d view we do it in draw for thread safety reasons */
+ update_status_progress();
+
+ /* offline render, redraw if timeout passed */
+ if (time_dt() - last_redraw_time > 1.0) {
+ b_engine.tag_redraw();
+ last_redraw_time = time_dt();
+ }
+ }
+ else {
+ /* tell blender that we want to redraw */
+ b_engine.tag_redraw();
+ }
+}
+
+void BlenderSession::test_cancel()
+{
+ /* test if we need to cancel rendering */
+ if (background)
+ if (b_engine.test_break())
+ session->progress.set_cancel("Cancelled");
+}
+
+/* A builtin image file name is actually an image datablock name with the
+ * absolute sequence frame number concatenated via the '@' character.
+ *
+ * This function extracts the frame number from the builtin name.
+ */
+int BlenderSession::builtin_image_frame(const string &builtin_name)
+{
+ int last = builtin_name.find_last_of('@');
+ return atoi(builtin_name.substr(last + 1, builtin_name.size() - last - 1).c_str());
+}
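+
+/* For example, builtin_image_frame("MyImage@14") returns 14; the datablock
+ * name is everything before the last '@'. */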
+
+void BlenderSession::builtin_image_info(const string &builtin_name,
+ void *builtin_data,
+ ImageMetaData &metadata)
+{
+ /* empty image */
+ metadata.width = 1;
+ metadata.height = 1;
+
+ if (!builtin_data)
+ return;
+
+ /* recover ID pointer */
+ PointerRNA ptr;
+ RNA_id_pointer_create((ID *)builtin_data, &ptr);
+ BL::ID b_id(ptr);
+
+ if (b_id.is_a(&RNA_Image)) {
+ /* image data */
+ BL::Image b_image(b_id);
+
+ metadata.builtin_free_cache = !b_image.has_data();
+ metadata.is_float = b_image.is_float();
+ metadata.width = b_image.size()[0];
+ metadata.height = b_image.size()[1];
+ metadata.depth = 1;
+ metadata.channels = b_image.channels();
+
+ if (metadata.is_float) {
+ /* Float images are already converted on the Blender side,
+ * no need to do anything in Cycles. */
+ metadata.colorspace = u_colorspace_raw;
+ }
+ }
+ else if (b_id.is_a(&RNA_Object)) {
+ /* smoke volume data */
+ BL::Object b_ob(b_id);
+ BL::FluidDomainSettings b_domain = object_fluid_domain_find(b_ob);
+
+ metadata.is_float = true;
+ metadata.depth = 1;
+ metadata.channels = 1;
+
+ if (!b_domain)
+ return;
+
+ if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY) ||
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME) ||
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT) ||
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE))
+ metadata.channels = 1;
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR))
+ metadata.channels = 4;
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY))
+ metadata.channels = 3;
+ else
+ return;
+
+ int3 resolution = get_int3(b_domain.domain_resolution());
+ int amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1;
+
+ /* Velocity and heat data is always low-resolution. */
+ if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) ||
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
+ amplify = 1;
+ }
+
+ metadata.width = resolution.x * amplify;
+ metadata.height = resolution.y * amplify;
+ metadata.depth = resolution.z * amplify;
+ }
+ else {
+ /* TODO(sergey): Check we're indeed in shader node tree. */
+ PointerRNA ptr;
+ RNA_pointer_create(NULL, &RNA_Node, builtin_data, &ptr);
+ BL::Node b_node(ptr);
+ if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) {
+ BL::ShaderNodeTexPointDensity b_point_density_node(b_node);
+ metadata.channels = 4;
+ metadata.width = b_point_density_node.resolution();
+ metadata.height = metadata.width;
+ metadata.depth = metadata.width;
+ metadata.is_float = true;
+ }
+ }
+}
+
+bool BlenderSession::builtin_image_pixels(const string &builtin_name,
+ void *builtin_data,
+ int tile,
+ unsigned char *pixels,
+ const size_t pixels_size,
+ const bool associate_alpha,
+ const bool free_cache)
+{
+ if (!builtin_data) {
+ return false;
+ }
+
+ const int frame = builtin_image_frame(builtin_name);
+
+ PointerRNA ptr;
+ RNA_id_pointer_create((ID *)builtin_data, &ptr);
+ BL::Image b_image(ptr);
+
+ const int width = b_image.size()[0];
+ const int height = b_image.size()[1];
+ const int channels = b_image.channels();
+
+ unsigned char *image_pixels = image_get_pixels_for_frame(b_image, frame, tile);
+ const size_t num_pixels = ((size_t)width) * height;
+
+ if (image_pixels && num_pixels * channels == pixels_size) {
+ memcpy(pixels, image_pixels, pixels_size * sizeof(unsigned char));
+ }
+ else {
+ if (channels == 1) {
+ memset(pixels, 0, pixels_size * sizeof(unsigned char));
+ }
+ else {
+ const size_t num_pixels_safe = pixels_size / channels;
+ unsigned char *cp = pixels;
+ for (size_t i = 0; i < num_pixels_safe; i++, cp += channels) {
+ cp[0] = 255;
+ cp[1] = 0;
+ cp[2] = 255;
+ if (channels == 4) {
+ cp[3] = 255;
+ }
+ }
+ }
+ }
+
+ if (image_pixels) {
+ MEM_freeN(image_pixels);
+ }
+
+ /* Free image buffers to save memory during render. */
+ if (free_cache) {
+ b_image.buffers_free();
+ }
+
+ if (associate_alpha) {
+ /* Premultiply, byte images are always straight for Blender. */
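+ /* The >> 8 below divides by 256 rather than 255, a cheap approximation
+ * that is at most one off for 8-bit values. */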
+ unsigned char *cp = pixels;
+ for (size_t i = 0; i < num_pixels; i++, cp += channels) {
+ cp[0] = (cp[0] * cp[3]) >> 8;
+ cp[1] = (cp[1] * cp[3]) >> 8;
+ cp[2] = (cp[2] * cp[3]) >> 8;
+ }
+ }
+ return true;
+}
+
+bool BlenderSession::builtin_image_float_pixels(const string &builtin_name,
+ void *builtin_data,
+ int tile,
+ float *pixels,
+ const size_t pixels_size,
+ const bool,
+ const bool free_cache)
+{
+ if (!builtin_data) {
+ return false;
+ }
+
+ PointerRNA ptr;
+ RNA_id_pointer_create((ID *)builtin_data, &ptr);
+ BL::ID b_id(ptr);
+
+ if (b_id.is_a(&RNA_Image)) {
+ /* image data */
+ BL::Image b_image(b_id);
+ int frame = builtin_image_frame(builtin_name);
+
+ const int width = b_image.size()[0];
+ const int height = b_image.size()[1];
+ const int channels = b_image.channels();
+
+ float *image_pixels;
+ image_pixels = image_get_float_pixels_for_frame(b_image, frame, tile);
+ const size_t num_pixels = ((size_t)width) * height;
+
+ if (image_pixels && num_pixels * channels == pixels_size) {
+ memcpy(pixels, image_pixels, pixels_size * sizeof(float));
+ }
+ else {
+ if (channels == 1) {
+ memset(pixels, 0, num_pixels * sizeof(float));
+ }
+ else {
+ const size_t num_pixels_safe = pixels_size / channels;
+ float *fp = pixels;
+ for (size_t i = 0; i < num_pixels_safe; i++, fp += channels) {
+ fp[0] = 1.0f;
+ fp[1] = 0.0f;
+ fp[2] = 1.0f;
+ if (channels == 4) {
+ fp[3] = 1.0f;
+ }
+ }
+ }
+ }
+
+ if (image_pixels) {
+ MEM_freeN(image_pixels);
+ }
+
+ /* Free image buffers to save memory during render. */
+ if (free_cache) {
+ b_image.buffers_free();
+ }
+
+ return true;
+ }
+ else if (b_id.is_a(&RNA_Object)) {
+ /* smoke volume data */
+ BL::Object b_ob(b_id);
+ BL::FluidDomainSettings b_domain = object_fluid_domain_find(b_ob);
+
+ if (!b_domain) {
+ return false;
+ }
+
+ int3 resolution = get_int3(b_domain.domain_resolution());
+ int length, amplify = (b_domain.use_noise()) ? b_domain.noise_scale() : 1;
+
+ /* Velocity and heat data is always low-resolution. */
+ if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY) ||
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
+ amplify = 1;
+ }
+
+ const int width = resolution.x * amplify;
+ const int height = resolution.y * amplify;
+ const int depth = resolution.z * amplify;
+ const size_t num_pixels = ((size_t)width) * height * depth;
+
+ if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) {
+ FluidDomainSettings_density_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_density_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) {
+ /* this is in range 0..1, and interpreted by the OpenGL smoke viewer
+ * as 1500..3000 K with the first part faded to zero density */
+ FluidDomainSettings_flame_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_flame_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) {
+ /* the RGB is "premultiplied" by density for better interpolation results */
+ FluidDomainSettings_color_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels * 4) {
+ FluidDomainSettings_color_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_VELOCITY)) {
+ FluidDomainSettings_velocity_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels * 3) {
+ FluidDomainSettings_velocity_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_HEAT)) {
+ FluidDomainSettings_heat_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_heat_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else if (builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_TEMPERATURE)) {
+ FluidDomainSettings_temperature_grid_get_length(&b_domain.ptr, &length);
+ if (length == num_pixels) {
+ FluidDomainSettings_temperature_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else {
+ fprintf(
+ stderr, "Cycles error: unknown volume attribute %s, skipping\n", builtin_name.c_str());
+ pixels[0] = 0.0f;
+ return false;
+ }
+
+ fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n");
+ }
+ else {
+    /* We originally were passing view_layer here, but in reality we need a
+     * depsgraph to pass to the RE_point_density_minmax() function.
+     */
+ /* TODO(sergey): Check we're indeed in shader node tree. */
+ PointerRNA ptr;
+ RNA_pointer_create(NULL, &RNA_Node, builtin_data, &ptr);
+ BL::Node b_node(ptr);
+ if (b_node.is_a(&RNA_ShaderNodeTexPointDensity)) {
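+      /* Point density is evaluated through the node itself, which fills
+       * the pixel buffer directly. */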
+ BL::ShaderNodeTexPointDensity b_point_density_node(b_node);
+ int length;
+ b_point_density_node.calc_point_density(b_depsgraph, &length, &pixels);
+ }
+ }
+
+ return false;
+}
+
+void BlenderSession::builtin_images_load()
+{
+ /* Force builtin images to be loaded along with Blender data sync. This
+ * is needed because we may be reading from depsgraph evaluated data which
+ * can be freed by Blender before Cycles reads it.
+ *
+ * TODO: the assumption that no further access to builtin image data will
+ * happen is really weak, and likely to break in the future. We should find
+ * a better solution to hand over the data directly to the image manager
+ * instead of through callbacks whose timing is difficult to control. */
+ ImageManager *manager = session->scene->image_manager;
+ Device *device = session->device;
+ manager->device_load_builtin(device, session->scene, session->progress);
+}
+
+void BlenderSession::update_resumable_tile_manager(int num_samples)
+{
+ const int num_resumable_chunks = BlenderSession::num_resumable_chunks,
+ current_resumable_chunk = BlenderSession::current_resumable_chunk;
+ if (num_resumable_chunks == 0) {
+ return;
+ }
+
+ if (num_resumable_chunks > num_samples) {
+ fprintf(stderr,
+ "Cycles warning: more sample chunks (%d) than samples (%d), "
+ "this will cause some samples to be included in multiple chunks.\n",
+ num_resumable_chunks,
+ num_samples);
+ }
+
+ const float num_samples_per_chunk = (float)num_samples / num_resumable_chunks;
+
+ float range_start_sample, range_num_samples;
+ if (current_resumable_chunk != 0) {
+ /* Single chunk rendering. */
+ range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1);
+ range_num_samples = num_samples_per_chunk;
+ }
+ else {
+ /* Ranged-chunks. */
+ const int num_chunks = end_resumable_chunk - start_resumable_chunk + 1;
+ range_start_sample = num_samples_per_chunk * (start_resumable_chunk - 1);
+ range_num_samples = num_chunks * num_samples_per_chunk;
+ }
+
+ /* Round after doing the multiplications with num_chunks and num_samples_per_chunk
+ * to allow for many small chunks. */
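+  /* E.g. 250 samples in 8 chunks gives 31.25 samples per chunk: chunk 3
+   * starts at floor(62.5 + 0.5) = 63 and renders floor(31.75) = 31 samples. */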
+ int rounded_range_start_sample = (int)floorf(range_start_sample + 0.5f);
+ int rounded_range_num_samples = max((int)floorf(range_num_samples + 0.5f), 1);
+
+ /* Make sure we don't overshoot. */
+ if (rounded_range_start_sample + rounded_range_num_samples > num_samples) {
+    rounded_range_num_samples = num_samples - rounded_range_start_sample;
+ }
+
+ VLOG(1) << "Samples range start is " << range_start_sample << ", "
+ << "number of samples to render is " << range_num_samples;
+
+ scene->integrator->start_sample = rounded_range_start_sample;
+ scene->integrator->tag_update(scene);
+
+ session->tile_manager.range_start_sample = rounded_range_start_sample;
+ session->tile_manager.range_num_samples = rounded_range_num_samples;
+}
+
+void BlenderSession::free_blender_memory_if_possible()
+{
+ if (!background) {
+ /* During interactive render we can not free anything: attempts to save
+ * memory would cause things to be allocated and evaluated for every
+ * updated sample.
+ */
+ return;
+ }
+ b_engine.free_blender_memory();
+}
+
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
--- a/intern/cycles/blender/blender_sync.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/blender_sync.cpp 2020-01-10 20:42:43.457590054 +0300
@@ -291,6 +291,16 @@
integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect");
integrator->light_sampling_threshold = get_float(cscene, "light_sampling_threshold");
+ if (RNA_boolean_get(&cscene, "use_adaptive_sampling")) {
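+    /* Adaptive sampling relies on the progressive stratification of the
+     * PMJ pattern, so it overrides the user's sampling pattern choice. */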
+ integrator->sampling_pattern = SAMPLING_PATTERN_PMJ;
+ integrator->adaptive_min_samples = get_int(cscene, "adaptive_min_samples");
+ integrator->adaptive_threshold = get_float(cscene, "adaptive_threshold");
+ }
+ else {
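+    /* Adaptive sampling disabled: an unreachable minimum sample count
+     * keeps the kernel from ever stopping pixels early. */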
+ integrator->adaptive_min_samples = INT_MAX;
+ integrator->adaptive_threshold = 0.0f;
+ }
+
int diffuse_samples = get_int(cscene, "diffuse_samples");
int glossy_samples = get_int(cscene, "glossy_samples");
int transmission_samples = get_int(cscene, "transmission_samples");
@@ -307,6 +317,8 @@
integrator->mesh_light_samples = mesh_light_samples * mesh_light_samples;
integrator->subsurface_samples = subsurface_samples * subsurface_samples;
integrator->volume_samples = volume_samples * volume_samples;
+    /* Widen before squaring: adaptive_min_samples may be INT_MAX. */
+    integrator->adaptive_min_samples = (int)min(
+        (long long)integrator->adaptive_min_samples * integrator->adaptive_min_samples,
+        (long long)INT_MAX);
}
else {
integrator->diffuse_samples = diffuse_samples;
@@ -482,6 +494,8 @@
MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES);
#endif
MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
+ MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
+ MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);
if (string_startswith(name, cryptomatte_prefix)) {
return PASS_CRYPTOMATTE;
}
@@ -517,7 +531,9 @@
return -1;
}
-vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer)
+vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
+ BL::ViewLayer &b_view_layer,
+ bool adaptive_sampling)
{
vector<Pass> passes;
@@ -595,6 +611,10 @@
b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_RENDER_TIME, passes, "Debug Render Time");
}
+ if (get_boolean(crp, "pass_debug_sample_count")) {
+ b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
+    Pass::add(PASS_SAMPLE_COUNT, passes, "Debug Sample Count");
+ }
if (get_boolean(crp, "use_pass_volume_direct")) {
b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str());
Pass::add(PASS_VOLUME_DIRECT, passes, "VolumeDir");
@@ -656,6 +676,13 @@
}
RNA_END;
+ if (adaptive_sampling) {
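+    /* Adaptive sampling needs the auxiliary buffer for its variance
+     * estimate and a per-pixel sample count to normalize the result. */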
+ Pass::add(PASS_ADAPTIVE_AUX_BUFFER, passes);
+ if (!get_boolean(crp, "pass_debug_sample_count")) {
+ Pass::add(PASS_SAMPLE_COUNT, passes);
+ }
+ }
+
return passes;
}
@@ -889,6 +916,8 @@
params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background &&
BlenderSession::print_render_stats;
+ params.adaptive_sampling = RNA_boolean_get(&cscene, "use_adaptive_sampling");
+
return params;
}
diff -Naur a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
--- a/intern/cycles/blender/blender_sync.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/blender/blender_sync.h 2020-01-10 20:42:43.457590054 +0300
@@ -70,7 +70,9 @@
int height,
void **python_thread_state);
void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer);
- vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer);
+ vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer,
+ BL::ViewLayer &b_view_layer,
+ bool adaptive_sampling);
void sync_integrator();
void sync_camera(BL::RenderSettings &b_render,
BL::Object &b_override,
diff -Naur a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
--- a/intern/cycles/device/device_cpu.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/device_cpu.cpp 2020-01-10 20:42:43.457590054 +0300
@@ -34,6 +34,7 @@
#include "kernel/kernel_types.h"
#include "kernel/split/kernel_split_data.h"
#include "kernel/kernel_globals.h"
+#include "kernel/kernel_adaptive_sampling.h"
#include "kernel/filter/filter.h"
@@ -317,6 +318,10 @@
REGISTER_SPLIT_KERNEL(next_iteration_setup);
REGISTER_SPLIT_KERNEL(indirect_subsurface);
REGISTER_SPLIT_KERNEL(buffer_update);
+ REGISTER_SPLIT_KERNEL(adaptive_stopping);
+ REGISTER_SPLIT_KERNEL(adaptive_filter_x);
+ REGISTER_SPLIT_KERNEL(adaptive_filter_y);
+ REGISTER_SPLIT_KERNEL(adaptive_adjust_samples);
#undef REGISTER_SPLIT_KERNEL
#undef KERNEL_FUNCTIONS
}
@@ -851,10 +856,33 @@
path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
}
-
tile.sample = sample + 1;
task.update_progress(&tile, tile.w * tile.h);
+
+ if (kernel_data.film.pass_adaptive_aux_buffer && (sample & 0x3) == 3 &&
+ sample >= kernel_data.integrator.adaptive_min_samples - 1) {
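+      /* Every fourth sample past the minimum, run the convergence filters
+       * over the tile; if no pixel is still active, finish the tile early. */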
+ WorkTile wtile;
+ wtile.x = tile.x;
+ wtile.y = tile.y;
+ wtile.w = tile.w;
+ wtile.h = tile.h;
+ wtile.offset = tile.offset;
+ wtile.stride = tile.stride;
+ wtile.buffer = (float *)tile.buffer;
+
+ bool any = false;
+ for (int y = tile.y; y < tile.y + tile.h; ++y) {
+ any |= kernel_do_adaptive_filter_x(kg, y, &wtile);
+ }
+ for (int x = tile.x; x < tile.x + tile.w; ++x) {
+ any |= kernel_do_adaptive_filter_y(kg, x, &wtile);
+ }
+ if (!any) {
+ tile.sample = end_sample;
+ break;
+ }
+ }
}
if (use_coverage) {
coverage.finalize();
@@ -931,6 +959,28 @@
}
else {
path_trace(task, tile, kg);
+ if (task.integrator_adaptive && kernel_data.film.pass_adaptive_aux_buffer) {
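+        /* Pixels that converged early store a negated sample count; restore
+         * the sign and rescale all passes as if they had taken the full
+         * number of samples so tiles blend consistently. */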
+ float *render_buffer = (float *)tile.buffer;
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ int index = tile.offset + x + y * tile.stride;
+ ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
+ if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+ buffer[kernel_data.film.pass_sample_count] =
+ -buffer[kernel_data.film.pass_sample_count];
+ float sample_multiplier = tile.sample /
+ max((float)tile.start_sample + 1.0f,
+ buffer[kernel_data.film.pass_sample_count]);
+ if (sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(kg, buffer, tile.sample / (tile.sample - 1.0f));
+ }
+ }
+ }
+ }
}
}
else if (tile.task == RenderTile::DENOISE) {
diff -Naur a/intern/cycles/device/device_cpu.cpp.orig b/intern/cycles/device/device_cpu.cpp.orig
--- a/intern/cycles/device/device_cpu.cpp.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/device/device_cpu.cpp.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,1247 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+/* So ImathMath is included before our kernel_cpu_compat. */
+#ifdef WITH_OSL
+/* So no context pollution happens from indirectly included windows.h */
+# include "util/util_windows.h"
+# include <OSL/oslexec.h>
+#endif
+
+#include "device/device.h"
+#include "device/device_denoising.h"
+#include "device/device_intern.h"
+#include "device/device_split_kernel.h"
+
+#include "kernel/kernel.h"
+#include "kernel/kernel_compat_cpu.h"
+#include "kernel/kernel_types.h"
+#include "kernel/split/kernel_split_data.h"
+#include "kernel/kernel_globals.h"
+
+#include "kernel/filter/filter.h"
+
+#include "kernel/osl/osl_shader.h"
+#include "kernel/osl/osl_globals.h"
+
+#include "render/buffers.h"
+#include "render/coverage.h"
+
+#include "util/util_debug.h"
+#include "util/util_foreach.h"
+#include "util/util_function.h"
+#include "util/util_logging.h"
+#include "util/util_map.h"
+#include "util/util_opengl.h"
+#include "util/util_optimization.h"
+#include "util/util_progress.h"
+#include "util/util_system.h"
+#include "util/util_thread.h"
+
+CCL_NAMESPACE_BEGIN
+
+class CPUDevice;
+
+/* Has to be outside of the class to be shared across template instantiations. */
+static const char *logged_architecture = "";
+
+template<typename F> class KernelFunctions {
+ public:
+ KernelFunctions()
+ {
+ kernel = (F)NULL;
+ }
+
+ KernelFunctions(
+ F kernel_default, F kernel_sse2, F kernel_sse3, F kernel_sse41, F kernel_avx, F kernel_avx2)
+ {
+ const char *architecture_name = "default";
+ kernel = kernel_default;
+
+ /* Silence potential warnings about unused variables
+ * when compiling without some architectures. */
+ (void)kernel_sse2;
+ (void)kernel_sse3;
+ (void)kernel_sse41;
+ (void)kernel_avx;
+ (void)kernel_avx2;
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
+ if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
+ architecture_name = "AVX2";
+ kernel = kernel_avx2;
+ }
+ else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
+ if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
+ architecture_name = "AVX";
+ kernel = kernel_avx;
+ }
+ else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
+ if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
+ architecture_name = "SSE4.1";
+ kernel = kernel_sse41;
+ }
+ else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
+ if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
+ architecture_name = "SSE3";
+ kernel = kernel_sse3;
+ }
+ else
+#endif
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+ if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
+ architecture_name = "SSE2";
+ kernel = kernel_sse2;
+ }
+#else
+ {
+ /* Dummy to prevent the architecture if below become
+ * conditional when WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
+ * is not defined. */
+ }
+#endif
+
+ if (strcmp(architecture_name, logged_architecture) != 0) {
+ VLOG(1) << "Will be using " << architecture_name << " kernels.";
+ logged_architecture = architecture_name;
+ }
+ }
+
+ inline F operator()() const
+ {
+ assert(kernel);
+ return kernel;
+ }
+
+ protected:
+ F kernel;
+};
+
+class CPUSplitKernel : public DeviceSplitKernel {
+ CPUDevice *device;
+
+ public:
+ explicit CPUSplitKernel(CPUDevice *device);
+
+ virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
+ RenderTile &rtile,
+ int num_global_elements,
+ device_memory &kernel_globals,
+ device_memory &kernel_data_,
+ device_memory &split_data,
+ device_memory &ray_state,
+ device_memory &queue_index,
+ device_memory &use_queues_flag,
+ device_memory &work_pool_wgs);
+
+ virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
+ const DeviceRequestedFeatures &);
+ virtual int2 split_kernel_local_size();
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task);
+ virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads);
+};
+
+class CPUDevice : public Device {
+ public:
+ TaskPool task_pool;
+ KernelGlobals kernel_globals;
+
+ device_vector<TextureInfo> texture_info;
+ bool need_texture_info;
+
+#ifdef WITH_OSL
+ OSLGlobals osl_globals;
+#endif
+
+ bool use_split_kernel;
+
+ DeviceRequestedFeatures requested_features;
+
+ KernelFunctions<void (*)(KernelGlobals *, float *, int, int, int, int, int)> path_trace_kernel;
+ KernelFunctions<void (*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)>
+ convert_to_half_float_kernel;
+ KernelFunctions<void (*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)>
+ convert_to_byte_kernel;
+ KernelFunctions<void (*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)>
+ shader_kernel;
+
+ KernelFunctions<void (*)(
+ int, TileInfo *, int, int, float *, float *, float *, float *, float *, int *, int, int)>
+ filter_divide_shadow_kernel;
+ KernelFunctions<void (*)(
+ int, TileInfo *, int, int, int, int, float *, float *, float, int *, int, int)>
+ filter_get_feature_kernel;
+ KernelFunctions<void (*)(int, int, int, int *, float *, float *, int, int *)>
+ filter_write_feature_kernel;
+ KernelFunctions<void (*)(int, int, float *, float *, float *, float *, int *, int)>
+ filter_detect_outliers_kernel;
+ KernelFunctions<void (*)(int, int, float *, float *, float *, float *, int *, int)>
+ filter_combine_halves_kernel;
+
+ KernelFunctions<void (*)(
+ int, int, float *, float *, float *, float *, int *, int, int, int, float, float)>
+ filter_nlm_calc_difference_kernel;
+ KernelFunctions<void (*)(float *, float *, int *, int, int)> filter_nlm_blur_kernel;
+ KernelFunctions<void (*)(float *, float *, int *, int, int)> filter_nlm_calc_weight_kernel;
+ KernelFunctions<void (*)(
+ int, int, float *, float *, float *, float *, float *, int *, int, int, int)>
+ filter_nlm_update_output_kernel;
+ KernelFunctions<void (*)(float *, float *, int *, int)> filter_nlm_normalize_kernel;
+
+ KernelFunctions<void (*)(
+ float *, TileInfo *, int, int, int, float *, int *, int *, int, int, bool, int, float)>
+ filter_construct_transform_kernel;
+ KernelFunctions<void (*)(int,
+ int,
+ int,
+ float *,
+ float *,
+ float *,
+ int *,
+ float *,
+ float3 *,
+ int *,
+ int *,
+ int,
+ int,
+ int,
+ int,
+ bool)>
+ filter_nlm_construct_gramian_kernel;
+ KernelFunctions<void (*)(int, int, int, float *, int *, float *, float3 *, int *, int)>
+ filter_finalize_kernel;
+
+ KernelFunctions<void (*)(KernelGlobals *,
+ ccl_constant KernelData *,
+ ccl_global void *,
+ int,
+ ccl_global char *,
+ int,
+ int,
+ int,
+ int,
+ int,
+ int,
+ int,
+ int,
+ ccl_global int *,
+ int,
+ ccl_global char *,
+ ccl_global unsigned int *,
+ unsigned int,
+ ccl_global float *)>
+ data_init_kernel;
+ unordered_map<string, KernelFunctions<void (*)(KernelGlobals *, KernelData *)>> split_kernels;
+
+#define KERNEL_FUNCTIONS(name) \
+ KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse2, name), \
+ KERNEL_NAME_EVAL(cpu_sse3, name), KERNEL_NAME_EVAL(cpu_sse41, name), \
+ KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
+
+ CPUDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
+ : Device(info_, stats_, profiler_, background_),
+ texture_info(this, "__texture_info", MEM_TEXTURE),
+#define REGISTER_KERNEL(name) name##_kernel(KERNEL_FUNCTIONS(name))
+ REGISTER_KERNEL(path_trace),
+ REGISTER_KERNEL(convert_to_half_float),
+ REGISTER_KERNEL(convert_to_byte),
+ REGISTER_KERNEL(shader),
+ REGISTER_KERNEL(filter_divide_shadow),
+ REGISTER_KERNEL(filter_get_feature),
+ REGISTER_KERNEL(filter_write_feature),
+ REGISTER_KERNEL(filter_detect_outliers),
+ REGISTER_KERNEL(filter_combine_halves),
+ REGISTER_KERNEL(filter_nlm_calc_difference),
+ REGISTER_KERNEL(filter_nlm_blur),
+ REGISTER_KERNEL(filter_nlm_calc_weight),
+ REGISTER_KERNEL(filter_nlm_update_output),
+ REGISTER_KERNEL(filter_nlm_normalize),
+ REGISTER_KERNEL(filter_construct_transform),
+ REGISTER_KERNEL(filter_nlm_construct_gramian),
+ REGISTER_KERNEL(filter_finalize),
+ REGISTER_KERNEL(data_init)
+#undef REGISTER_KERNEL
+ {
+ if (info.cpu_threads == 0) {
+ info.cpu_threads = TaskScheduler::num_threads();
+ }
+
+#ifdef WITH_OSL
+ kernel_globals.osl = &osl_globals;
+#endif
+ use_split_kernel = DebugFlags().cpu.split_kernel;
+ if (use_split_kernel) {
+ VLOG(1) << "Will be using split kernel.";
+ }
+ need_texture_info = false;
+
+#define REGISTER_SPLIT_KERNEL(name) \
+ split_kernels[#name] = KernelFunctions<void (*)(KernelGlobals *, KernelData *)>( \
+ KERNEL_FUNCTIONS(name))
+ REGISTER_SPLIT_KERNEL(path_init);
+ REGISTER_SPLIT_KERNEL(scene_intersect);
+ REGISTER_SPLIT_KERNEL(lamp_emission);
+ REGISTER_SPLIT_KERNEL(do_volume);
+ REGISTER_SPLIT_KERNEL(queue_enqueue);
+ REGISTER_SPLIT_KERNEL(indirect_background);
+ REGISTER_SPLIT_KERNEL(shader_setup);
+ REGISTER_SPLIT_KERNEL(shader_sort);
+ REGISTER_SPLIT_KERNEL(shader_eval);
+ REGISTER_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao);
+ REGISTER_SPLIT_KERNEL(subsurface_scatter);
+ REGISTER_SPLIT_KERNEL(direct_lighting);
+ REGISTER_SPLIT_KERNEL(shadow_blocked_ao);
+ REGISTER_SPLIT_KERNEL(shadow_blocked_dl);
+ REGISTER_SPLIT_KERNEL(enqueue_inactive);
+ REGISTER_SPLIT_KERNEL(next_iteration_setup);
+ REGISTER_SPLIT_KERNEL(indirect_subsurface);
+ REGISTER_SPLIT_KERNEL(buffer_update);
+#undef REGISTER_SPLIT_KERNEL
+#undef KERNEL_FUNCTIONS
+ }
+
+ ~CPUDevice()
+ {
+ task_pool.stop();
+ texture_info.free();
+ }
+
+ virtual bool show_samples() const
+ {
+ return (info.cpu_threads == 1);
+ }
+
+ virtual BVHLayoutMask get_bvh_layout_mask() const
+ {
+ BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
+ if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
+ bvh_layout_mask |= BVH_LAYOUT_BVH4;
+ }
+#if defined(__x86_64__) || defined(_M_X64)
+ if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
+ bvh_layout_mask |= BVH_LAYOUT_BVH8;
+ }
+#endif
+#ifdef WITH_EMBREE
+ bvh_layout_mask |= BVH_LAYOUT_EMBREE;
+#endif /* WITH_EMBREE */
+ return bvh_layout_mask;
+ }
+
+ void load_texture_info()
+ {
+ if (need_texture_info) {
+ texture_info.copy_to_device();
+ need_texture_info = false;
+ }
+ }
+
+ void mem_alloc(device_memory &mem)
+ {
+ if (mem.type == MEM_TEXTURE) {
+ assert(!"mem_alloc not supported for textures.");
+ }
+ else {
+ if (mem.name) {
+ VLOG(1) << "Buffer allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+ }
+
+ if (mem.type == MEM_DEVICE_ONLY) {
+ assert(!mem.host_pointer);
+ size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
+ void *data = util_aligned_malloc(mem.memory_size(), alignment);
+ mem.device_pointer = (device_ptr)data;
+ }
+ else {
+ mem.device_pointer = (device_ptr)mem.host_pointer;
+ }
+
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
+ }
+ }
+
+ void mem_copy_to(device_memory &mem)
+ {
+ if (mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ tex_alloc(mem);
+ }
+ else if (mem.type == MEM_PIXELS) {
+ assert(!"mem_copy_to not supported for pixels.");
+ }
+ else {
+ if (!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+
+ /* copy is no-op */
+ }
+ }
+
+ void mem_copy_from(device_memory & /*mem*/, int /*y*/, int /*w*/, int /*h*/, int /*elem*/)
+ {
+ /* no-op */
+ }
+
+ void mem_zero(device_memory &mem)
+ {
+ if (!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+
+ if (mem.device_pointer) {
+ memset((void *)mem.device_pointer, 0, mem.memory_size());
+ }
+ }
+
+ void mem_free(device_memory &mem)
+ {
+ if (mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ }
+ else if (mem.device_pointer) {
+ if (mem.type == MEM_DEVICE_ONLY) {
+ util_aligned_free((void *)mem.device_pointer);
+ }
+ mem.device_pointer = 0;
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
+ }
+ }
+
+ virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
+ {
+ return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
+ }
+
+ void const_copy_to(const char *name, void *host, size_t size)
+ {
+ kernel_const_copy(&kernel_globals, name, host, size);
+ }
+
+ void tex_alloc(device_memory &mem)
+ {
+ VLOG(1) << "Texture allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+
+ if (mem.interpolation == INTERPOLATION_NONE) {
+ /* Data texture. */
+ kernel_tex_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
+ }
+ else {
+ /* Image Texture. */
+ int flat_slot = 0;
+ if (string_startswith(mem.name, "__tex_image")) {
+ int pos = string(mem.name).rfind("_");
+ flat_slot = atoi(mem.name + pos + 1);
+ }
+ else {
+ assert(0);
+ }
+
+ if (flat_slot >= texture_info.size()) {
+        /* Allocate some slots in advance, to reduce the number
+         * of re-allocations. */
+ texture_info.resize(flat_slot + 128);
+ }
+
+ TextureInfo &info = texture_info[flat_slot];
+ info.data = (uint64_t)mem.host_pointer;
+ info.cl_buffer = 0;
+ info.interpolation = mem.interpolation;
+ info.extension = mem.extension;
+ info.width = mem.data_width;
+ info.height = mem.data_height;
+ info.depth = mem.data_depth;
+
+ need_texture_info = true;
+ }
+
+ mem.device_pointer = (device_ptr)mem.host_pointer;
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
+ }
+
+ void tex_free(device_memory &mem)
+ {
+ if (mem.device_pointer) {
+ mem.device_pointer = 0;
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
+ need_texture_info = true;
+ }
+ }
+
+ void *osl_memory()
+ {
+#ifdef WITH_OSL
+ return &osl_globals;
+#else
+ return NULL;
+#endif
+ }
+
+ void thread_run(DeviceTask *task)
+ {
+    if (task->type == DeviceTask::RENDER) {
+      thread_render(*task);
+    }
+    else if (task->type == DeviceTask::FILM_CONVERT) {
+      thread_film_convert(*task);
+    }
+    else if (task->type == DeviceTask::SHADER) {
+      thread_shader(*task);
+    }
+ }
+
+ class CPUDeviceTask : public DeviceTask {
+ public:
+ CPUDeviceTask(CPUDevice *device, DeviceTask &task) : DeviceTask(task)
+ {
+ run = function_bind(&CPUDevice::thread_run, device, this);
+ }
+ };
+
+ bool denoising_non_local_means(device_ptr image_ptr,
+ device_ptr guide_ptr,
+ device_ptr variance_ptr,
+ device_ptr out_ptr,
+ DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_NON_LOCAL_MEANS);
+
+ int4 rect = task->rect;
+ int r = task->nlm_state.r;
+ int f = task->nlm_state.f;
+ float a = task->nlm_state.a;
+ float k_2 = task->nlm_state.k_2;
+
+ int w = align_up(rect.z - rect.x, 4);
+ int h = rect.w - rect.y;
+ int stride = task->buffer.stride;
+ int channel_offset = task->nlm_state.is_color ? task->buffer.pass_stride : 0;
+
+ float *temporary_mem = (float *)task->buffer.temporary_mem.device_pointer;
+ float *blurDifference = temporary_mem;
+ float *difference = temporary_mem + task->buffer.pass_stride;
+ float *weightAccum = temporary_mem + 2 * task->buffer.pass_stride;
+
+ memset(weightAccum, 0, sizeof(float) * w * h);
+ memset((float *)out_ptr, 0, sizeof(float) * w * h);
+
+ for (int i = 0; i < (2 * r + 1) * (2 * r + 1); i++) {
+ int dy = i / (2 * r + 1) - r;
+ int dx = i % (2 * r + 1) - r;
+
+ int local_rect[4] = {
+ max(0, -dx), max(0, -dy), rect.z - rect.x - max(0, dx), rect.w - rect.y - max(0, dy)};
+ filter_nlm_calc_difference_kernel()(dx,
+ dy,
+ (float *)guide_ptr,
+ (float *)variance_ptr,
+ NULL,
+ difference,
+ local_rect,
+ w,
+ channel_offset,
+ 0,
+ a,
+ k_2);
+
+ filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f);
+ filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, w, f);
+ filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f);
+
+ filter_nlm_update_output_kernel()(dx,
+ dy,
+ blurDifference,
+ (float *)image_ptr,
+ difference,
+ (float *)out_ptr,
+ weightAccum,
+ local_rect,
+ channel_offset,
+ stride,
+ f);
+ }
+
+ int local_rect[4] = {0, 0, rect.z - rect.x, rect.w - rect.y};
+ filter_nlm_normalize_kernel()((float *)out_ptr, weightAccum, local_rect, w);
+
+ return true;
+ }
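
Aside: the flattened loop above visits every (dx, dy) offset in the
(2r + 1) x (2r + 1) non-local-means search window. A minimal standalone sketch of
just that traversal (the radius value is illustrative):

    #include <cstdio>

    int main()
    {
      const int r = 2; /* illustrative stand-in for task->nlm_state.r */
      /* Same flattened-index arithmetic as denoising_non_local_means above. */
      for (int i = 0; i < (2 * r + 1) * (2 * r + 1); i++) {
        const int dy = i / (2 * r + 1) - r;
        const int dx = i % (2 * r + 1) - r;
        printf("offset (%d, %d)\n", dx, dy);
      }
      return 0;
    }
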
+
+ bool denoising_construct_transform(DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_CONSTRUCT_TRANSFORM);
+
+ for (int y = 0; y < task->filter_area.w; y++) {
+ for (int x = 0; x < task->filter_area.z; x++) {
+ filter_construct_transform_kernel()((float *)task->buffer.mem.device_pointer,
+ task->tile_info,
+ x + task->filter_area.x,
+ y + task->filter_area.y,
+ y * task->filter_area.z + x,
+ (float *)task->storage.transform.device_pointer,
+ (int *)task->storage.rank.device_pointer,
+ &task->rect.x,
+ task->buffer.pass_stride,
+ task->buffer.frame_stride,
+ task->buffer.use_time,
+ task->radius,
+ task->pca_threshold);
+ }
+ }
+ return true;
+ }
+
+ bool denoising_accumulate(device_ptr color_ptr,
+ device_ptr color_variance_ptr,
+ device_ptr scale_ptr,
+ int frame,
+ DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_RECONSTRUCT);
+
+ float *temporary_mem = (float *)task->buffer.temporary_mem.device_pointer;
+ float *difference = temporary_mem;
+ float *blurDifference = temporary_mem + task->buffer.pass_stride;
+
+ int r = task->radius;
+ int frame_offset = frame * task->buffer.frame_stride;
+ for (int i = 0; i < (2 * r + 1) * (2 * r + 1); i++) {
+ int dy = i / (2 * r + 1) - r;
+ int dx = i % (2 * r + 1) - r;
+
+ int local_rect[4] = {max(0, -dx),
+ max(0, -dy),
+ task->reconstruction_state.source_w - max(0, dx),
+ task->reconstruction_state.source_h - max(0, dy)};
+ filter_nlm_calc_difference_kernel()(dx,
+ dy,
+ (float *)color_ptr,
+ (float *)color_variance_ptr,
+ (float *)scale_ptr,
+ difference,
+ local_rect,
+ task->buffer.stride,
+ task->buffer.pass_stride,
+ frame_offset,
+ 1.0f,
+ task->nlm_k_2);
+ filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4);
+ filter_nlm_calc_weight_kernel()(
+ blurDifference, difference, local_rect, task->buffer.stride, 4);
+ filter_nlm_blur_kernel()(difference, blurDifference, local_rect, task->buffer.stride, 4);
+ filter_nlm_construct_gramian_kernel()(dx,
+ dy,
+ task->tile_info->frames[frame],
+ blurDifference,
+ (float *)task->buffer.mem.device_pointer,
+ (float *)task->storage.transform.device_pointer,
+ (int *)task->storage.rank.device_pointer,
+ (float *)task->storage.XtWX.device_pointer,
+ (float3 *)task->storage.XtWY.device_pointer,
+ local_rect,
+ &task->reconstruction_state.filter_window.x,
+ task->buffer.stride,
+ 4,
+ task->buffer.pass_stride,
+ frame_offset,
+ task->buffer.use_time);
+ }
+
+ return true;
+ }
+
+ bool denoising_solve(device_ptr output_ptr, DenoisingTask *task)
+ {
+ for (int y = 0; y < task->filter_area.w; y++) {
+ for (int x = 0; x < task->filter_area.z; x++) {
+ filter_finalize_kernel()(x,
+ y,
+ y * task->filter_area.z + x,
+ (float *)output_ptr,
+ (int *)task->storage.rank.device_pointer,
+ (float *)task->storage.XtWX.device_pointer,
+ (float3 *)task->storage.XtWY.device_pointer,
+ &task->reconstruction_state.buffer_params.x,
+ task->render_buffer.samples);
+ }
+ }
+ return true;
+ }
+
+ bool denoising_combine_halves(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ int r,
+ int4 rect,
+ DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_COMBINE_HALVES);
+
+ for (int y = rect.y; y < rect.w; y++) {
+ for (int x = rect.x; x < rect.z; x++) {
+ filter_combine_halves_kernel()(x,
+ y,
+ (float *)mean_ptr,
+ (float *)variance_ptr,
+ (float *)a_ptr,
+ (float *)b_ptr,
+ &rect.x,
+ r);
+ }
+ }
+ return true;
+ }
+
+ bool denoising_divide_shadow(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr sample_variance_ptr,
+ device_ptr sv_variance_ptr,
+ device_ptr buffer_variance_ptr,
+ DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_DIVIDE_SHADOW);
+
+ for (int y = task->rect.y; y < task->rect.w; y++) {
+ for (int x = task->rect.x; x < task->rect.z; x++) {
+ filter_divide_shadow_kernel()(task->render_buffer.samples,
+ task->tile_info,
+ x,
+ y,
+ (float *)a_ptr,
+ (float *)b_ptr,
+ (float *)sample_variance_ptr,
+ (float *)sv_variance_ptr,
+ (float *)buffer_variance_ptr,
+ &task->rect.x,
+ task->render_buffer.pass_stride,
+ task->render_buffer.offset);
+ }
+ }
+ return true;
+ }
+
+ bool denoising_get_feature(int mean_offset,
+ int variance_offset,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ float scale,
+ DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_GET_FEATURE);
+
+ for (int y = task->rect.y; y < task->rect.w; y++) {
+ for (int x = task->rect.x; x < task->rect.z; x++) {
+ filter_get_feature_kernel()(task->render_buffer.samples,
+ task->tile_info,
+ mean_offset,
+ variance_offset,
+ x,
+ y,
+ (float *)mean_ptr,
+ (float *)variance_ptr,
+ scale,
+ &task->rect.x,
+ task->render_buffer.pass_stride,
+ task->render_buffer.offset);
+ }
+ }
+ return true;
+ }
+
+ bool denoising_write_feature(int out_offset,
+ device_ptr from_ptr,
+ device_ptr buffer_ptr,
+ DenoisingTask *task)
+ {
+ for (int y = 0; y < task->filter_area.w; y++) {
+ for (int x = 0; x < task->filter_area.z; x++) {
+ filter_write_feature_kernel()(task->render_buffer.samples,
+ x + task->filter_area.x,
+ y + task->filter_area.y,
+ &task->reconstruction_state.buffer_params.x,
+ (float *)from_ptr,
+ (float *)buffer_ptr,
+ out_offset,
+ &task->rect.x);
+ }
+ }
+ return true;
+ }
+
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task)
+ {
+ ProfilingHelper profiling(task->profiler, PROFILING_DENOISING_DETECT_OUTLIERS);
+
+ for (int y = task->rect.y; y < task->rect.w; y++) {
+ for (int x = task->rect.x; x < task->rect.z; x++) {
+ filter_detect_outliers_kernel()(x,
+ y,
+ (float *)image_ptr,
+ (float *)variance_ptr,
+ (float *)depth_ptr,
+ (float *)output_ptr,
+ &task->rect.x,
+ task->buffer.pass_stride);
+ }
+ }
+ return true;
+ }
+
+ void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
+ {
+ const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
+
+ scoped_timer timer(&tile.buffers->render_time);
+
+ Coverage coverage(kg, tile);
+ if (use_coverage) {
+ coverage.init_path_trace();
+ }
+
+ float *render_buffer = (float *)tile.buffer;
+ int start_sample = tile.start_sample;
+ int end_sample = tile.start_sample + tile.num_samples;
+
+ /* Needed for Embree. */
+ SIMD_SET_FLUSH_TO_ZERO;
+
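+    /* Sample-major order: one sample is completed across the whole tile
+     * before the next begins, so tile.sample and progress can be reported
+     * per finished sample. */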
+ for (int sample = start_sample; sample < end_sample; sample++) {
+ if (task.get_cancel() || task_pool.canceled()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ if (use_coverage) {
+ coverage.init_pixel(x, y);
+ }
+ path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
+ }
+ }
+
+ tile.sample = sample + 1;
+
+ task.update_progress(&tile, tile.w * tile.h);
+ }
+ if (use_coverage) {
+ coverage.finalize();
+ }
+ }
+
+ void denoise(DenoisingTask &denoising, RenderTile &tile)
+ {
+ ProfilingHelper profiling(denoising.profiler, PROFILING_DENOISING);
+
+ tile.sample = tile.start_sample + tile.num_samples;
+
+ denoising.functions.construct_transform = function_bind(
+ &CPUDevice::denoising_construct_transform, this, &denoising);
+ denoising.functions.accumulate = function_bind(
+ &CPUDevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.solve = function_bind(&CPUDevice::denoising_solve, this, _1, &denoising);
+ denoising.functions.divide_shadow = function_bind(
+ &CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.non_local_means = function_bind(
+ &CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.combine_halves = function_bind(
+ &CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
+ denoising.functions.get_feature = function_bind(
+ &CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.write_feature = function_bind(
+ &CPUDevice::denoising_write_feature, this, _1, _2, _3, &denoising);
+ denoising.functions.detect_outliers = function_bind(
+ &CPUDevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
+
+ denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
+ denoising.render_buffer.samples = tile.sample;
+ denoising.buffer.gpu_temporary_mem = false;
+
+ denoising.run_denoising(&tile);
+ }
+
+ void thread_render(DeviceTask &task)
+ {
+ if (task_pool.canceled()) {
+ if (task.need_finish_queue == false)
+ return;
+ }
+
+ /* allocate buffer for kernel globals */
+ device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals");
+ kgbuffer.alloc_to_device(1);
+
+ KernelGlobals *kg = new ((void *)kgbuffer.device_pointer)
+ KernelGlobals(thread_kernel_globals_init());
+
+ profiler.add_state(&kg->profiler);
+
+ CPUSplitKernel *split_kernel = NULL;
+ if (use_split_kernel) {
+ split_kernel = new CPUSplitKernel(this);
+ if (!split_kernel->load_kernels(requested_features)) {
+ thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
+ kgbuffer.free();
+ delete split_kernel;
+ return;
+ }
+ }
+
+ RenderTile tile;
+ DenoisingTask denoising(this, task);
+ denoising.profiler = &kg->profiler;
+
+ while (task.acquire_tile(this, tile)) {
+ if (tile.task == RenderTile::PATH_TRACE) {
+ if (use_split_kernel) {
+ device_only_memory<uchar> void_buffer(this, "void_buffer");
+ split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
+ }
+ else {
+ path_trace(task, tile, kg);
+ }
+ }
+ else if (tile.task == RenderTile::DENOISE) {
+ denoise(denoising, tile);
+ task.update_progress(&tile, tile.w * tile.h);
+ }
+
+ task.release_tile(tile);
+
+ if (task_pool.canceled()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+ }
+
+ profiler.remove_state(&kg->profiler);
+
+ thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
+ kg->~KernelGlobals();
+ kgbuffer.free();
+ delete split_kernel;
+ }
+
+ void thread_film_convert(DeviceTask &task)
+ {
+ float sample_scale = 1.0f / (task.sample + 1);
+
+ if (task.rgba_half) {
+ for (int y = task.y; y < task.y + task.h; y++)
+ for (int x = task.x; x < task.x + task.w; x++)
+ convert_to_half_float_kernel()(&kernel_globals,
+ (uchar4 *)task.rgba_half,
+ (float *)task.buffer,
+ sample_scale,
+ x,
+ y,
+ task.offset,
+ task.stride);
+ }
+ else {
+ for (int y = task.y; y < task.y + task.h; y++)
+ for (int x = task.x; x < task.x + task.w; x++)
+ convert_to_byte_kernel()(&kernel_globals,
+ (uchar4 *)task.rgba_byte,
+ (float *)task.buffer,
+ sample_scale,
+ x,
+ y,
+ task.offset,
+ task.stride);
+ }
+ }
+
+ void thread_shader(DeviceTask &task)
+ {
+ KernelGlobals *kg = new KernelGlobals(thread_kernel_globals_init());
+
+ for (int sample = 0; sample < task.num_samples; sample++) {
+ for (int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
+ shader_kernel()(kg,
+ (uint4 *)task.shader_input,
+ (float4 *)task.shader_output,
+ task.shader_eval_type,
+ task.shader_filter,
+ x,
+ task.offset,
+ sample);
+
+ if (task.get_cancel() || task_pool.canceled())
+ break;
+
+ task.update_progress(NULL);
+ }
+
+ thread_kernel_globals_free(kg);
+ delete kg;
+ }
+
+ int get_split_task_count(DeviceTask &task)
+ {
+ if (task.type == DeviceTask::SHADER)
+ return task.get_subtask_count(info.cpu_threads, 256);
+ else
+ return task.get_subtask_count(info.cpu_threads);
+ }
+
+ void task_add(DeviceTask &task)
+ {
+ /* Load texture info. */
+ load_texture_info();
+
+ /* split task into smaller ones */
+ list<DeviceTask> tasks;
+
+ if (task.type == DeviceTask::SHADER)
+ task.split(tasks, info.cpu_threads, 256);
+ else
+ task.split(tasks, info.cpu_threads);
+
+ foreach (DeviceTask &task, tasks)
+ task_pool.push(new CPUDeviceTask(this, task));
+ }
+
+ void task_wait()
+ {
+ task_pool.wait_work();
+ }
+
+ void task_cancel()
+ {
+ task_pool.cancel();
+ }
+
+ protected:
+ inline KernelGlobals thread_kernel_globals_init()
+ {
+ KernelGlobals kg = kernel_globals;
+ kg.transparent_shadow_intersections = NULL;
+ const int decoupled_count = sizeof(kg.decoupled_volume_steps) /
+ sizeof(*kg.decoupled_volume_steps);
+ for (int i = 0; i < decoupled_count; ++i) {
+ kg.decoupled_volume_steps[i] = NULL;
+ }
+ kg.decoupled_volume_steps_index = 0;
+ kg.coverage_asset = kg.coverage_object = kg.coverage_material = NULL;
+#ifdef WITH_OSL
+ OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
+#endif
+ return kg;
+ }
+
+ inline void thread_kernel_globals_free(KernelGlobals *kg)
+ {
+ if (kg == NULL) {
+ return;
+ }
+
+ if (kg->transparent_shadow_intersections != NULL) {
+ free(kg->transparent_shadow_intersections);
+ }
+ const int decoupled_count = sizeof(kg->decoupled_volume_steps) /
+ sizeof(*kg->decoupled_volume_steps);
+ for (int i = 0; i < decoupled_count; ++i) {
+ if (kg->decoupled_volume_steps[i] != NULL) {
+ free(kg->decoupled_volume_steps[i]);
+ }
+ }
+#ifdef WITH_OSL
+ OSLShader::thread_free(kg);
+#endif
+ }
+
+ virtual bool load_kernels(const DeviceRequestedFeatures &requested_features_)
+ {
+ requested_features = requested_features_;
+
+ return true;
+ }
+};
+
+/* split kernel */
+
+class CPUSplitKernelFunction : public SplitKernelFunction {
+ public:
+ CPUDevice *device;
+ void (*func)(KernelGlobals *kg, KernelData *data);
+
+ CPUSplitKernelFunction(CPUDevice *device) : device(device), func(NULL)
+ {
+ }
+ ~CPUSplitKernelFunction()
+ {
+ }
+
+ virtual bool enqueue(const KernelDimensions &dim,
+ device_memory &kernel_globals,
+ device_memory &data)
+ {
+ if (!func) {
+ return false;
+ }
+
+ KernelGlobals *kg = (KernelGlobals *)kernel_globals.device_pointer;
+ kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]);
+
+ for (int y = 0; y < dim.global_size[1]; y++) {
+ for (int x = 0; x < dim.global_size[0]; x++) {
+ kg->global_id = make_int2(x, y);
+
+ func(kg, (KernelData *)data.device_pointer);
+ }
+ }
+
+ return true;
+ }
+};
+
+CPUSplitKernel::CPUSplitKernel(CPUDevice *device) : DeviceSplitKernel(device), device(device)
+{
+}
+
+bool CPUSplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim,
+ RenderTile &rtile,
+ int num_global_elements,
+ device_memory &kernel_globals,
+ device_memory &data,
+ device_memory &split_data,
+ device_memory &ray_state,
+ device_memory &queue_index,
+ device_memory &use_queues_flags,
+ device_memory &work_pool_wgs)
+{
+ KernelGlobals *kg = (KernelGlobals *)kernel_globals.device_pointer;
+ kg->global_size = make_int2(dim.global_size[0], dim.global_size[1]);
+
+ for (int y = 0; y < dim.global_size[1]; y++) {
+ for (int x = 0; x < dim.global_size[0]; x++) {
+ kg->global_id = make_int2(x, y);
+
+ device->data_init_kernel()((KernelGlobals *)kernel_globals.device_pointer,
+ (KernelData *)data.device_pointer,
+ (void *)split_data.device_pointer,
+ num_global_elements,
+ (char *)ray_state.device_pointer,
+ rtile.start_sample,
+ rtile.start_sample + rtile.num_samples,
+ rtile.x,
+ rtile.y,
+ rtile.w,
+ rtile.h,
+ rtile.offset,
+ rtile.stride,
+ (int *)queue_index.device_pointer,
+ dim.global_size[0] * dim.global_size[1],
+ (char *)use_queues_flags.device_pointer,
+ (uint *)work_pool_wgs.device_pointer,
+ rtile.num_samples,
+ (float *)rtile.buffer);
+ }
+ }
+
+ return true;
+}
+
+SplitKernelFunction *CPUSplitKernel::get_split_kernel_function(const string &kernel_name,
+ const DeviceRequestedFeatures &)
+{
+ CPUSplitKernelFunction *kernel = new CPUSplitKernelFunction(device);
+
+ kernel->func = device->split_kernels[kernel_name]();
+ if (!kernel->func) {
+ delete kernel;
+ return NULL;
+ }
+
+ return kernel;
+}
+
+int2 CPUSplitKernel::split_kernel_local_size()
+{
+ return make_int2(1, 1);
+}
+
+int2 CPUSplitKernel::split_kernel_global_size(device_memory & /*kg*/,
+ device_memory & /*data*/,
+ DeviceTask * /*task*/)
+{
+ return make_int2(1, 1);
+}
+
+uint64_t CPUSplitKernel::state_buffer_size(device_memory &kernel_globals,
+ device_memory & /*data*/,
+ size_t num_threads)
+{
+ KernelGlobals *kg = (KernelGlobals *)kernel_globals.device_pointer;
+
+ return split_data_buffer_size(kg, num_threads);
+}
+
+Device *device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
+{
+ return new CPUDevice(info, stats, profiler, background);
+}
+
+void device_cpu_info(vector<DeviceInfo> &devices)
+{
+ DeviceInfo info;
+
+ info.type = DEVICE_CPU;
+ info.description = system_cpu_brand_string();
+ info.id = "CPU";
+ info.num = 0;
+ info.has_volume_decoupled = true;
+ info.has_osl = true;
+ info.has_half_images = true;
+ info.has_profiling = true;
+
+ devices.insert(devices.begin(), info);
+}
+
+string device_cpu_capabilities()
+{
+ string capabilities = "";
+ capabilities += system_cpu_support_sse2() ? "SSE2 " : "";
+ capabilities += system_cpu_support_sse3() ? "SSE3 " : "";
+ capabilities += system_cpu_support_sse41() ? "SSE41 " : "";
+ capabilities += system_cpu_support_avx() ? "AVX " : "";
+ capabilities += system_cpu_support_avx2() ? "AVX2" : "";
+  /* Guard against an empty string on CPUs without any of the above. */
+  if (!capabilities.empty() && capabilities[capabilities.size() - 1] == ' ')
+    capabilities.resize(capabilities.size() - 1);
+ return capabilities;
+}
+
+CCL_NAMESPACE_END
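
Aside: each KernelFunctions member registered above bundles one entry point per
instruction set (regular, SSE2, SSE3, SSE4.1, AVX, AVX2) and resolves to the widest
variant the running CPU supports. A hedged sketch of that dispatch idea (the type
and member names here are hypothetical, not this patch's API):

    template<typename F> struct KernelDispatchSketch {
      F f_default, f_sse2, f_avx2; /* one pointer per compiled ISA variant */

      F choose() const
      {
        /* Prefer the widest supported ISA, mirroring how the
         * system_cpu_support_* checks are used elsewhere in this file. */
        if (system_cpu_support_avx2())
          return f_avx2;
        if (system_cpu_support_sse2())
          return f_sse2;
        return f_default;
      }
    };
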
diff -Naur a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
--- a/intern/cycles/device/device_cuda.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/device_cuda.cpp 2020-01-10 20:42:43.460923388 +0300
@@ -1788,6 +1788,23 @@
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+ /* Kernels for adaptive sampling. */
+ CUfunction cuAdaptiveStopping, cuAdaptiveFilterX, cuAdaptiveFilterY, cuAdaptiveScaleSamples;
+ if (task.integrator_adaptive) {
+ cuda_assert(
+ cuModuleGetFunction(&cuAdaptiveStopping, cuModule, "kernel_cuda_adaptive_stopping"));
+ cuda_assert(cuFuncSetCacheConfig(cuAdaptiveStopping, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(
+ cuModuleGetFunction(&cuAdaptiveFilterX, cuModule, "kernel_cuda_adaptive_filter_x"));
+ cuda_assert(cuFuncSetCacheConfig(cuAdaptiveFilterX, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(
+ cuModuleGetFunction(&cuAdaptiveFilterY, cuModule, "kernel_cuda_adaptive_filter_y"));
+ cuda_assert(cuFuncSetCacheConfig(cuAdaptiveFilterY, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuModuleGetFunction(
+ &cuAdaptiveScaleSamples, cuModule, "kernel_cuda_adaptive_scale_samples"));
+ cuda_assert(cuFuncSetCacheConfig(cuAdaptiveScaleSamples, CU_FUNC_CACHE_PREFER_L1));
+ }
+
/* Allocate work tile. */
work_tiles.alloc(1);
@@ -1812,6 +1829,16 @@
uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
+ if (task.integrator_adaptive) {
+      /* Force to either 1, 2 or a multiple of 4 samples per kernel invocation. */
+ if (step_samples == 3) {
+ step_samples = 2;
+ }
+ else if (step_samples > 4) {
+ step_samples &= 0xfffffffc;
+ }
+ }
+
/* Render all samples. */
int start_sample = rtile.start_sample;
int end_sample = rtile.start_sample + rtile.num_samples;
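
Aside: restated as a standalone helper (the function name is hypothetical), the
rounding rule in the hunk above keeps each kernel invocation at 1, 2 or a multiple
of 4 samples, so that chunk boundaries periodically land on the every-4-samples
filter cadence used below:

    static unsigned int clamp_adaptive_step_samples(unsigned int step_samples)
    {
      if (step_samples == 3)
        return 2; /* chunks of 3 never end on a 4-sample boundary, so drop to 2 */
      if (step_samples > 4)
        return step_samples & ~0x3u; /* same as &= 0xfffffffc: round down to a multiple of 4 */
      return step_samples; /* 1, 2 and 4 already line up */
    }
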
@@ -1832,6 +1859,26 @@
cuda_assert(cuLaunchKernel(
cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+ uint filter_sample = sample + wtile->num_samples - 1;
+ /* Run the adaptive sampling kernels when we're at a multiple of 4 samples.
+       * These are a series of tiny kernels because there is no grid synchronization
+       * from within a kernel, so several separate launches are required. */
+ if (task.integrator_adaptive && (filter_sample & 0x3) == 3) {
+ total_work_size = wtile->h * wtile->w;
+ void *args2[] = {&d_work_tiles, &filter_sample, &total_work_size};
+ num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(
+ cuAdaptiveStopping, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args2, 0));
+ total_work_size = wtile->h;
+ num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(
+ cuAdaptiveFilterX, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args2, 0));
+ total_work_size = wtile->w;
+ num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(
+ cuAdaptiveFilterY, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args2, 0));
+ }
+
cuda_assert(cuCtxSynchronize());
/* Update progress. */
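
Aside: the (filter_sample & 0x3) == 3 test above fires at zero-based sample indices
3, 7, 11, ..., i.e. once every fourth accumulated sample, matching the step-size
rounding earlier in this file. A tiny loop showing the cadence:

    for (unsigned int filter_sample = 0; filter_sample < 16; filter_sample++) {
      if ((filter_sample & 0x3) == 3) {
        /* cuAdaptiveStopping, cuAdaptiveFilterX and cuAdaptiveFilterY would be
         * launched here, i.e. after samples 4, 8, 12 and 16 complete. */
      }
    }
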
@@ -1843,6 +1890,17 @@
break;
}
}
+
+ if (task.integrator_adaptive) {
+ CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
+ uint total_work_size = wtile->h * wtile->w;
+ void *args[] = {&d_work_tiles, &rtile.start_sample, &rtile.sample, &total_work_size};
+ uint num_blocks = divide_up(total_work_size, num_threads_per_block);
+ cuda_assert(cuLaunchKernel(
+ cuAdaptiveScaleSamples, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+ cuda_assert(cuCtxSynchronize());
+ task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
+ }
}
void film_convert(DeviceTask &task,
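
Aside: the final cuAdaptiveScaleSamples launch above runs once over the whole tile
after rendering. As a hedged reading (an assumption, not taken from this patch's
kernel sources): pixels that stopped sampling early hold sums over fewer samples,
so they are rescaled to stay comparable with the tile-wide sample count, roughly:

    /* Illustrative CPU-side sketch only; the buffer layout and the per-pixel
     * sample bookkeeping are assumptions, not this patch's kernel. */
    static void adaptive_scale_samples_sketch(
        float *buffer, const int *samples_done, int start_sample, int end_sample, int num_pixels)
    {
      for (int i = 0; i < num_pixels; i++) {
        const int s = samples_done[i] - start_sample;
        if (s > 0 && s < end_sample - start_sample)
          buffer[i] *= float(end_sample - start_sample) / float(s);
      }
    }
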
diff -Naur a/intern/cycles/device/device_cuda.cpp.orig b/intern/cycles/device/device_cuda.cpp.orig
--- a/intern/cycles/device/device_cuda.cpp.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/device/device_cuda.cpp.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,2846 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <climits>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "device/device.h"
+#include "device/device_denoising.h"
+#include "device/device_intern.h"
+#include "device/device_split_kernel.h"
+
+#include "render/buffers.h"
+
+#include "kernel/filter/filter_defines.h"
+
+#ifdef WITH_CUDA_DYNLOAD
+# include "cuew.h"
+#else
+# include "util/util_opengl.h"
+# include <cuda.h>
+# include <cudaGL.h>
+#endif
+#include "util/util_debug.h"
+#include "util/util_foreach.h"
+#include "util/util_logging.h"
+#include "util/util_map.h"
+#include "util/util_md5.h"
+#include "util/util_opengl.h"
+#include "util/util_path.h"
+#include "util/util_string.h"
+#include "util/util_system.h"
+#include "util/util_types.h"
+#include "util/util_time.h"
+#include "util/util_windows.h"
+
+#include "kernel/split/kernel_split_data_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef WITH_CUDA_DYNLOAD
+
+/* Transparently implement some functions, so the majority of the file does not
+ * need to worry about the difference between dynamically loaded and linked CUDA
+ * at all.
+ */
+
+namespace {
+
+const char *cuewErrorString(CUresult result)
+{
+  /* We can only give the error code here without major code duplication; that
+   * should be enough, since dynamic loading is only being disabled by folks
+   * who know what they're doing anyway.
+ *
+ * NOTE: Avoid call from several threads.
+ */
+ static string error;
+ error = string_printf("%d", result);
+ return error.c_str();
+}
+
+const char *cuewCompilerPath()
+{
+ return CYCLES_CUDA_NVCC_EXECUTABLE;
+}
+
+int cuewCompilerVersion()
+{
+ return (CUDA_VERSION / 100) + (CUDA_VERSION % 100 / 10);
+}
+
+} /* namespace */
+#endif /* WITH_CUDA_DYNLOAD */
+
+class CUDADevice;
+
+class CUDASplitKernel : public DeviceSplitKernel {
+ CUDADevice *device;
+
+ public:
+ explicit CUDASplitKernel(CUDADevice *device);
+
+ virtual uint64_t state_buffer_size(device_memory &kg, device_memory &data, size_t num_threads);
+
+ virtual bool enqueue_split_kernel_data_init(const KernelDimensions &dim,
+ RenderTile &rtile,
+ int num_global_elements,
+ device_memory &kernel_globals,
+ device_memory &kernel_data_,
+ device_memory &split_data,
+ device_memory &ray_state,
+ device_memory &queue_index,
+ device_memory &use_queues_flag,
+ device_memory &work_pool_wgs);
+
+ virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
+ const DeviceRequestedFeatures &);
+ virtual int2 split_kernel_local_size();
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task);
+};
+
+/* Utility to push/pop CUDA context. */
+class CUDAContextScope {
+ public:
+ CUDAContextScope(CUDADevice *device);
+ ~CUDAContextScope();
+
+ private:
+ CUDADevice *device;
+};
+
+class CUDADevice : public Device {
+ public:
+ DedicatedTaskPool task_pool;
+ CUdevice cuDevice;
+ CUcontext cuContext;
+ CUmodule cuModule, cuFilterModule;
+ size_t device_texture_headroom;
+ size_t device_working_headroom;
+ bool move_texture_to_host;
+ size_t map_host_used;
+ size_t map_host_limit;
+ int can_map_host;
+ int cuDevId;
+ int cuDevArchitecture;
+ bool first_error;
+ CUDASplitKernel *split_kernel;
+
+ struct CUDAMem {
+ CUDAMem() : texobject(0), array(0), use_mapped_host(false)
+ {
+ }
+
+ CUtexObject texobject;
+ CUarray array;
+
+ /* If true, a mapped host memory in shared_pointer is being used. */
+ bool use_mapped_host;
+ };
+ typedef map<device_memory *, CUDAMem> CUDAMemMap;
+ CUDAMemMap cuda_mem_map;
+
+ struct PixelMem {
+ GLuint cuPBO;
+ CUgraphicsResource cuPBOresource;
+ GLuint cuTexId;
+ int w, h;
+ };
+ map<device_ptr, PixelMem> pixel_mem_map;
+
+ /* Bindless Textures */
+ device_vector<TextureInfo> texture_info;
+ bool need_texture_info;
+
+ CUdeviceptr cuda_device_ptr(device_ptr mem)
+ {
+ return (CUdeviceptr)mem;
+ }
+
+ static bool have_precompiled_kernels()
+ {
+ string cubins_path = path_get("lib");
+ return path_exists(cubins_path);
+ }
+
+ virtual bool show_samples() const
+ {
+ /* The CUDADevice only processes one tile at a time, so showing samples is fine. */
+ return true;
+ }
+
+ virtual BVHLayoutMask get_bvh_layout_mask() const
+ {
+ return BVH_LAYOUT_BVH2;
+ }
+
+ /*#ifdef NDEBUG
+#define cuda_abort()
+#else
+#define cuda_abort() abort()
+#endif*/
+ void cuda_error_documentation()
+ {
+ if (first_error) {
+ fprintf(stderr,
+ "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
+ fprintf(stderr,
+ "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n");
+ first_error = false;
+ }
+ }
+
+#define cuda_assert(stmt) \
+ { \
+ CUresult result = stmt; \
+\
+ if (result != CUDA_SUCCESS) { \
+ string message = string_printf( \
+ "CUDA error: %s in %s, line %d", cuewErrorString(result), #stmt, __LINE__); \
+ if (error_msg == "") \
+ error_msg = message; \
+ fprintf(stderr, "%s\n", message.c_str()); \
+ /*cuda_abort();*/ \
+ cuda_error_documentation(); \
+ } \
+ } \
+ (void)0
+
+ bool cuda_error_(CUresult result, const string &stmt)
+ {
+ if (result == CUDA_SUCCESS)
+ return false;
+
+ string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuewErrorString(result));
+ if (error_msg == "")
+ error_msg = message;
+ fprintf(stderr, "%s\n", message.c_str());
+ cuda_error_documentation();
+ return true;
+ }
+
+#define cuda_error(stmt) cuda_error_(stmt, #stmt)
+
+ void cuda_error_message(const string &message)
+ {
+ if (error_msg == "")
+ error_msg = message;
+ fprintf(stderr, "%s\n", message.c_str());
+ cuda_error_documentation();
+ }
+
+ CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
+ : Device(info, stats, profiler, background_),
+ texture_info(this, "__texture_info", MEM_TEXTURE)
+ {
+ first_error = true;
+ background = background_;
+
+ cuDevId = info.num;
+ cuDevice = 0;
+ cuContext = 0;
+
+ cuModule = 0;
+ cuFilterModule = 0;
+
+ split_kernel = NULL;
+
+ need_texture_info = false;
+
+ device_texture_headroom = 0;
+ device_working_headroom = 0;
+ move_texture_to_host = false;
+ map_host_limit = 0;
+ map_host_used = 0;
+ can_map_host = 0;
+
+    /* Initialize CUDA. */
+ if (cuda_error(cuInit(0)))
+ return;
+
+ /* Setup device and context. */
+ if (cuda_error(cuDeviceGet(&cuDevice, cuDevId)))
+ return;
+
+ /* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
+ * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
+ * so we can predict which memory to map to host. */
+ cuda_assert(
+ cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
+
+ unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX;
+ if (can_map_host) {
+ ctx_flags |= CU_CTX_MAP_HOST;
+ init_host_memory();
+ }
+
+ /* Create context. */
+ CUresult result;
+
+ if (background) {
+ result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
+ }
+ else {
+ result = cuGLCtxCreate(&cuContext, ctx_flags, cuDevice);
+
+ if (result != CUDA_SUCCESS) {
+ result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
+ background = true;
+ }
+ }
+
+ if (cuda_error_(result, "cuCtxCreate"))
+ return;
+
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+ cuDevArchitecture = major * 100 + minor * 10;
+
+ /* Pop context set by cuCtxCreate. */
+ cuCtxPopCurrent(NULL);
+ }
+
+ ~CUDADevice()
+ {
+ task_pool.stop();
+
+ delete split_kernel;
+
+ texture_info.free();
+
+ cuda_assert(cuCtxDestroy(cuContext));
+ }
+
+ bool support_device(const DeviceRequestedFeatures & /*requested_features*/)
+ {
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+
+ /* We only support sm_30 and above */
+ if (major < 3) {
+ cuda_error_message(string_printf(
+ "CUDA device supported only with compute capability 3.0 or up, found %d.%d.",
+ major,
+ minor));
+ return false;
+ }
+
+ return true;
+ }
+
+ bool use_adaptive_compilation()
+ {
+ return DebugFlags().cuda.adaptive_compile;
+ }
+
+ bool use_split_kernel()
+ {
+ return DebugFlags().cuda.split_kernel;
+ }
+
+ /* Common NVCC flags which stays the same regardless of shading model,
+ * kernel sources md5 and only depends on compiler or compilation settings.
+ */
+ string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
+ bool filter = false,
+ bool split = false)
+ {
+ const int machine = system_cpu_bits();
+ const string source_path = path_get("source");
+ const string include_path = source_path;
+ string cflags = string_printf(
+ "-m%d "
+ "--ptxas-options=\"-v\" "
+ "--use_fast_math "
+ "-DNVCC "
+ "-I\"%s\"",
+ machine,
+ include_path.c_str());
+ if (!filter && use_adaptive_compilation()) {
+ cflags += " " + requested_features.get_build_options();
+ }
+ const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
+ if (extra_cflags) {
+ cflags += string(" ") + string(extra_cflags);
+ }
+#ifdef WITH_CYCLES_DEBUG
+ cflags += " -D__KERNEL_DEBUG__";
+#endif
+
+ if (split) {
+ cflags += " -D__SPLIT__";
+ }
+
+ return cflags;
+ }
+
+ bool compile_check_compiler()
+ {
+ const char *nvcc = cuewCompilerPath();
+ if (nvcc == NULL) {
+ cuda_error_message(
+ "CUDA nvcc compiler not found. "
+ "Install CUDA toolkit in default location.");
+ return false;
+ }
+ const int cuda_version = cuewCompilerVersion();
+ VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version << ".";
+ const int major = cuda_version / 10, minor = cuda_version % 10;
+ if (cuda_version == 0) {
+ cuda_error_message("CUDA nvcc compiler version could not be parsed.");
+ return false;
+ }
+ if (cuda_version < 80) {
+ printf(
+ "Unsupported CUDA version %d.%d detected, "
+ "you need CUDA 8.0 or newer.\n",
+ major,
+ minor);
+ return false;
+ }
+ else if (cuda_version != 101) {
+ printf(
+ "CUDA version %d.%d detected, build may succeed but only "
+ "CUDA 10.1 is officially supported.\n",
+ major,
+ minor);
+ }
+ return true;
+ }
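
Aside, a worked example of the version decoding above: cuewCompilerVersion()
returning 101 gives major = 101 / 10 = 10 and minor = 101 % 10 = 1, i.e. CUDA 10.1,
the one officially supported release, while 80 decodes to CUDA 8.0, the minimum.
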
+
+ string compile_kernel(const DeviceRequestedFeatures &requested_features,
+ bool filter = false,
+ bool split = false)
+ {
+ const char *name, *source;
+ if (filter) {
+ name = "filter";
+ source = "filter.cu";
+ }
+ else if (split) {
+ name = "kernel_split";
+ source = "kernel_split.cu";
+ }
+ else {
+ name = "kernel";
+ source = "kernel.cu";
+ }
+ /* Compute cubin name. */
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+
+ /* Attempt to use kernel provided with Blender. */
+ if (!use_adaptive_compilation()) {
+ const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
+ VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
+ if (path_exists(cubin)) {
+ VLOG(1) << "Using precompiled kernel.";
+ return cubin;
+ }
+ const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor));
+ VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
+ if (path_exists(ptx)) {
+ VLOG(1) << "Using precompiled kernel.";
+ return ptx;
+ }
+ }
+
+ const string common_cflags = compile_kernel_get_common_cflags(
+ requested_features, filter, split);
+
+ /* Try to use locally compiled kernel. */
+ const string source_path = path_get("source");
+ const string kernel_md5 = path_files_md5_hash(source_path);
+
+    /* We include the cflags in the md5, so that changing the CUDA toolkit or
+     * other compiler command-line arguments causes the cubin to be re-built.
+     */
+ const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags);
+
+ const string cubin_file = string_printf(
+ "cycles_%s_sm%d%d_%s.cubin", name, major, minor, cubin_md5.c_str());
+ const string cubin = path_cache_get(path_join("kernels", cubin_file));
+ VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
+ if (path_exists(cubin)) {
+ VLOG(1) << "Using locally compiled kernel.";
+ return cubin;
+ }
+
+#ifdef _WIN32
+ if (have_precompiled_kernels()) {
+ if (major < 3) {
+ cuda_error_message(
+ string_printf("CUDA device requires compute capability 3.0 or up, "
+ "found %d.%d. Your GPU is not supported.",
+ major,
+ minor));
+ }
+ else {
+ cuda_error_message(
+ string_printf("CUDA binary kernel for this graphics card compute "
+ "capability (%d.%d) not found.",
+ major,
+ minor));
+ }
+ return "";
+ }
+#endif
+
+ /* Compile. */
+ if (!compile_check_compiler()) {
+ return "";
+ }
+ const char *nvcc = cuewCompilerPath();
+ const string kernel = path_join(path_join(source_path, "kernel"),
+ path_join("kernels", path_join("cuda", source)));
+ double starttime = time_dt();
+ printf("Compiling CUDA kernel ...\n");
+
+ path_create_directories(cubin);
+
+ string command = string_printf(
+ "\"%s\" "
+ "-arch=sm_%d%d "
+ "--cubin \"%s\" "
+ "-o \"%s\" "
+ "%s ",
+ nvcc,
+ major,
+ minor,
+ kernel.c_str(),
+ cubin.c_str(),
+ common_cflags.c_str());
+
+ printf("%s\n", command.c_str());
+
+ if (system(command.c_str()) == -1) {
+ cuda_error_message(
+ "Failed to execute compilation command, "
+ "see console for details.");
+ return "";
+ }
+
+ /* Verify if compilation succeeded */
+ if (!path_exists(cubin)) {
+ cuda_error_message(
+ "CUDA kernel compilation failed, "
+ "see console for details.");
+ return "";
+ }
+
+ printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
+
+ return cubin;
+ }
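
Aside, for concreteness: on an sm_7.5 device the locally compiled cache path built
above takes the form <cache>/kernels/cycles_kernel_sm75_<md5>.cubin, where the md5
covers both the kernel sources and the cflags, so changing either triggers a rebuild.
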
+
+ bool load_kernels(const DeviceRequestedFeatures &requested_features)
+ {
+ /* TODO(sergey): Support kernels re-load for CUDA devices.
+ *
+ * Currently re-loading kernel will invalidate memory pointers,
+ * causing problems in cuCtxSynchronize.
+ */
+ if (cuFilterModule && cuModule) {
+ VLOG(1) << "Skipping kernel reload, not currently supported.";
+ return true;
+ }
+
+ /* check if cuda init succeeded */
+ if (cuContext == 0)
+ return false;
+
+ /* check if GPU is supported */
+ if (!support_device(requested_features))
+ return false;
+
+ /* get kernel */
+ string cubin = compile_kernel(requested_features, false, use_split_kernel());
+ if (cubin == "")
+ return false;
+
+ string filter_cubin = compile_kernel(requested_features, true, false);
+ if (filter_cubin == "")
+ return false;
+
+ /* open module */
+ CUDAContextScope scope(this);
+
+ string cubin_data;
+ CUresult result;
+
+ if (path_read_text(cubin, cubin_data))
+ result = cuModuleLoadData(&cuModule, cubin_data.c_str());
+ else
+ result = CUDA_ERROR_FILE_NOT_FOUND;
+
+ if (cuda_error_(result, "cuModuleLoad"))
+ cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str()));
+
+ if (path_read_text(filter_cubin, cubin_data))
+ result = cuModuleLoadData(&cuFilterModule, cubin_data.c_str());
+ else
+ result = CUDA_ERROR_FILE_NOT_FOUND;
+
+ if (cuda_error_(result, "cuModuleLoad"))
+ cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str()));
+
+ if (result == CUDA_SUCCESS) {
+ reserve_local_memory(requested_features);
+ }
+
+ return (result == CUDA_SUCCESS);
+ }
+
+ void reserve_local_memory(const DeviceRequestedFeatures &requested_features)
+ {
+ if (use_split_kernel()) {
+      /* The split kernel mostly uses global memory and adaptive compilation,
+       * so it is difficult to predict how much is currently needed. */
+ return;
+ }
+
+ /* Together with CU_CTX_LMEM_RESIZE_TO_MAX, this reserves local memory
+ * needed for kernel launches, so that we can reliably figure out when
+ * to allocate scene data in mapped host memory. */
+ CUDAContextScope scope(this);
+
+ size_t total = 0, free_before = 0, free_after = 0;
+ cuMemGetInfo(&free_before, &total);
+
+ /* Get kernel function. */
+ CUfunction cuPathTrace;
+
+ if (requested_features.use_integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ }
+
+ cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+
+ int min_blocks, num_threads_per_block;
+ cuda_assert(cuOccupancyMaxPotentialBlockSize(
+ &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+
+    /* Launch the kernel; using just 1 block appears sufficient to reserve
+     * memory for all multiprocessors. It would still be good to do this in
+     * parallel for the multi GPU case, to make it faster. */
+ CUdeviceptr d_work_tiles = 0;
+ uint total_work_size = 0;
+
+ void *args[] = {&d_work_tiles, &total_work_size};
+
+ cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+
+ cuda_assert(cuCtxSynchronize());
+
+ cuMemGetInfo(&free_after, &total);
+ VLOG(1) << "Local memory reserved " << string_human_readable_number(free_before - free_after)
+ << " bytes. (" << string_human_readable_size(free_before - free_after) << ")";
+
+#if 0
+ /* For testing mapped host memory, fill up device memory. */
+ const size_t keep_mb = 1024;
+
+ while (free_after > keep_mb * 1024 * 1024LL) {
+ CUdeviceptr tmp;
+ cuda_assert(cuMemAlloc(&tmp, 10 * 1024 * 1024LL));
+ cuMemGetInfo(&free_after, &total);
+ }
+#endif
+ }
+
+ void init_host_memory()
+ {
+    /* Limit the amount of host-mapped memory, because allocating too much can
+ * cause system instability. Leave at least half or 4 GB of system
+ * memory free, whichever is smaller. */
+ size_t default_limit = 4 * 1024 * 1024 * 1024LL;
+ size_t system_ram = system_physical_ram();
+
+ if (system_ram > 0) {
+ if (system_ram / 2 > default_limit) {
+ map_host_limit = system_ram - default_limit;
+ }
+ else {
+ map_host_limit = system_ram / 2;
+ }
+ }
+ else {
+ VLOG(1) << "Mapped host memory disabled, failed to get system RAM";
+ map_host_limit = 0;
+ }
+
+    /* Amount of device memory to keep free after texture memory
+ * and working memory allocations respectively. We set the working
+ * memory limit headroom lower so that some space is left after all
+ * texture memory allocations. */
+ device_working_headroom = 32 * 1024 * 1024LL; // 32MB
+ device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
+
+ VLOG(1) << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
+ << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
+ }
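
Aside, two worked examples of the limit computed above: with 16 GB of system RAM,
half (8 GB) exceeds the 4 GB default, so map_host_limit = 16 - 4 = 12 GB (leaving
4 GB free); with 6 GB, half (3 GB) is below the default, so map_host_limit = 3 GB
(leaving half free).
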
+
+ void load_texture_info()
+ {
+ if (need_texture_info) {
+ texture_info.copy_to_device();
+ need_texture_info = false;
+ }
+ }
+
+ void move_textures_to_host(size_t size, bool for_texture)
+ {
+ /* Signal to reallocate textures in host memory only. */
+ move_texture_to_host = true;
+
+ while (size > 0) {
+ /* Find suitable memory allocation to move. */
+ device_memory *max_mem = NULL;
+ size_t max_size = 0;
+ bool max_is_image = false;
+
+ foreach (CUDAMemMap::value_type &pair, cuda_mem_map) {
+ device_memory &mem = *pair.first;
+ CUDAMem *cmem = &pair.second;
+
+ bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
+ bool is_image = is_texture && (mem.data_height > 1);
+
+ /* Can't move this type of memory. */
+ if (!is_texture || cmem->array) {
+ continue;
+ }
+
+ /* Already in host memory. */
+ if (cmem->use_mapped_host) {
+ continue;
+ }
+
+ /* For other textures, only move image textures. */
+ if (for_texture && !is_image) {
+ continue;
+ }
+
+ /* Try to move largest allocation, prefer moving images. */
+ if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
+ max_is_image = is_image;
+ max_size = mem.device_size;
+ max_mem = &mem;
+ }
+ }
+
+ /* Move to host memory. This part is mutex protected since
+ * multiple CUDA devices could be moving the memory. The
+ * first one will do it, and the rest will adopt the pointer. */
+ if (max_mem) {
+ VLOG(1) << "Move memory from device to host: " << max_mem->name;
+
+ static thread_mutex move_mutex;
+ thread_scoped_lock lock(move_mutex);
+
+ /* Preserve the original device pointer, in case of multi device
+ * we can't change it because the pointer mapping would break. */
+ device_ptr prev_pointer = max_mem->device_pointer;
+ size_t prev_size = max_mem->device_size;
+
+ tex_free(*max_mem);
+ tex_alloc(*max_mem);
+ size = (max_size >= size) ? 0 : size - max_size;
+
+ max_mem->device_pointer = prev_pointer;
+ max_mem->device_size = prev_size;
+ }
+ else {
+ break;
+ }
+ }
+
+ /* Update texture info array with new pointers. */
+ load_texture_info();
+
+ move_texture_to_host = false;
+ }
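
Aside: the selection test in the loop above encodes a simple eviction preference:
image textures beat flat data textures, and among equals the larger allocation
wins. Restated as a comparator (the names are hypothetical):

    struct EvictionCandidateSketch {
      bool is_image;
      size_t device_size;
    };

    /* True when a should be moved to host memory in preference to b. */
    static bool prefer_evict(const EvictionCandidateSketch &a, const EvictionCandidateSketch &b)
    {
      if (a.is_image != b.is_image)
        return a.is_image; /* prefer moving image textures */
      return a.device_size > b.device_size; /* then the largest allocation */
    }
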
+
+ CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0)
+ {
+ CUDAContextScope scope(this);
+
+ CUdeviceptr device_pointer = 0;
+ size_t size = mem.memory_size() + pitch_padding;
+
+ CUresult mem_alloc_result = CUDA_ERROR_OUT_OF_MEMORY;
+ const char *status = "";
+
+ /* First try allocating in device memory, respecting headroom. We make
+ * an exception for texture info. It is small and frequently accessed,
+ * so treat it as working memory.
+ *
+ * If there is not enough room for working memory, we will try to move
+ * textures to host memory, assuming the performance impact would have
+ * been worse for working memory. */
+ bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
+ bool is_image = is_texture && (mem.data_height > 1);
+
+ size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
+
+ size_t total = 0, free = 0;
+ cuMemGetInfo(&free, &total);
+
+ /* Move textures to host memory if needed. */
+ if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
+ move_textures_to_host(size + headroom - free, is_texture);
+ cuMemGetInfo(&free, &total);
+ }
+
+ /* Allocate in device memory. */
+ if (!move_texture_to_host && (size + headroom) < free) {
+ mem_alloc_result = cuMemAlloc(&device_pointer, size);
+ if (mem_alloc_result == CUDA_SUCCESS) {
+ status = " in device memory";
+ }
+ }
+
+ /* Fall back to mapped host memory if needed and possible. */
+
+ void *shared_pointer = 0;
+
+ if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
+ if (mem.shared_pointer) {
+ /* Another device already allocated host memory. */
+ mem_alloc_result = CUDA_SUCCESS;
+ shared_pointer = mem.shared_pointer;
+ }
+ else if (map_host_used + size < map_host_limit) {
+ /* Allocate host memory ourselves. */
+ mem_alloc_result = cuMemHostAlloc(
+ &shared_pointer, size, CU_MEMHOSTALLOC_DEVICEMAP | CU_MEMHOSTALLOC_WRITECOMBINED);
+
+ assert((mem_alloc_result == CUDA_SUCCESS && shared_pointer != 0) ||
+ (mem_alloc_result != CUDA_SUCCESS && shared_pointer == 0));
+ }
+
+ if (mem_alloc_result == CUDA_SUCCESS) {
+ cuda_assert(cuMemHostGetDevicePointer_v2(&device_pointer, shared_pointer, 0));
+ map_host_used += size;
+ status = " in host memory";
+ }
+ else {
+ status = " failed, out of host memory";
+ }
+ }
+
+ if (mem_alloc_result != CUDA_SUCCESS) {
+ status = " failed, out of device and host memory";
+ cuda_assert(mem_alloc_result);
+ }
+
+ if (mem.name) {
+ VLOG(1) << "Buffer allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")" << status;
+ }
+
+ mem.device_pointer = (device_ptr)device_pointer;
+ mem.device_size = size;
+ stats.mem_alloc(size);
+
+ if (!mem.device_pointer) {
+ return NULL;
+ }
+
+ /* Insert into map of allocations. */
+ CUDAMem *cmem = &cuda_mem_map[&mem];
+ if (shared_pointer != 0) {
+ /* Replace host pointer with our host allocation. Only works if
+ * CUDA memory layout is the same and has no pitch padding. Also
+ * does not work if we move textures to host during a render,
+ * since other devices might be using the memory. */
+
+ if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
+ mem.host_pointer != shared_pointer) {
+ memcpy(shared_pointer, mem.host_pointer, size);
+
+        /* A call to device_memory::host_free() should be preceded by
+ * a call to device_memory::device_free() for host memory
+ * allocated by a device to be handled properly. Two exceptions
+ * are here and a call in OptiXDevice::generic_alloc(), where
+ * the current host memory can be assumed to be allocated by
+ * device_memory::host_alloc(), not by a device */
+
+ mem.host_free();
+ mem.host_pointer = shared_pointer;
+ }
+ mem.shared_pointer = shared_pointer;
+ mem.shared_counter++;
+ cmem->use_mapped_host = true;
+ }
+ else {
+ cmem->use_mapped_host = false;
+ }
+
+ return cmem;
+ }
+
+ void generic_copy_to(device_memory &mem)
+ {
+ if (mem.host_pointer && mem.device_pointer) {
+ CUDAContextScope scope(this);
+
+ /* If use_mapped_host of mem is false, the current device only
+ * uses device memory allocated by cuMemAlloc regardless of
+ * mem.host_pointer and mem.shared_pointer, and should copy
+ * data from mem.host_pointer. */
+
+ if (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer) {
+ cuda_assert(cuMemcpyHtoD(
+ cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
+ }
+ }
+ }
+
+ void generic_free(device_memory &mem)
+ {
+ if (mem.device_pointer) {
+ CUDAContextScope scope(this);
+ const CUDAMem &cmem = cuda_mem_map[&mem];
+
+ /* If cmem.use_mapped_host is true, reference counting is used
+ * to safely free a mapped host memory. */
+
+ if (cmem.use_mapped_host) {
+ assert(mem.shared_pointer);
+ if (mem.shared_pointer) {
+ assert(mem.shared_counter > 0);
+ if (--mem.shared_counter == 0) {
+ if (mem.host_pointer == mem.shared_pointer) {
+ mem.host_pointer = 0;
+ }
+ cuMemFreeHost(mem.shared_pointer);
+ mem.shared_pointer = 0;
+ }
+ }
+ map_host_used -= mem.device_size;
+ }
+ else {
+ /* Free device memory. */
+ cuMemFree(mem.device_pointer);
+ }
+
+ stats.mem_free(mem.device_size);
+ mem.device_pointer = 0;
+ mem.device_size = 0;
+
+ cuda_mem_map.erase(cuda_mem_map.find(&mem));
+ }
+ }
+
+ void mem_alloc(device_memory &mem)
+ {
+ if (mem.type == MEM_PIXELS && !background) {
+ pixels_alloc(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ assert(!"mem_alloc not supported for textures.");
+ }
+ else {
+ generic_alloc(mem);
+ }
+ }
+
+ void mem_copy_to(device_memory &mem)
+ {
+ if (mem.type == MEM_PIXELS) {
+ assert(!"mem_copy_to not supported for pixels.");
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ tex_alloc(mem);
+ }
+ else {
+ if (!mem.device_pointer) {
+ generic_alloc(mem);
+ }
+
+ generic_copy_to(mem);
+ }
+ }
+
+ void mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
+ {
+ if (mem.type == MEM_PIXELS && !background) {
+ pixels_copy_from(mem, y, w, h);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ assert(!"mem_copy_from not supported for textures.");
+ }
+ else {
+ CUDAContextScope scope(this);
+ size_t offset = elem * y * w;
+ size_t size = elem * w * h;
+
+ if (mem.host_pointer && mem.device_pointer) {
+ cuda_assert(cuMemcpyDtoH(
+ (uchar *)mem.host_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size));
+ }
+ else if (mem.host_pointer) {
+ memset((char *)mem.host_pointer + offset, 0, size);
+ }
+ }
+ }
+
+ void mem_zero(device_memory &mem)
+ {
+ if (!mem.device_pointer) {
+ mem_alloc(mem);
+ }
+
+ if (mem.host_pointer) {
+ memset(mem.host_pointer, 0, mem.memory_size());
+ }
+
+ /* If use_mapped_host of mem is false, mem.device_pointer currently
+ * refers to device memory regardless of mem.host_pointer and
+ * mem.shared_pointer. */
+
+ if (mem.device_pointer &&
+ (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer)) {
+ CUDAContextScope scope(this);
+ cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
+ }
+ }
+
+ void mem_free(device_memory &mem)
+ {
+ if (mem.type == MEM_PIXELS && !background) {
+ pixels_free(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free(mem);
+ }
+ else {
+ generic_free(mem);
+ }
+ }
+
+ virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
+ {
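+    /* Note (editor, inferred from the pointer arithmetic below): sub-pointers
+     * alias the parent allocation at an element offset; nothing is allocated
+     * here, so they are not freed separately. */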
+ return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
+ }
+
+ void const_copy_to(const char *name, void *host, size_t size)
+ {
+ CUDAContextScope scope(this);
+ CUdeviceptr mem;
+ size_t bytes;
+
+ cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name));
+ // assert(bytes == size);
+ cuda_assert(cuMemcpyHtoD(mem, host, size));
+ }
+
+ void tex_alloc(device_memory &mem)
+ {
+ CUDAContextScope scope(this);
+
+ /* General variables for both architectures */
+ string bind_name = mem.name;
+ size_t dsize = datatype_size(mem.data_type);
+ size_t size = mem.memory_size();
+
+ CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
+ switch (mem.extension) {
+ case EXTENSION_REPEAT:
+ address_mode = CU_TR_ADDRESS_MODE_WRAP;
+ break;
+ case EXTENSION_EXTEND:
+ address_mode = CU_TR_ADDRESS_MODE_CLAMP;
+ break;
+ case EXTENSION_CLIP:
+ address_mode = CU_TR_ADDRESS_MODE_BORDER;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ CUfilter_mode filter_mode;
+ if (mem.interpolation == INTERPOLATION_CLOSEST) {
+ filter_mode = CU_TR_FILTER_MODE_POINT;
+ }
+ else {
+ filter_mode = CU_TR_FILTER_MODE_LINEAR;
+ }
+
+ /* Data Storage */
+ if (mem.interpolation == INTERPOLATION_NONE) {
+ generic_alloc(mem);
+ generic_copy_to(mem);
+
+ CUdeviceptr cumem;
+ size_t cubytes;
+
+ cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
+
+ if (cubytes == 8) {
+ /* 64 bit device pointer */
+ uint64_t ptr = mem.device_pointer;
+ cuda_assert(cuMemcpyHtoD(cumem, (void *)&ptr, cubytes));
+ }
+ else {
+ /* 32 bit device pointer */
+ uint32_t ptr = (uint32_t)mem.device_pointer;
+ cuda_assert(cuMemcpyHtoD(cumem, (void *)&ptr, cubytes));
+ }
+ return;
+ }
+
+ /* Image Texture Storage */
+ CUarray_format_enum format;
+ switch (mem.data_type) {
+ case TYPE_UCHAR:
+ format = CU_AD_FORMAT_UNSIGNED_INT8;
+ break;
+ case TYPE_UINT16:
+ format = CU_AD_FORMAT_UNSIGNED_INT16;
+ break;
+ case TYPE_UINT:
+ format = CU_AD_FORMAT_UNSIGNED_INT32;
+ break;
+ case TYPE_INT:
+ format = CU_AD_FORMAT_SIGNED_INT32;
+ break;
+ case TYPE_FLOAT:
+ format = CU_AD_FORMAT_FLOAT;
+ break;
+ case TYPE_HALF:
+ format = CU_AD_FORMAT_HALF;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ CUDAMem *cmem = NULL;
+ CUarray array_3d = NULL;
+ size_t src_pitch = mem.data_width * dsize * mem.data_elements;
+ size_t dst_pitch = src_pitch;
+
+ if (mem.data_depth > 1) {
+ /* 3D texture using array, there is no API for linear memory. */
+ CUDA_ARRAY3D_DESCRIPTOR desc;
+
+ desc.Width = mem.data_width;
+ desc.Height = mem.data_height;
+ desc.Depth = mem.data_depth;
+ desc.Format = format;
+ desc.NumChannels = mem.data_elements;
+ desc.Flags = 0;
+
+ VLOG(1) << "Array 3D allocate: " << mem.name << ", "
+ << string_human_readable_number(mem.memory_size()) << " bytes. ("
+ << string_human_readable_size(mem.memory_size()) << ")";
+
+ cuda_assert(cuArray3DCreate(&array_3d, &desc));
+
+ if (!array_3d) {
+ return;
+ }
+
+ CUDA_MEMCPY3D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+ param.dstArray = array_3d;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = mem.host_pointer;
+ param.srcPitch = src_pitch;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
+ param.Depth = mem.data_depth;
+
+ cuda_assert(cuMemcpy3D(&param));
+
+ mem.device_pointer = (device_ptr)array_3d;
+ mem.device_size = size;
+ stats.mem_alloc(size);
+
+ cmem = &cuda_mem_map[&mem];
+ cmem->texobject = 0;
+ cmem->array = array_3d;
+ }
+ else if (mem.data_height > 0) {
+ /* 2D texture, using pitch aligned linear memory. */
+ int alignment = 0;
+ cuda_assert(
+ cuDeviceGetAttribute(&alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
+ dst_pitch = align_up(src_pitch, alignment);
+ size_t dst_size = dst_pitch * mem.data_height;
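+      /* e.g. a 1001-pixel-wide float4 texture has src_pitch = 16016 bytes;
+       * assuming a 32-byte pitch alignment, dst_pitch becomes 16032. */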
+
+ cmem = generic_alloc(mem, dst_size - mem.memory_size());
+ if (!cmem) {
+ return;
+ }
+
+ CUDA_MEMCPY2D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+ param.dstDevice = mem.device_pointer;
+ param.dstPitch = dst_pitch;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = mem.host_pointer;
+ param.srcPitch = src_pitch;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
+
+ cuda_assert(cuMemcpy2DUnaligned(&param));
+ }
+ else {
+ /* 1D texture, using linear memory. */
+ cmem = generic_alloc(mem);
+ if (!cmem) {
+ return;
+ }
+
+ cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
+ }
+
+ /* Kepler+, bindless textures. */
+ int flat_slot = 0;
+ if (string_startswith(mem.name, "__tex_image")) {
+ int pos = string(mem.name).rfind("_");
+ flat_slot = atoi(mem.name + pos + 1);
+ }
+ else {
+ assert(0);
+ }
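+    /* e.g. a texture named "__tex_image_float4_095" (hypothetical name)
+     * parses to flat_slot 95; any other name trips the assert above. */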
+
+ CUDA_RESOURCE_DESC resDesc;
+ memset(&resDesc, 0, sizeof(resDesc));
+
+ if (array_3d) {
+ resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
+ resDesc.res.array.hArray = array_3d;
+ resDesc.flags = 0;
+ }
+ else if (mem.data_height > 0) {
+ resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
+ resDesc.res.pitch2D.devPtr = mem.device_pointer;
+ resDesc.res.pitch2D.format = format;
+ resDesc.res.pitch2D.numChannels = mem.data_elements;
+ resDesc.res.pitch2D.height = mem.data_height;
+ resDesc.res.pitch2D.width = mem.data_width;
+ resDesc.res.pitch2D.pitchInBytes = dst_pitch;
+ }
+ else {
+ resDesc.resType = CU_RESOURCE_TYPE_LINEAR;
+ resDesc.res.linear.devPtr = mem.device_pointer;
+ resDesc.res.linear.format = format;
+ resDesc.res.linear.numChannels = mem.data_elements;
+ resDesc.res.linear.sizeInBytes = mem.device_size;
+ }
+
+ CUDA_TEXTURE_DESC texDesc;
+ memset(&texDesc, 0, sizeof(texDesc));
+ texDesc.addressMode[0] = address_mode;
+ texDesc.addressMode[1] = address_mode;
+ texDesc.addressMode[2] = address_mode;
+ texDesc.filterMode = filter_mode;
+ texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
+
+ cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
+
+ /* Resize once */
+ if (flat_slot >= texture_info.size()) {
+      /* Allocate some slots in advance, to reduce the number
+       * of re-allocations. */
+ texture_info.resize(flat_slot + 128);
+ }
+
+ /* Set Mapping and tag that we need to (re-)upload to device */
+ TextureInfo &info = texture_info[flat_slot];
+ info.data = (uint64_t)cmem->texobject;
+ info.cl_buffer = 0;
+ info.interpolation = mem.interpolation;
+ info.extension = mem.extension;
+ info.width = mem.data_width;
+ info.height = mem.data_height;
+ info.depth = mem.data_depth;
+ need_texture_info = true;
+ }
+
+ void tex_free(device_memory &mem)
+ {
+ if (mem.device_pointer) {
+ CUDAContextScope scope(this);
+ const CUDAMem &cmem = cuda_mem_map[&mem];
+
+ if (cmem.texobject) {
+ /* Free bindless texture. */
+ cuTexObjectDestroy(cmem.texobject);
+ }
+
+ if (cmem.array) {
+ /* Free array. */
+ cuArrayDestroy(cmem.array);
+ stats.mem_free(mem.device_size);
+ mem.device_pointer = 0;
+ mem.device_size = 0;
+
+ cuda_mem_map.erase(cuda_mem_map.find(&mem));
+ }
+ else {
+ generic_free(mem);
+ }
+ }
+ }
+
+#define CUDA_GET_BLOCKSIZE(func, w, h) \
+ int threads_per_block; \
+ cuda_assert( \
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
+ int threads = (int)sqrt((float)threads_per_block); \
+ int xblocks = ((w) + threads - 1) / threads; \
+ int yblocks = ((h) + threads - 1) / threads;
+
+#define CUDA_LAUNCH_KERNEL(func, args) \
+ cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0));
+
+/* Similar to the above, but for 1-dimensional blocks. */
+#define CUDA_GET_BLOCKSIZE_1D(func, w, h) \
+ int threads_per_block; \
+ cuda_assert( \
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); \
+ int xblocks = ((w) + threads_per_block - 1) / threads_per_block; \
+ int yblocks = h;
+
+#define CUDA_LAUNCH_KERNEL_1D(func, args) \
+ cuda_assert(cuLaunchKernel(func, xblocks, yblocks, 1, threads_per_block, 1, 1, 0, 0, args, 0));
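+/* Usage sketch (illustrative, placeholder names): each GET/LAUNCH pair
+ * expands into local variable declarations, so a given pair may be used at
+ * most once per scope:
+ *
+ *   CUDA_GET_BLOCKSIZE(cuFunc, w, h);
+ *   void *args[] = {&w, &h};
+ *   CUDA_LAUNCH_KERNEL(cuFunc, args);
+ */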
+
+ bool denoising_non_local_means(device_ptr image_ptr,
+ device_ptr guide_ptr,
+ device_ptr variance_ptr,
+ device_ptr out_ptr,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ int stride = task->buffer.stride;
+ int w = task->buffer.width;
+ int h = task->buffer.h;
+ int r = task->nlm_state.r;
+ int f = task->nlm_state.f;
+ float a = task->nlm_state.a;
+ float k_2 = task->nlm_state.k_2;
+
+ int pass_stride = task->buffer.pass_stride;
+ int num_shifts = (2 * r + 1) * (2 * r + 1);
+ int channel_offset = task->nlm_state.is_color ? task->buffer.pass_stride : 0;
+ int frame_offset = 0;
+
+ if (have_error())
+ return false;
+
+ CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
+ CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
+ CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts;
+ CUdeviceptr scale_ptr = 0;
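+    /* The pointers above carve task->buffer.temporary_mem into regions:
+     * difference and blurDifference each span pass_stride * num_shifts
+     * floats, and weightAccum follows them in the same allocation. */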
+
+ cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float) * pass_stride));
+ cuda_assert(cuMemsetD8(out_ptr, 0, sizeof(float) * pass_stride));
+
+ {
+ CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMUpdateOutput;
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
+ cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMUpdateOutput, cuFilterModule, "kernel_cuda_filter_nlm_update_output"));
+
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMUpdateOutput, CU_FUNC_CACHE_PREFER_L1));
+
+ CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w * h, num_shifts);
+
+ void *calc_difference_args[] = {&guide_ptr,
+ &variance_ptr,
+ &scale_ptr,
+ &difference,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &r,
+ &channel_offset,
+ &frame_offset,
+ &a,
+ &k_2};
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *calc_weight_args[] = {
+ &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *update_output_args[] = {&blurDifference,
+ &image_ptr,
+ &out_ptr,
+ &weightAccum,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &channel_offset,
+ &r,
+ &f};
+
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args);
+ }
+
+ {
+ CUfunction cuNLMNormalize;
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMNormalize, cuFilterModule, "kernel_cuda_filter_nlm_normalize"));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMNormalize, CU_FUNC_CACHE_PREFER_L1));
+ void *normalize_args[] = {&out_ptr, &weightAccum, &w, &h, &stride};
+ CUDA_GET_BLOCKSIZE(cuNLMNormalize, w, h);
+ CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args);
+ cuda_assert(cuCtxSynchronize());
+ }
+
+ return !have_error();
+ }
+
+ bool denoising_construct_transform(DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterConstructTransform;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterConstructTransform, cuFilterModule, "kernel_cuda_filter_construct_transform"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterConstructTransform, CU_FUNC_CACHE_PREFER_SHARED));
+ CUDA_GET_BLOCKSIZE(cuFilterConstructTransform, task->storage.w, task->storage.h);
+
+ void *args[] = {&task->buffer.mem.device_pointer,
+ &task->tile_info_mem.device_pointer,
+ &task->storage.transform.device_pointer,
+ &task->storage.rank.device_pointer,
+ &task->filter_area,
+ &task->rect,
+ &task->radius,
+ &task->pca_threshold,
+ &task->buffer.pass_stride,
+ &task->buffer.frame_stride,
+ &task->buffer.use_time};
+ CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_accumulate(device_ptr color_ptr,
+ device_ptr color_variance_ptr,
+ device_ptr scale_ptr,
+ int frame,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ int r = task->radius;
+ int f = 4;
+ float a = 1.0f;
+ float k_2 = task->nlm_k_2;
+
+ int w = task->reconstruction_state.source_w;
+ int h = task->reconstruction_state.source_h;
+ int stride = task->buffer.stride;
+ int frame_offset = frame * task->buffer.frame_stride;
+ int t = task->tile_info->frames[frame];
+
+ int pass_stride = task->buffer.pass_stride;
+ int num_shifts = (2 * r + 1) * (2 * r + 1);
+
+ if (have_error())
+ return false;
+
+ CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
+ CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
+
+ CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
+ cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMCalcWeight, cuFilterModule, "kernel_cuda_filter_nlm_calc_weight"));
+ cuda_assert(cuModuleGetFunction(
+ &cuNLMConstructGramian, cuFilterModule, "kernel_cuda_filter_nlm_construct_gramian"));
+
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcDifference, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMBlur, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMCalcWeight, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuNLMConstructGramian, CU_FUNC_CACHE_PREFER_SHARED));
+
+ CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference,
+ task->reconstruction_state.source_w *
+ task->reconstruction_state.source_h,
+ num_shifts);
+
+ void *calc_difference_args[] = {&color_ptr,
+ &color_variance_ptr,
+ &scale_ptr,
+ &difference,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &r,
+ &pass_stride,
+ &frame_offset,
+ &a,
+ &k_2};
+ void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *calc_weight_args[] = {
+ &blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f};
+ void *construct_gramian_args[] = {&t,
+ &blurDifference,
+ &task->buffer.mem.device_pointer,
+ &task->storage.transform.device_pointer,
+ &task->storage.rank.device_pointer,
+ &task->storage.XtWX.device_pointer,
+ &task->storage.XtWY.device_pointer,
+ &task->reconstruction_state.filter_window,
+ &w,
+ &h,
+ &stride,
+ &pass_stride,
+ &r,
+ &f,
+ &frame_offset,
+ &task->buffer.use_time};
+
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMCalcWeight, calc_weight_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args);
+ CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_solve(device_ptr output_ptr, DenoisingTask *task)
+ {
+ CUfunction cuFinalize;
+ cuda_assert(cuModuleGetFunction(&cuFinalize, cuFilterModule, "kernel_cuda_filter_finalize"));
+ cuda_assert(cuFuncSetCacheConfig(cuFinalize, CU_FUNC_CACHE_PREFER_L1));
+ void *finalize_args[] = {&output_ptr,
+ &task->storage.rank.device_pointer,
+ &task->storage.XtWX.device_pointer,
+ &task->storage.XtWY.device_pointer,
+ &task->filter_area,
+ &task->reconstruction_state.buffer_params.x,
+ &task->render_buffer.samples};
+ CUDA_GET_BLOCKSIZE(
+ cuFinalize, task->reconstruction_state.source_w, task->reconstruction_state.source_h);
+ CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_combine_halves(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ int r,
+ int4 rect,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterCombineHalves;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterCombineHalves, cuFilterModule, "kernel_cuda_filter_combine_halves"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterCombineHalves, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterCombineHalves, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&mean_ptr, &variance_ptr, &a_ptr, &b_ptr, &rect, &r};
+ CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_divide_shadow(device_ptr a_ptr,
+ device_ptr b_ptr,
+ device_ptr sample_variance_ptr,
+ device_ptr sv_variance_ptr,
+ device_ptr buffer_variance_ptr,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterDivideShadow;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterDivideShadow, cuFilterModule, "kernel_cuda_filter_divide_shadow"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterDivideShadow, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->tile_info_mem.device_pointer,
+ &a_ptr,
+ &b_ptr,
+ &sample_variance_ptr,
+ &sv_variance_ptr,
+ &buffer_variance_ptr,
+ &task->rect,
+ &task->render_buffer.pass_stride,
+ &task->render_buffer.offset};
+ CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_get_feature(int mean_offset,
+ int variance_offset,
+ device_ptr mean_ptr,
+ device_ptr variance_ptr,
+ float scale,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterGetFeature;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterGetFeature, cuFilterModule, "kernel_cuda_filter_get_feature"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterGetFeature, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterGetFeature, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->tile_info_mem.device_pointer,
+ &mean_offset,
+ &variance_offset,
+ &mean_ptr,
+ &variance_ptr,
+ &scale,
+ &task->rect,
+ &task->render_buffer.pass_stride,
+ &task->render_buffer.offset};
+ CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_write_feature(int out_offset,
+ device_ptr from_ptr,
+ device_ptr buffer_ptr,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterWriteFeature;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterWriteFeature, cuFilterModule, "kernel_cuda_filter_write_feature"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterWriteFeature, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(cuFilterWriteFeature, task->filter_area.z, task->filter_area.w);
+
+ void *args[] = {&task->render_buffer.samples,
+ &task->reconstruction_state.buffer_params,
+ &task->filter_area,
+ &from_ptr,
+ &buffer_ptr,
+ &out_offset,
+ &task->rect};
+ CUDA_LAUNCH_KERNEL(cuFilterWriteFeature, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ bool denoising_detect_outliers(device_ptr image_ptr,
+ device_ptr variance_ptr,
+ device_ptr depth_ptr,
+ device_ptr output_ptr,
+ DenoisingTask *task)
+ {
+ if (have_error())
+ return false;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilterDetectOutliers;
+ cuda_assert(cuModuleGetFunction(
+ &cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers"));
+ cuda_assert(cuFuncSetCacheConfig(cuFilterDetectOutliers, CU_FUNC_CACHE_PREFER_L1));
+ CUDA_GET_BLOCKSIZE(
+ cuFilterDetectOutliers, task->rect.z - task->rect.x, task->rect.w - task->rect.y);
+
+ void *args[] = {&image_ptr,
+ &variance_ptr,
+ &depth_ptr,
+ &output_ptr,
+ &task->rect,
+ &task->buffer.pass_stride};
+
+ CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
+ cuda_assert(cuCtxSynchronize());
+
+ return !have_error();
+ }
+
+ void denoise(RenderTile &rtile, DenoisingTask &denoising)
+ {
+ denoising.functions.construct_transform = function_bind(
+ &CUDADevice::denoising_construct_transform, this, &denoising);
+ denoising.functions.accumulate = function_bind(
+ &CUDADevice::denoising_accumulate, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.solve = function_bind(&CUDADevice::denoising_solve, this, _1, &denoising);
+ denoising.functions.divide_shadow = function_bind(
+ &CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.non_local_means = function_bind(
+ &CUDADevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
+ denoising.functions.combine_halves = function_bind(
+ &CUDADevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
+ denoising.functions.get_feature = function_bind(
+ &CUDADevice::denoising_get_feature, this, _1, _2, _3, _4, _5, &denoising);
+ denoising.functions.write_feature = function_bind(
+ &CUDADevice::denoising_write_feature, this, _1, _2, _3, &denoising);
+ denoising.functions.detect_outliers = function_bind(
+ &CUDADevice::denoising_detect_outliers, this, _1, _2, _3, _4, &denoising);
+
+ denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
+ denoising.render_buffer.samples = rtile.sample;
+ denoising.buffer.gpu_temporary_mem = true;
+
+ denoising.run_denoising(&rtile);
+ }
+
+ void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles)
+ {
+ scoped_timer timer(&rtile.buffers->render_time);
+
+ if (have_error())
+ return;
+
+ CUDAContextScope scope(this);
+ CUfunction cuPathTrace;
+
+ /* Get kernel function. */
+ if (task.integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ }
+
+ if (have_error()) {
+ return;
+ }
+
+ cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+
+ /* Allocate work tile. */
+ work_tiles.alloc(1);
+
+ WorkTile *wtile = work_tiles.data();
+ wtile->x = rtile.x;
+ wtile->y = rtile.y;
+ wtile->w = rtile.w;
+ wtile->h = rtile.h;
+ wtile->offset = rtile.offset;
+ wtile->stride = rtile.stride;
+ wtile->buffer = (float *)cuda_device_ptr(rtile.buffer);
+
+ /* Prepare work size. More step samples render faster, but for now we
+ * remain conservative for GPUs connected to a display to avoid driver
+ * timeouts and display freezing. */
+ int min_blocks, num_threads_per_block;
+ cuda_assert(cuOccupancyMaxPotentialBlockSize(
+ &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+ if (!info.display_device) {
+ min_blocks *= 8;
+ }
+
+ uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
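+    /* Example (hypothetical numbers): min_blocks = 40 and 256 threads per
+     * block on a non-display device (min_blocks *= 8 -> 320) with a 64x64
+     * tile gives step_samples = divide_up(320 * 256, 4096) = 20. */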
+
+ /* Render all samples. */
+ int start_sample = rtile.start_sample;
+ int end_sample = rtile.start_sample + rtile.num_samples;
+
+ for (int sample = start_sample; sample < end_sample; sample += step_samples) {
+ /* Setup and copy work tile to device. */
+ wtile->start_sample = sample;
+ wtile->num_samples = min(step_samples, end_sample - sample);
+ work_tiles.copy_to_device();
+
+ CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
+ uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
+ uint num_blocks = divide_up(total_work_size, num_threads_per_block);
+
+ /* Launch kernel. */
+ void *args[] = {&d_work_tiles, &total_work_size};
+
+ cuda_assert(cuLaunchKernel(
+ cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+
+ cuda_assert(cuCtxSynchronize());
+
+ /* Update progress. */
+ rtile.sample = sample + wtile->num_samples;
+ task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
+
+ if (task.get_cancel()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+ }
+ }
+
+ void film_convert(DeviceTask &task,
+ device_ptr buffer,
+ device_ptr rgba_byte,
+ device_ptr rgba_half)
+ {
+ if (have_error())
+ return;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuFilmConvert;
+ CUdeviceptr d_rgba = map_pixels((rgba_byte) ? rgba_byte : rgba_half);
+ CUdeviceptr d_buffer = cuda_device_ptr(buffer);
+
+ /* get kernel function */
+ if (rgba_half) {
+ cuda_assert(
+ cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"));
+ }
+
+ float sample_scale = 1.0f / (task.sample + 1);
+
+ /* pass in parameters */
+ void *args[] = {&d_rgba,
+ &d_buffer,
+ &sample_scale,
+ &task.x,
+ &task.y,
+ &task.w,
+ &task.h,
+ &task.offset,
+ &task.stride};
+
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(
+ &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert));
+
+ int xthreads = (int)sqrt(threads_per_block);
+ int ythreads = (int)sqrt(threads_per_block);
+ int xblocks = (task.w + xthreads - 1) / xthreads;
+ int yblocks = (task.h + ythreads - 1) / ythreads;
+
+ cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1));
+
+ cuda_assert(cuLaunchKernel(cuFilmConvert,
+ xblocks,
+ yblocks,
+ 1, /* blocks */
+ xthreads,
+ ythreads,
+ 1, /* threads */
+ 0,
+ 0,
+ args,
+ 0));
+
+ unmap_pixels((rgba_byte) ? rgba_byte : rgba_half);
+
+ cuda_assert(cuCtxSynchronize());
+ }
+
+ void shader(DeviceTask &task)
+ {
+ if (have_error())
+ return;
+
+ CUDAContextScope scope(this);
+
+ CUfunction cuShader;
+ CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
+ CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
+
+ /* get kernel function */
+ if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
+ }
+ else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_background"));
+ }
+
+ /* do tasks in smaller chunks, so we can cancel it */
+ const int shader_chunk_size = 65536;
+ const int start = task.shader_x;
+ const int end = task.shader_x + task.shader_w;
+ int offset = task.offset;
+
+ bool canceled = false;
+ for (int sample = 0; sample < task.num_samples && !canceled; sample++) {
+ for (int shader_x = start; shader_x < end; shader_x += shader_chunk_size) {
+ int shader_w = min(shader_chunk_size, end - shader_x);
+
+ /* pass in parameters */
+ void *args[8];
+ int arg = 0;
+ args[arg++] = &d_input;
+ args[arg++] = &d_output;
+ args[arg++] = &task.shader_eval_type;
+ if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
+ args[arg++] = &task.shader_filter;
+ }
+ args[arg++] = &shader_x;
+ args[arg++] = &shader_w;
+ args[arg++] = &offset;
+ args[arg++] = &sample;
+
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(
+ &threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
+
+ int xblocks = (shader_w + threads_per_block - 1) / threads_per_block;
+
+ cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuLaunchKernel(cuShader,
+ xblocks,
+ 1,
+ 1, /* blocks */
+ threads_per_block,
+ 1,
+ 1, /* threads */
+ 0,
+ 0,
+ args,
+ 0));
+
+ cuda_assert(cuCtxSynchronize());
+
+ if (task.get_cancel()) {
+ canceled = true;
+ break;
+ }
+ }
+
+ task.update_progress(NULL);
+ }
+ }
+
+ CUdeviceptr map_pixels(device_ptr mem)
+ {
+ if (!background) {
+ PixelMem pmem = pixel_mem_map[mem];
+ CUdeviceptr buffer;
+
+ size_t bytes;
+ cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0));
+ cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource));
+
+ return buffer;
+ }
+
+ return cuda_device_ptr(mem);
+ }
+
+ void unmap_pixels(device_ptr mem)
+ {
+ if (!background) {
+ PixelMem pmem = pixel_mem_map[mem];
+
+ cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0));
+ }
+ }
+
+ void pixels_alloc(device_memory &mem)
+ {
+ PixelMem pmem;
+
+ pmem.w = mem.data_width;
+ pmem.h = mem.data_height;
+
+ CUDAContextScope scope(this);
+
+ glGenBuffers(1, &pmem.cuPBO);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ if (mem.data_type == TYPE_HALF)
+ glBufferData(
+ GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(GLhalf) * 4, NULL, GL_DYNAMIC_DRAW);
+ else
+ glBufferData(
+ GL_PIXEL_UNPACK_BUFFER, pmem.w * pmem.h * sizeof(uint8_t) * 4, NULL, GL_DYNAMIC_DRAW);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+ glActiveTexture(GL_TEXTURE0);
+ glGenTextures(1, &pmem.cuTexId);
+ glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
+ if (mem.data_type == TYPE_HALF)
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
+ else
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ glBindTexture(GL_TEXTURE_2D, 0);
+
+ CUresult result = cuGraphicsGLRegisterBuffer(
+ &pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
+
+ if (result == CUDA_SUCCESS) {
+ mem.device_pointer = pmem.cuTexId;
+ pixel_mem_map[mem.device_pointer] = pmem;
+
+ mem.device_size = mem.memory_size();
+ stats.mem_alloc(mem.device_size);
+
+ return;
+ }
+ else {
+ /* failed to register buffer, fallback to no interop */
+ glDeleteBuffers(1, &pmem.cuPBO);
+ glDeleteTextures(1, &pmem.cuTexId);
+
+ background = true;
+ }
+ }
+
+ void pixels_copy_from(device_memory &mem, int y, int w, int h)
+ {
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
+
+ CUDAContextScope scope(this);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ uchar *pixels = (uchar *)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY);
+ size_t offset = sizeof(uchar) * 4 * y * w;
+ memcpy((uchar *)mem.host_pointer + offset, pixels + offset, sizeof(uchar) * 4 * w * h);
+ glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ }
+
+ void pixels_free(device_memory &mem)
+ {
+ if (mem.device_pointer) {
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
+
+ CUDAContextScope scope(this);
+
+ cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
+ glDeleteBuffers(1, &pmem.cuPBO);
+ glDeleteTextures(1, &pmem.cuTexId);
+
+ pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
+ mem.device_pointer = 0;
+
+ stats.mem_free(mem.device_size);
+ mem.device_size = 0;
+ }
+ }
+
+ void draw_pixels(device_memory &mem,
+ int y,
+ int w,
+ int h,
+ int width,
+ int height,
+ int dx,
+ int dy,
+ int dw,
+ int dh,
+ bool transparent,
+ const DeviceDrawParams &draw_params)
+ {
+ assert(mem.type == MEM_PIXELS);
+
+ if (!background) {
+ const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
+ PixelMem pmem = pixel_mem_map[mem.device_pointer];
+ float *vpointer;
+
+ CUDAContextScope scope(this);
+
+      /* for multi-device rendering, this assumes the inefficient approach of
+       * allocating all pixels on every device even though each renders only a subset */
+ size_t offset = 4 * y * w;
+
+ if (mem.data_type == TYPE_HALF)
+ offset *= sizeof(GLhalf);
+ else
+ offset *= sizeof(uint8_t);
+
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
+ glActiveTexture(GL_TEXTURE0);
+ glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
+ if (mem.data_type == TYPE_HALF) {
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, (void *)offset);
+ }
+ else {
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void *)offset);
+ }
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+ if (transparent) {
+ glEnable(GL_BLEND);
+ glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
+ }
+
+ GLint shader_program;
+ if (use_fallback_shader) {
+ if (!bind_fallback_display_space_shader(dw, dh)) {
+ return;
+ }
+ shader_program = fallback_shader_program;
+ }
+ else {
+ draw_params.bind_display_space_shader_cb();
+ glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
+ }
+
+ if (!vertex_buffer) {
+ glGenBuffers(1, &vertex_buffer);
+ }
+
+ glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
+ /* invalidate old contents -
+ * avoids stalling if buffer is still waiting in queue to be rendered */
+ glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
+
+ vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
+
+ if (vpointer) {
+ /* texture coordinate - vertex pair */
+ vpointer[0] = 0.0f;
+ vpointer[1] = 0.0f;
+ vpointer[2] = dx;
+ vpointer[3] = dy;
+
+ vpointer[4] = (float)w / (float)pmem.w;
+ vpointer[5] = 0.0f;
+ vpointer[6] = (float)width + dx;
+ vpointer[7] = dy;
+
+ vpointer[8] = (float)w / (float)pmem.w;
+ vpointer[9] = (float)h / (float)pmem.h;
+ vpointer[10] = (float)width + dx;
+ vpointer[11] = (float)height + dy;
+
+ vpointer[12] = 0.0f;
+ vpointer[13] = (float)h / (float)pmem.h;
+ vpointer[14] = dx;
+ vpointer[15] = (float)height + dy;
+
+ glUnmapBuffer(GL_ARRAY_BUFFER);
+ }
+
+ GLuint vertex_array_object;
+ GLuint position_attribute, texcoord_attribute;
+
+ glGenVertexArrays(1, &vertex_array_object);
+ glBindVertexArray(vertex_array_object);
+
+ texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
+ position_attribute = glGetAttribLocation(shader_program, "pos");
+
+ glEnableVertexAttribArray(texcoord_attribute);
+ glEnableVertexAttribArray(position_attribute);
+
+ glVertexAttribPointer(
+ texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
+ glVertexAttribPointer(position_attribute,
+ 2,
+ GL_FLOAT,
+ GL_FALSE,
+ 4 * sizeof(float),
+ (const GLvoid *)(sizeof(float) * 2));
+
+ glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+ if (use_fallback_shader) {
+ glUseProgram(0);
+ }
+ else {
+ draw_params.unbind_display_space_shader_cb();
+ }
+
+ if (transparent) {
+ glDisable(GL_BLEND);
+ }
+
+ glBindTexture(GL_TEXTURE_2D, 0);
+
+ return;
+ }
+
+ Device::draw_pixels(mem, y, w, h, width, height, dx, dy, dw, dh, transparent, draw_params);
+ }
+
+ void thread_run(DeviceTask *task)
+ {
+ CUDAContextScope scope(this);
+
+ if (task->type == DeviceTask::RENDER) {
+ DeviceRequestedFeatures requested_features;
+ if (use_split_kernel()) {
+ if (split_kernel == NULL) {
+ split_kernel = new CUDASplitKernel(this);
+ split_kernel->load_kernels(requested_features);
+ }
+ }
+
+ device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
+
+ /* keep rendering tiles until done */
+ RenderTile tile;
+ DenoisingTask denoising(this, *task);
+
+ while (task->acquire_tile(this, tile)) {
+ if (tile.task == RenderTile::PATH_TRACE) {
+ if (use_split_kernel()) {
+ device_only_memory<uchar> void_buffer(this, "void_buffer");
+ split_kernel->path_trace(task, tile, void_buffer, void_buffer);
+ }
+ else {
+ path_trace(*task, tile, work_tiles);
+ }
+ }
+ else if (tile.task == RenderTile::DENOISE) {
+ tile.sample = tile.start_sample + tile.num_samples;
+
+ denoise(tile, denoising);
+
+ task->update_progress(&tile, tile.w * tile.h);
+ }
+
+ task->release_tile(tile);
+
+ if (task->get_cancel()) {
+ if (task->need_finish_queue == false)
+ break;
+ }
+ }
+
+ work_tiles.free();
+ }
+ else if (task->type == DeviceTask::SHADER) {
+ shader(*task);
+
+ cuda_assert(cuCtxSynchronize());
+ }
+ }
+
+ class CUDADeviceTask : public DeviceTask {
+ public:
+ CUDADeviceTask(CUDADevice *device, DeviceTask &task) : DeviceTask(task)
+ {
+ run = function_bind(&CUDADevice::thread_run, device, this);
+ }
+ };
+
+ void task_add(DeviceTask &task)
+ {
+ CUDAContextScope scope(this);
+
+ /* Load texture info. */
+ load_texture_info();
+
+ /* Synchronize all memory copies before executing task. */
+ cuda_assert(cuCtxSynchronize());
+
+ if (task.type == DeviceTask::FILM_CONVERT) {
+      /* must be done in the main thread due to OpenGL access */
+ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
+ }
+ else {
+ task_pool.push(new CUDADeviceTask(this, task));
+ }
+ }
+
+ void task_wait()
+ {
+ task_pool.wait();
+ }
+
+ void task_cancel()
+ {
+ task_pool.cancel();
+ }
+
+ friend class CUDASplitKernelFunction;
+ friend class CUDASplitKernel;
+ friend class CUDAContextScope;
+};
+
+/* Redefine the cuda_assert macro so it can be used outside of the CUDADevice
+ * class, now that the definition of that class is complete.
+ */
+#undef cuda_assert
+#define cuda_assert(stmt) \
+ { \
+ CUresult result = stmt; \
+\
+ if (result != CUDA_SUCCESS) { \
+ string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \
+ if (device->error_msg == "") \
+ device->error_msg = message; \
+ fprintf(stderr, "%s\n", message.c_str()); \
+ /*cuda_abort();*/ \
+ device->cuda_error_documentation(); \
+ } \
+ } \
+ (void)0
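+
+/* Note: unlike the in-class variant, this definition reports errors through a
+ * local `device` pointer, which CUDAContextScope and the split-kernel helpers
+ * below have in scope. */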
+
+/* CUDA context scope. */
+
+CUDAContextScope::CUDAContextScope(CUDADevice *device) : device(device)
+{
+ cuda_assert(cuCtxPushCurrent(device->cuContext));
+}
+
+CUDAContextScope::~CUDAContextScope()
+{
+ cuda_assert(cuCtxPopCurrent(NULL));
+}
+
+/* split kernel */
+
+class CUDASplitKernelFunction : public SplitKernelFunction {
+ CUDADevice *device;
+ CUfunction func;
+
+ public:
+ CUDASplitKernelFunction(CUDADevice *device, CUfunction func) : device(device), func(func)
+ {
+ }
+
+ /* enqueue the kernel, returns false if there is an error */
+ bool enqueue(const KernelDimensions &dim, device_memory & /*kg*/, device_memory & /*data*/)
+ {
+ return enqueue(dim, NULL);
+ }
+
+ /* enqueue the kernel, returns false if there is an error */
+ bool enqueue(const KernelDimensions &dim, void *args[])
+ {
+ if (device->have_error())
+ return false;
+
+ CUDAContextScope scope(device);
+
+ /* we ignore dim.local_size for now, as this is faster */
+ int threads_per_block;
+ cuda_assert(
+ cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func));
+
+ int xblocks = (dim.global_size[0] * dim.global_size[1] + threads_per_block - 1) /
+ threads_per_block;
+
+ cuda_assert(cuFuncSetCacheConfig(func, CU_FUNC_CACHE_PREFER_L1));
+
+ cuda_assert(cuLaunchKernel(func,
+ xblocks,
+ 1,
+ 1, /* blocks */
+ threads_per_block,
+ 1,
+ 1, /* threads */
+ 0,
+ 0,
+ args,
+ 0));
+
+ return !device->have_error();
+ }
+};
+
+CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device), device(device)
+{
+}
+
+uint64_t CUDASplitKernel::state_buffer_size(device_memory & /*kg*/,
+ device_memory & /*data*/,
+ size_t num_threads)
+{
+ CUDAContextScope scope(device);
+
+ device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
+ size_buffer.alloc(1);
+ size_buffer.zero_to_device();
+
+ uint threads = num_threads;
+ CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
+
+ struct args_t {
+ uint *num_threads;
+ CUdeviceptr *size;
+ };
+
+ args_t args = {&threads, &d_size};
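+  /* cuLaunchKernel expects an array of pointers to the kernel parameters;
+   * args_t mirrors that layout, so &args can be passed as the void** array. */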
+
+ CUfunction state_buffer_size;
+ cuda_assert(
+ cuModuleGetFunction(&state_buffer_size, device->cuModule, "kernel_cuda_state_buffer_size"));
+
+ cuda_assert(cuLaunchKernel(state_buffer_size, 1, 1, 1, 1, 1, 1, 0, 0, (void **)&args, 0));
+
+ size_buffer.copy_from_device(0, 1, 1);
+ size_t size = size_buffer[0];
+ size_buffer.free();
+
+ return size;
+}
+
+bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim,
+ RenderTile &rtile,
+ int num_global_elements,
+ device_memory & /*kernel_globals*/,
+ device_memory & /*kernel_data*/,
+ device_memory &split_data,
+ device_memory &ray_state,
+ device_memory &queue_index,
+ device_memory &use_queues_flag,
+ device_memory &work_pool_wgs)
+{
+ CUDAContextScope scope(device);
+
+ CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer);
+ CUdeviceptr d_ray_state = device->cuda_device_ptr(ray_state.device_pointer);
+ CUdeviceptr d_queue_index = device->cuda_device_ptr(queue_index.device_pointer);
+ CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer);
+ CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer);
+
+ CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer);
+
+ int end_sample = rtile.start_sample + rtile.num_samples;
+ int queue_size = dim.global_size[0] * dim.global_size[1];
+
+ struct args_t {
+ CUdeviceptr *split_data_buffer;
+ int *num_elements;
+ CUdeviceptr *ray_state;
+ int *start_sample;
+ int *end_sample;
+ int *sx;
+ int *sy;
+ int *sw;
+ int *sh;
+ int *offset;
+ int *stride;
+ CUdeviceptr *queue_index;
+ int *queuesize;
+ CUdeviceptr *use_queues_flag;
+ CUdeviceptr *work_pool_wgs;
+ int *num_samples;
+ CUdeviceptr *buffer;
+ };
+
+ args_t args = {&d_split_data,
+ &num_global_elements,
+ &d_ray_state,
+ &rtile.start_sample,
+ &end_sample,
+ &rtile.x,
+ &rtile.y,
+ &rtile.w,
+ &rtile.h,
+ &rtile.offset,
+ &rtile.stride,
+ &d_queue_index,
+ &queue_size,
+ &d_use_queues_flag,
+ &d_work_pool_wgs,
+ &rtile.num_samples,
+ &d_buffer};
+
+ CUfunction data_init;
+ cuda_assert(
+ cuModuleGetFunction(&data_init, device->cuModule, "kernel_cuda_path_trace_data_init"));
+ if (device->have_error()) {
+ return false;
+ }
+
+ CUDASplitKernelFunction(device, data_init).enqueue(dim, (void **)&args);
+
+ return !device->have_error();
+}
+
+SplitKernelFunction *CUDASplitKernel::get_split_kernel_function(const string &kernel_name,
+ const DeviceRequestedFeatures &)
+{
+ CUDAContextScope scope(device);
+ CUfunction func;
+
+ cuda_assert(
+ cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data()));
+ if (device->have_error()) {
+ device->cuda_error_message(
+ string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data()));
+ return NULL;
+ }
+
+ return new CUDASplitKernelFunction(device, func);
+}
+
+int2 CUDASplitKernel::split_kernel_local_size()
+{
+ return make_int2(32, 1);
+}
+
+int2 CUDASplitKernel::split_kernel_global_size(device_memory &kg,
+ device_memory &data,
+ DeviceTask * /*task*/)
+{
+ CUDAContextScope scope(device);
+ size_t free;
+ size_t total;
+
+ cuda_assert(cuMemGetInfo(&free, &total));
+
+ VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(free)
+ << " bytes. (" << string_human_readable_size(free) << ").";
+
+ size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2);
+ size_t side = round_down((int)sqrt(num_elements), 32);
+ int2 global_size = make_int2(side, round_down(num_elements / side, 16));
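+  /* Example (hypothetical): num_elements = 1000000 gives
+   * side = round_down(1000, 32) = 992 and a global size of
+   * 992 x round_down(1000000 / 992, 16) = 992 x 1008 work items. */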
+ VLOG(1) << "Global size: " << global_size << ".";
+ return global_size;
+}
+
+bool device_cuda_init()
+{
+#ifdef WITH_CUDA_DYNLOAD
+ static bool initialized = false;
+ static bool result = false;
+
+ if (initialized)
+ return result;
+
+ initialized = true;
+ int cuew_result = cuewInit(CUEW_INIT_CUDA);
+ if (cuew_result == CUEW_SUCCESS) {
+ VLOG(1) << "CUEW initialization succeeded";
+ if (CUDADevice::have_precompiled_kernels()) {
+ VLOG(1) << "Found precompiled kernels";
+ result = true;
+ }
+# ifndef _WIN32
+ else if (cuewCompilerPath() != NULL) {
+ VLOG(1) << "Found CUDA compiler " << cuewCompilerPath();
+ result = true;
+ }
+ else {
+ VLOG(1) << "Neither precompiled kernels nor CUDA compiler was found,"
+ << " unable to use CUDA";
+ }
+# endif
+ }
+ else {
+ VLOG(1) << "CUEW initialization failed: "
+ << ((cuew_result == CUEW_ERROR_ATEXIT_FAILED) ? "Error setting up atexit() handler" :
+ "Error opening the library");
+ }
+
+ return result;
+#else /* WITH_CUDA_DYNLOAD */
+ return true;
+#endif /* WITH_CUDA_DYNLOAD */
+}
+
+Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
+{
+ return new CUDADevice(info, stats, profiler, background);
+}
+
+static CUresult device_cuda_safe_init()
+{
+#ifdef _WIN32
+ __try {
+ return cuInit(0);
+ }
+ __except (EXCEPTION_EXECUTE_HANDLER) {
+ /* Ignore crashes inside the CUDA driver and hope we can
+ * survive even with corrupted CUDA installs. */
+ fprintf(stderr, "Cycles CUDA: driver crashed, continuing without CUDA.\n");
+ }
+
+ return CUDA_ERROR_NO_DEVICE;
+#else
+ return cuInit(0);
+#endif
+}
+
+void device_cuda_info(vector<DeviceInfo> &devices)
+{
+ CUresult result = device_cuda_safe_init();
+ if (result != CUDA_SUCCESS) {
+ if (result != CUDA_ERROR_NO_DEVICE)
+ fprintf(stderr, "CUDA cuInit: %s\n", cuewErrorString(result));
+ return;
+ }
+
+ int count = 0;
+ result = cuDeviceGetCount(&count);
+ if (result != CUDA_SUCCESS) {
+ fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result));
+ return;
+ }
+
+ vector<DeviceInfo> display_devices;
+
+ for (int num = 0; num < count; num++) {
+ char name[256];
+
+ result = cuDeviceGetName(name, 256, num);
+ if (result != CUDA_SUCCESS) {
+ fprintf(stderr, "CUDA cuDeviceGetName: %s\n", cuewErrorString(result));
+ continue;
+ }
+
+ int major;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
+ if (major < 3) {
+ VLOG(1) << "Ignoring device \"" << name << "\", this graphics card is no longer supported.";
+ continue;
+ }
+
+ DeviceInfo info;
+
+ info.type = DEVICE_CUDA;
+ info.description = string(name);
+ info.num = num;
+
+ info.has_half_images = (major >= 3);
+ info.has_volume_decoupled = false;
+
+ int pci_location[3] = {0, 0, 0};
+ cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
+ cuDeviceGetAttribute(&pci_location[1], CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, num);
+ cuDeviceGetAttribute(&pci_location[2], CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, num);
+ info.id = string_printf("CUDA_%s_%04x:%02x:%02x",
+ name,
+ (unsigned int)pci_location[0],
+ (unsigned int)pci_location[1],
+ (unsigned int)pci_location[2]);
+
+ /* If device has a kernel timeout and no compute preemption, we assume
+ * it is connected to a display and will freeze the display while doing
+ * computations. */
+ int timeout_attr = 0, preempt_attr = 0;
+ cuDeviceGetAttribute(&timeout_attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num);
+ cuDeviceGetAttribute(&preempt_attr, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, num);
+
+ /* The CUDA driver reports compute preemption as not being available on
+ * Windows 10 even when it is, due to an issue in application profiles.
+ * Detect case where we expect it to be available and override. */
+ if (preempt_attr == 0 && (major >= 6) && system_windows_version_at_least(10, 17134)) {
+ VLOG(1) << "Assuming device has compute preemption on Windows 10.";
+ preempt_attr = 1;
+ }
+
+ if (timeout_attr && !preempt_attr) {
+ VLOG(1) << "Device is recognized as display.";
+ info.description += " (Display)";
+ info.display_device = true;
+ display_devices.push_back(info);
+ }
+ else {
+ VLOG(1) << "Device has compute preemption or is not used for display.";
+ devices.push_back(info);
+ }
+ VLOG(1) << "Added device \"" << name << "\" with id \"" << info.id << "\".";
+ }
+
+ if (!display_devices.empty())
+ devices.insert(devices.end(), display_devices.begin(), display_devices.end());
+}
+
+string device_cuda_capabilities()
+{
+ CUresult result = device_cuda_safe_init();
+ if (result != CUDA_SUCCESS) {
+ if (result != CUDA_ERROR_NO_DEVICE) {
+ return string("Error initializing CUDA: ") + cuewErrorString(result);
+ }
+ return "No CUDA device found\n";
+ }
+
+ int count;
+ result = cuDeviceGetCount(&count);
+ if (result != CUDA_SUCCESS) {
+ return string("Error getting devices: ") + cuewErrorString(result);
+ }
+
+ string capabilities = "";
+ for (int num = 0; num < count; num++) {
+ char name[256];
+ if (cuDeviceGetName(name, 256, num) != CUDA_SUCCESS) {
+ continue;
+ }
+ capabilities += string("\t") + name + "\n";
+ int value;
+#define GET_ATTR(attr) \
+ { \
+ if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \
+ capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \
+ } \
+ } \
+ (void)0
+  /* TODO(sergey): Strip all attributes which are not useful for us
+   * or do not depend on the driver.
+   */
+ GET_ATTR(MAX_THREADS_PER_BLOCK);
+ GET_ATTR(MAX_BLOCK_DIM_X);
+ GET_ATTR(MAX_BLOCK_DIM_Y);
+ GET_ATTR(MAX_BLOCK_DIM_Z);
+ GET_ATTR(MAX_GRID_DIM_X);
+ GET_ATTR(MAX_GRID_DIM_Y);
+ GET_ATTR(MAX_GRID_DIM_Z);
+ GET_ATTR(MAX_SHARED_MEMORY_PER_BLOCK);
+ GET_ATTR(SHARED_MEMORY_PER_BLOCK);
+ GET_ATTR(TOTAL_CONSTANT_MEMORY);
+ GET_ATTR(WARP_SIZE);
+ GET_ATTR(MAX_PITCH);
+ GET_ATTR(MAX_REGISTERS_PER_BLOCK);
+ GET_ATTR(REGISTERS_PER_BLOCK);
+ GET_ATTR(CLOCK_RATE);
+ GET_ATTR(TEXTURE_ALIGNMENT);
+ GET_ATTR(GPU_OVERLAP);
+ GET_ATTR(MULTIPROCESSOR_COUNT);
+ GET_ATTR(KERNEL_EXEC_TIMEOUT);
+ GET_ATTR(INTEGRATED);
+ GET_ATTR(CAN_MAP_HOST_MEMORY);
+ GET_ATTR(COMPUTE_MODE);
+ GET_ATTR(MAXIMUM_TEXTURE1D_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_HEIGHT);
+ GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT);
+ GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_HEIGHT);
+ GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_LAYERS);
+ GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_HEIGHT);
+ GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES);
+ GET_ATTR(SURFACE_ALIGNMENT);
+ GET_ATTR(CONCURRENT_KERNELS);
+ GET_ATTR(ECC_ENABLED);
+ GET_ATTR(TCC_DRIVER);
+ GET_ATTR(MEMORY_CLOCK_RATE);
+ GET_ATTR(GLOBAL_MEMORY_BUS_WIDTH);
+ GET_ATTR(L2_CACHE_SIZE);
+ GET_ATTR(MAX_THREADS_PER_MULTIPROCESSOR);
+ GET_ATTR(ASYNC_ENGINE_COUNT);
+ GET_ATTR(UNIFIED_ADDRESSING);
+ GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_LAYERS);
+ GET_ATTR(CAN_TEX2D_GATHER);
+ GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_HEIGHT);
+ GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE);
+ GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE);
+ GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE);
+ GET_ATTR(TEXTURE_PITCH_ALIGNMENT);
+ GET_ATTR(MAXIMUM_TEXTURECUBEMAP_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS);
+ GET_ATTR(MAXIMUM_SURFACE1D_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACE2D_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACE2D_HEIGHT);
+ GET_ATTR(MAXIMUM_SURFACE3D_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACE3D_HEIGHT);
+ GET_ATTR(MAXIMUM_SURFACE3D_DEPTH);
+ GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_LAYERS);
+ GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_HEIGHT);
+ GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_LAYERS);
+ GET_ATTR(MAXIMUM_SURFACECUBEMAP_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH);
+ GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS);
+ GET_ATTR(MAXIMUM_TEXTURE1D_LINEAR_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_HEIGHT);
+ GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_PITCH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH);
+ GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT);
+ GET_ATTR(COMPUTE_CAPABILITY_MAJOR);
+ GET_ATTR(COMPUTE_CAPABILITY_MINOR);
+ GET_ATTR(MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH);
+ GET_ATTR(STREAM_PRIORITIES_SUPPORTED);
+ GET_ATTR(GLOBAL_L1_CACHE_SUPPORTED);
+ GET_ATTR(LOCAL_L1_CACHE_SUPPORTED);
+ GET_ATTR(MAX_SHARED_MEMORY_PER_MULTIPROCESSOR);
+ GET_ATTR(MAX_REGISTERS_PER_MULTIPROCESSOR);
+ GET_ATTR(MANAGED_MEMORY);
+ GET_ATTR(MULTI_GPU_BOARD);
+ GET_ATTR(MULTI_GPU_BOARD_GROUP_ID);
+#undef GET_ATTR
+ capabilities += "\n";
+ }
+
+ return capabilities;
+}
+
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
--- a/intern/cycles/device/device_split_kernel.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/device_split_kernel.cpp 2020-01-10 20:42:43.460923388 +0300
@@ -55,6 +55,10 @@
kernel_next_iteration_setup = NULL;
kernel_indirect_subsurface = NULL;
kernel_buffer_update = NULL;
+ kernel_adaptive_stopping = NULL;
+ kernel_adaptive_filter_x = NULL;
+ kernel_adaptive_filter_y = NULL;
+ kernel_adaptive_adjust_samples = NULL;
}
DeviceSplitKernel::~DeviceSplitKernel()
@@ -83,6 +87,10 @@
delete kernel_next_iteration_setup;
delete kernel_indirect_subsurface;
delete kernel_buffer_update;
+ delete kernel_adaptive_stopping;
+ delete kernel_adaptive_filter_x;
+ delete kernel_adaptive_filter_y;
+ delete kernel_adaptive_adjust_samples;
}
bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures &requested_features)
@@ -114,6 +122,10 @@
LOAD_KERNEL(next_iteration_setup);
LOAD_KERNEL(indirect_subsurface);
LOAD_KERNEL(buffer_update);
+ LOAD_KERNEL(adaptive_stopping);
+ LOAD_KERNEL(adaptive_filter_x);
+ LOAD_KERNEL(adaptive_filter_y);
+ LOAD_KERNEL(adaptive_adjust_samples);
#undef LOAD_KERNEL
@@ -208,6 +220,22 @@
RenderTile subtile = tile;
subtile.start_sample = tile.sample;
+
+ if (task->integrator_adaptive) {
+        int step_samples = samples_per_second;
+ /* Round so that we end up on multiples of four for adaptive sampling. */
+ if (step_samples == 3) {
+ step_samples = 2;
+ }
+ else if (step_samples > 4) {
+ step_samples &= 0xfffffffc;
+ }
+ samples_per_second = max(1, step_samples - (subtile.start_sample % 4));
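+        /* e.g. resuming at start_sample = 6 with an estimate of 8 yields
+         * max(1, 8 - 2) = 6 samples, so the subtile ends at sample 11 and
+         * the (sample & 3) == 3 filter condition below is met. */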
+ }
+
subtile.num_samples = min(samples_per_second,
tile.start_sample + tile.num_samples - tile.sample);
@@ -302,6 +330,25 @@
}
}
+ if (task->integrator_adaptive && ((tile.sample + subtile.num_samples - 1) & 3) == 3) {
+ size_t buffer_size[2];
+ buffer_size[0] = round_up(tile.w, local_size[0]);
+ buffer_size[1] = round_up(tile.h, local_size[1]);
+ kernel_adaptive_stopping->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ buffer_size[0] = round_up(tile.h, local_size[0]);
+ buffer_size[1] = round_up(1, local_size[1]);
+ kernel_adaptive_filter_x->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ buffer_size[0] = round_up(tile.w, local_size[0]);
+ buffer_size[1] = round_up(1, local_size[1]);
+ kernel_adaptive_filter_y->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
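+      /* The x-filter is launched with one work item per image row (tile.h)
+       * and the y-filter with one per column (tile.w), rather than one per
+       * pixel; each item is expected to sweep its whole scanline. */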
+ }
+
double time_per_sample = ((time_dt() - start_time) / subtile.num_samples);
if (avg_time_per_sample == 0.0) {
@@ -324,6 +371,28 @@
}
}
+ if (task->integrator_adaptive) {
+ /* Reset the start samples. */
+ RenderTile subtile = tile;
+ subtile.start_sample = tile.start_sample;
+ subtile.num_samples = tile.sample - tile.start_sample;
+ enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
+ subtile,
+ num_global_elements,
+ kgbuffer,
+ kernel_data,
+ split_data,
+ ray_state,
+ queue_index,
+ use_queues_flag,
+ work_pool_wgs);
+ size_t buffer_size[2];
+ buffer_size[0] = round_up(tile.w, local_size[0]);
+ buffer_size[1] = round_up(tile.h, local_size[1]);
+ kernel_adaptive_adjust_samples->enqueue(
+ KernelDimensions(buffer_size, local_size), kgbuffer, kernel_data);
+ }
+
return true;
}
diff -Naur a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
--- a/intern/cycles/device/device_split_kernel.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/device_split_kernel.h 2020-01-10 20:42:43.460923388 +0300
@@ -75,6 +75,10 @@
SplitKernelFunction *kernel_next_iteration_setup;
SplitKernelFunction *kernel_indirect_subsurface;
SplitKernelFunction *kernel_buffer_update;
+ SplitKernelFunction *kernel_adaptive_stopping;
+ SplitKernelFunction *kernel_adaptive_filter_x;
+ SplitKernelFunction *kernel_adaptive_filter_y;
+ SplitKernelFunction *kernel_adaptive_adjust_samples;
/* Global memory variables [porting]; These memory is used for
* co-operation between different kernels; Data written by one
diff -Naur a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
--- a/intern/cycles/device/device_task.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/device_task.h 2020-01-10 20:42:43.460923388 +0300
@@ -114,6 +114,7 @@
bool need_finish_queue;
bool integrator_branched;
+ bool integrator_adaptive;
int2 requested_tile_size;
protected:
diff -Naur a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
--- a/intern/cycles/device/opencl/opencl.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/opencl/opencl.h 2020-01-10 20:42:43.460923388 +0300
@@ -445,6 +445,7 @@
device_ptr rgba_byte,
device_ptr rgba_half);
void shader(DeviceTask &task);
+ void update_adaptive(DeviceTask &task, RenderTile &tile, int sample);
void denoise(RenderTile &tile, DenoisingTask &denoising);
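
The hunk above only declares update_adaptive(); its body lives in opencl_split.cpp and is not shown here. As rough orientation, a plausible shape — mirroring the enqueue pattern used by the generic split-kernel driver earlier in this patch — is sketched below. This is an assumption, not the patch's actual definition: the member names ckAdaptiveStopping, ckAdaptiveFilterX and ckAdaptiveFilterY are hypothetical, and only enqueue_kernel() is an existing OpenCLDevice helper.

/* Illustrative sketch only -- the real definition is in opencl_split.cpp and
 * may differ. Assumes cl_kernel handles for the three convergence kernels
 * (hypothetical names) and OpenCLDevice::enqueue_kernel(kernel, w, h). */
void OpenCLDevice::update_adaptive(DeviceTask &task, RenderTile &tile, int sample)
{
  /* Run the convergence check only once every fourth sample. */
  if (!task.integrator_adaptive || (sample & 3) != 3) {
    return;
  }
  /* Mark converged pixels, then dilate the not-yet-converged region in X and Y. */
  enqueue_kernel(ckAdaptiveStopping, tile.w, tile.h);
  enqueue_kernel(ckAdaptiveFilterX, tile.h, 1);
  enqueue_kernel(ckAdaptiveFilterY, tile.w, 1);
}
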
--- a/intern/cycles/device/opencl/opencl_split.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/device/opencl/opencl_split.cpp 2020-01-10 20:42:43.460923388 +0300
@@ -56,7 +56,11 @@
"enqueue_inactive "
"next_iteration_setup "
"indirect_subsurface "
- "buffer_update";
+ "buffer_update "
+ "adaptive_stopping "
+ "adaptive_filter_x "
+ "adaptive_filter_y "
+ "adaptive_adjust_samples";
const string OpenCLDevice::get_opencl_program_name(const string &kernel_name)
{
@@ -283,6 +287,10 @@
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(next_iteration_setup);
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(indirect_subsurface);
ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(buffer_update);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_stopping);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_x);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_filter_y);
+ ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(adaptive_adjust_samples);
programs.push_back(&program_split);
# undef ADD_SPLIT_KERNEL_PROGRAM
diff -Naur a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
--- a/intern/cycles/kernel/CMakeLists.txt 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/CMakeLists.txt 2020-01-10 20:42:43.460923388 +0300
@@ -36,6 +36,10 @@
)
set(SRC_OPENCL_KERNELS
+ kernels/opencl/kernel_adaptive_stopping.cl
+ kernels/opencl/kernel_adaptive_filter_x.cl
+ kernels/opencl/kernel_adaptive_filter_y.cl
+ kernels/opencl/kernel_adaptive_adjust_samples.cl
kernels/opencl/kernel_bake.cl
kernels/opencl/kernel_base.cl
kernels/opencl/kernel_displace.cl
@@ -94,6 +98,7 @@
set(SRC_HEADERS
kernel_accumulate.h
+ kernel_adaptive_sampling.h
kernel_bake.h
kernel_camera.h
kernel_color.h
@@ -323,6 +328,10 @@
)
set(SRC_SPLIT_HEADERS
+ split/kernel_adaptive_adjust_samples.h
+ split/kernel_adaptive_filter_x.h
+ split/kernel_adaptive_filter_y.h
+ split/kernel_adaptive_stopping.h
split/kernel_branched.h
split/kernel_buffer_update.h
split/kernel_data_init.h
diff -Naur a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h
--- a/intern/cycles/kernel/kernel_adaptive_sampling.h 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h 2020-01-10 20:42:43.464256721 +0300
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KERNEL_ADAPTIVE_SAMPLING_H__
+#define __KERNEL_ADAPTIVE_SAMPLING_H__
+
+CCL_NAMESPACE_BEGIN
+
+/* Determine whether to keep sampling a given pixel or stop because it has sufficiently converged. */
+
+ccl_device void kernel_do_adaptive_stopping(KernelGlobals *kg,
+ ccl_global float *buffer,
+ int sample)
+{
+ /* TODO Stefan: Is this better in linear, sRGB or something else? */
+ float4 I = *((ccl_global float4 *)buffer);
+ float4 A = *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ /* The per pixel error as seen in section 2.1 of
+ * "A hierarchical automatic stopping condition for Monte Carlo global illumination"
+ * A small epsilon is added to the divisor to prevent division by zero. */
+ float error = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) /
+ (sample * 0.0001f + sqrtf(I.x + I.y + I.z));
+ if (error < kernel_data.integrator.adaptive_threshold * (float)sample) {
+ /* Set the fourth component to non-zero value to indicate that this pixel has converged. */
+ buffer[kernel_data.film.pass_adaptive_aux_buffer + 3] += 1.0f;
+ }
+}
+
+/* Adjust the values of an adaptively sampled pixel. */
+
+ccl_device void kernel_adaptive_post_adjust(KernelGlobals *kg,
+ ccl_global float *buffer,
+ float sample_multiplier)
+{
+ *(ccl_global float4 *)(buffer) *= sample_multiplier;
+
+ /* Scale the aux pass too; this is necessary for progressive rendering to work properly. */
+ kernel_assert(kernel_data.film.pass_adaptive_aux_buffer);
+ *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer) *= sample_multiplier;
+
+#ifdef __PASSES__
+ int flag = kernel_data.film.pass_flag;
+
+ if (flag & PASSMASK(SHADOW))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_shadow) *= sample_multiplier;
+
+ if (flag & PASSMASK(MIST))
+ *(ccl_global float *)(buffer + kernel_data.film.pass_mist) *= sample_multiplier;
+
+ if (flag & PASSMASK(NORMAL))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_normal) *= sample_multiplier;
+
+ if (flag & PASSMASK(UV))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_uv) *= sample_multiplier;
+
+ if (flag & PASSMASK(MOTION)) {
+ *(ccl_global float4 *)(buffer + kernel_data.film.pass_motion) *= sample_multiplier;
+ *(ccl_global float *)(buffer + kernel_data.film.pass_motion_weight) *= sample_multiplier;
+ }
+
+ if (kernel_data.film.use_light_pass) {
+ int light_flag = kernel_data.film.light_pass_flag;
+
+ if (light_flag & PASSMASK(DIFFUSE_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_INDIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(SUBSURFACE_INDIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_subsurface_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(VOLUME_INDIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_indirect) *= sample_multiplier;
+ if (light_flag & PASSMASK(DIFFUSE_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_DIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(SUBSURFACE_DIRECT))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_subsurface_direct) *= sample_multiplier;
+ if (light_flag & PASSMASK(VOLUME_DIRECT))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_volume_direct) *= sample_multiplier;
+
+ if (light_flag & PASSMASK(EMISSION))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_emission) *= sample_multiplier;
+ if (light_flag & PASSMASK(BACKGROUND))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_background) *= sample_multiplier;
+ if (light_flag & PASSMASK(AO))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_ao) *= sample_multiplier;
+
+ if (light_flag & PASSMASK(DIFFUSE_COLOR))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_diffuse_color) *= sample_multiplier;
+ if (light_flag & PASSMASK(GLOSSY_COLOR))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_glossy_color) *= sample_multiplier;
+ if (light_flag & PASSMASK(TRANSMISSION_COLOR))
+ *(ccl_global float3 *)(buffer +
+ kernel_data.film.pass_transmission_color) *= sample_multiplier;
+ if (light_flag & PASSMASK(SUBSURFACE_COLOR))
+ *(ccl_global float3 *)(buffer + kernel_data.film.pass_subsurface_color) *= sample_multiplier;
+ }
+#endif
+
+#ifdef __DENOISING_FEATURES__
+
+# define scale_float3_variance(buffer, offset, scale) \
+ *(buffer + offset) *= scale; \
+ *(buffer + offset + 1) *= scale; \
+ *(buffer + offset + 2) *= scale; \
+ *(buffer + offset + 3) *= scale * scale; \
+ *(buffer + offset + 4) *= scale * scale; \
+ *(buffer + offset + 5) *= scale * scale;
+
+# define scale_shadow_variance(buffer, offset, scale) \
+ *(buffer + offset) *= scale; \
+ *(buffer + offset + 1) *= scale; \
+ *(buffer + offset + 2) *= scale * scale;
+
+ if (kernel_data.film.pass_denoising_data) {
+ scale_shadow_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_A, sample_multiplier);
+ scale_shadow_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_SHADOW_B, sample_multiplier);
+ if (kernel_data.film.pass_denoising_clean) {
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier);
+ *(buffer + kernel_data.film.pass_denoising_clean) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_clean + 1) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_clean + 2) *= sample_multiplier;
+ }
+ else {
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_COLOR, sample_multiplier);
+ }
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_NORMAL, sample_multiplier);
+ scale_float3_variance(
+ buffer, kernel_data.film.pass_denoising_data + DENOISING_PASS_ALBEDO, sample_multiplier);
+ *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH) *= sample_multiplier;
+ *(buffer + kernel_data.film.pass_denoising_data + DENOISING_PASS_DEPTH +
+ 1) *= sample_multiplier * sample_multiplier;
+ }
+#endif /* __DENOISING_FEATURES__ */
+
+ if (kernel_data.film.cryptomatte_passes) {
+ int num_slots = 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) ? 1 : 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) ? 1 : 0;
+ num_slots += (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) ? 1 : 0;
+ num_slots = num_slots * 2 * kernel_data.film.cryptomatte_depth;
+ ccl_global float2 *id_buffer = (ccl_global float2 *)(buffer +
+ kernel_data.film.pass_cryptomatte);
+ for (int slot = 0; slot < num_slots; slot++) {
+ id_buffer[slot].y *= sample_multiplier;
+ }
+ }
+}
+
+/* This is a simple box filter in two passes.
+ * When a pixel demands more adaptive samples, let its neighboring pixels draw more samples too. */
+
+ccl_device bool kernel_do_adaptive_filter_x(KernelGlobals *kg, int y, ccl_global WorkTile *tile)
+{
+ bool any = false;
+ bool prev = false;
+ for (int x = tile->x; x < tile->x + tile->w; ++x) {
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w == 0.0f) {
+ any = true;
+ if (x > tile->x && !prev) {
+ index = index - 1;
+ buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ aux->w = 0.0f;
+ }
+ prev = true;
+ }
+ else {
+ if (prev) {
+ aux->w = 0.0f;
+ }
+ prev = false;
+ }
+ }
+ return any;
+}
+
+ccl_device bool kernel_do_adaptive_filter_y(KernelGlobals *kg, int x, ccl_global WorkTile *tile)
+{
+ bool prev = false;
+ bool any = false;
+ for (int y = tile->y; y < tile->y + tile->h; ++y) {
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w == 0.0f) {
+ any = true;
+ if (y > tile->y && !prev) {
+ index = index - tile->stride;
+ buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ aux->w = 0.0f;
+ }
+ prev = true;
+ }
+ else {
+ if (prev) {
+ aux->w = 0.0f;
+ }
+ prev = false;
+ }
+ }
+ return any;
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __KERNEL_ADAPTIVE_SAMPLING_H__ */
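
To make the stopping criterion in kernel_do_adaptive_stopping() concrete, here is the same error metric evaluated stand-alone for one invented pixel. The formula is copied from the kernel above; the helper name adaptive_error, the radiance values and the threshold are illustrative only:

#include <cmath>
#include <cstdio>

/* Same per-pixel error as kernel_do_adaptive_stopping(): I is the full
 * accumulated radiance, A the doubled half-set estimate from the aux pass. */
static float adaptive_error(const float I[3], const float A[3], int sample)
{
  return (std::fabs(I[0] - A[0]) + std::fabs(I[1] - A[1]) + std::fabs(I[2] - A[2])) /
         (sample * 0.0001f + std::sqrt(I[0] + I[1] + I[2]));
}

int main()
{
  const float I[3] = {32.1f, 31.8f, 32.4f}; /* invented: 64 samples of ~0.5 grey */
  const float A[3] = {32.6f, 31.2f, 33.1f};
  const int sample = 64;
  const float threshold = 0.01f; /* stands in for integrator.adaptive_threshold */

  const float error = adaptive_error(I, A, sample);
  /* error ~= 1.8 / 9.82 ~= 0.18, threshold * sample = 0.64 -> converged. */
  std::printf("%s\n", error < threshold * (float)sample ? "converged" : "keep sampling");
  return 0;
}
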
diff -Naur a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
--- a/intern/cycles/kernel/kernel_passes.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_passes.h 2020-01-10 20:42:43.464256721 +0300
@@ -29,7 +29,9 @@
if (kernel_data.film.pass_denoising_data == 0)
return;
- buffer += (sample & 1) ? DENOISING_PASS_SHADOW_B : DENOISING_PASS_SHADOW_A;
+ buffer += sample_is_even(kernel_data.integrator.sampling_pattern, sample) ?
+ DENOISING_PASS_SHADOW_B :
+ DENOISING_PASS_SHADOW_A;
path_total = ensure_finite(path_total);
path_total_shaded = ensure_finite(path_total_shaded);
@@ -383,6 +385,38 @@
#ifdef __KERNEL_DEBUG__
kernel_write_debug_passes(kg, buffer, L);
#endif
+
+  /* Adaptive Sampling. Fill the additional buffer with the odd samples and calculate the
+   * stopping criterion, using the heuristic from "A hierarchical automatic stopping condition
+   * for Monte Carlo global illumination" applied per pixel rather than in hierarchical tiles. */
+ if (kernel_data.film.pass_adaptive_aux_buffer &&
+ kernel_data.integrator.adaptive_threshold > 0.0f) {
+ if (sample_is_even(kernel_data.integrator.sampling_pattern, sample)) {
+ kernel_write_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer,
+ make_float4(L_sum.x * 2.0f, L_sum.y * 2.0f, L_sum.z * 2.0f, 0.0f));
+ }
+#ifdef __KERNEL_CPU__
+ if (sample >= kernel_data.integrator.adaptive_min_samples - 1 && (sample & 0x3) == 3) {
+ kernel_do_adaptive_stopping(kg, buffer, sample);
+ }
+#endif
+ }
+
+ /* Write the sample count as negative numbers initially to mark the samples as in progress.
+ * Once the tile has finished rendering, the sign gets flipped and all the pixel values
+ * are scaled as if they were taken at a uniform sample count. */
+ if (kernel_data.film.pass_sample_count) {
+    /* Make sure the value is negative; in progressive refine mode, the sign bit gets flipped between passes. */
+#ifdef __ATOMIC_PASS_WRITE__
+ atomic_fetch_and_or_uint32((ccl_global uint *)(buffer + kernel_data.film.pass_sample_count),
+ 0x80000000);
+#else
+ if (buffer[kernel_data.film.pass_sample_count] > 0) {
+ buffer[kernel_data.film.pass_sample_count] *= -1.0f;
+ }
+#endif
+ kernel_write_pass_float(buffer + kernel_data.film.pass_sample_count, -1.0f);
+ }
}
CCL_NAMESPACE_END
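
The even samples written above are accumulated at double weight so that, over a full batch,
the aux pass estimates the same total as the combined pass; the kernel_do_adaptive_stopping
call above compares the two estimates. A rough sketch of such a per-pixel test in the spirit
of the cited paper, with the helper name and the exact normalization assumed rather than
taken from this patch:

ccl_device void adaptive_stopping_sketch(KernelGlobals *kg, ccl_global float *buffer, int sample)
{
  /* I is the full accumulated radiance (combined pass at offset 0), A the
   * doubled even-half accumulation; their difference, normalized by
   * brightness, estimates the per-pixel error. */
  ccl_global float4 *aux = (ccl_global float4 *)(buffer +
                                                 kernel_data.film.pass_adaptive_aux_buffer);
  float4 I = *((ccl_global float4 *)buffer);
  float4 A = *aux;
  float error = (fabsf(I.x - A.x) + fabsf(I.y - A.y) + fabsf(I.z - A.z)) /
                max(sqrtf(I.x + I.y + I.z), 1e-4f);
  if (error < kernel_data.integrator.adaptive_threshold * (float)sample) {
    aux->w = 1.0f; /* Mark as converged; the path kernels skip this pixel from now on. */
  }
}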
diff -Naur a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
--- a/intern/cycles/kernel/kernel_path_branched.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_path_branched.h 2020-01-10 20:42:43.464256721 +0300
@@ -523,6 +523,14 @@
buffer += index * pass_stride;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w > 0.0f) {
+ return;
+ }
+ }
+
/* initialize random numbers and ray */
uint rng_hash;
Ray ray;
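
Both path kernels guard on the fourth component of the adaptive aux pass, as in the hunk
above. The convention, spelled out as a hypothetical helper (illustrative only; the patch
inlines this check):

ccl_device_inline bool pixel_has_converged(KernelGlobals *kg, ccl_global const float *buffer)
{
  /* x/y/z of the aux pass accumulate twice the even-half radiance; w stays
   * 0.0f while the pixel is active and becomes non-zero once the stopping
   * test has fired, after which the pixel receives no further samples. */
  ccl_global const float4 *aux = (ccl_global const float4 *)(
      buffer + kernel_data.film.pass_adaptive_aux_buffer);
  return aux->w > 0.0f;
}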
diff -Naur a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
--- a/intern/cycles/kernel/kernel_path.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_path.h 2020-01-10 20:42:43.464256721 +0300
@@ -31,6 +31,7 @@
#include "kernel/kernel_accumulate.h"
#include "kernel/kernel_shader.h"
#include "kernel/kernel_light.h"
+#include "kernel/kernel_adaptive_sampling.h"
#include "kernel/kernel_passes.h"
#if defined(__VOLUME__) || defined(__SUBSURFACE__)
@@ -656,6 +657,14 @@
buffer += index * pass_stride;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w > 0.0f) {
+ return;
+ }
+ }
+
/* Initialize random numbers and sample ray. */
uint rng_hash;
Ray ray;
diff -Naur a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
--- a/intern/cycles/kernel/kernel_random.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_random.h 2020-01-10 20:55:57.757604393 +0300
@@ -43,20 +43,34 @@
uint i = index + SOBOL_SKIP;
for (int j = 0, x; (x = find_first_set(i)); i >>= x) {
j += x;
- result ^= kernel_tex_fetch(__sobol_directions, 32 * dimension + j - 1);
+ result ^= kernel_tex_fetch(__sample_pattern_lut, 32 * dimension + j);
}
return result;
}
#endif /* __SOBOL__ */
+#define NUM_PJ_SAMPLES (64 * 64)
+#define NUM_PJ_PATTERNS 48
+
ccl_device_forceinline float path_rng_1D(
KernelGlobals *kg, uint rng_hash, int sample, int num_samples, int dimension)
{
#ifdef __DEBUG_CORRELATION__
return (float)drand48();
#endif
-
+ if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
+    /* Fall back to pseudo-random numbers once the precomputed table is exhausted. */
+    if (sample >= NUM_PJ_SAMPLES) {
+ int p = rng_hash + dimension;
+ return cmj_randfloat(sample, p);
+ }
+ uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+ int index = ((dimension % NUM_PJ_PATTERNS) * NUM_PJ_SAMPLES + sample) * 2;
+ return __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^
+ (tmp_rng & 0x007fffff)) -
+ 1.0f;
+ }
#ifdef __CMJ__
# ifdef __SOBOL__
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
@@ -99,7 +113,24 @@
*fy = (float)drand48();
return;
#endif
-
+  if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_PMJ) {
+    /* Fall back to pseudo-random numbers once the precomputed table is exhausted.
+     * Without the return below, the table lookup would overwrite the fallback
+     * values with an out-of-range fetch. */
+    if (sample >= NUM_PJ_SAMPLES) {
+      int p = rng_hash + dimension;
+      *fx = cmj_randfloat(sample, p);
+      *fy = cmj_randfloat(sample, p + 1);
+      return;
+    }
+ uint tmp_rng = cmj_hash_simple(dimension, rng_hash);
+ int index = ((dimension % NUM_PJ_PATTERNS) * NUM_PJ_SAMPLES + sample) * 2;
+ *fx = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index) ^ (tmp_rng & 0x007fffff)) -
+ 1.0f;
+ tmp_rng = cmj_hash_simple(dimension + 1, rng_hash);
+ *fy = __uint_as_float(kernel_tex_fetch(__sample_pattern_lut, index + 1) ^
+ (tmp_rng & 0x007fffff)) -
+ 1.0f;
+ return;
+ }
#ifdef __CMJ__
# ifdef __SOBOL__
if (kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ)
@@ -284,4 +314,23 @@
return (float)*rng * (1.0f / (float)0xFFFFFFFF);
}
+ccl_device_inline bool sample_is_even(int pattern, int sample)
+{
+ if (pattern == SAMPLING_PATTERN_PMJ) {
+ /* See Section 10.2.1, "Progressive Multi-Jittered Sample Sequences", Christensen et al.
+   * We can use this to divide the sample sequence into two classes for easier variance estimation.
+   * There must be a more elegant way of writing this. */
+ return (bool)(sample & 2) ^ (bool)(sample & 8) ^ (bool)(sample & 0x20) ^
+ (bool)(sample & 0x80) ^ (bool)(sample & 0x200) ^ (bool)(sample & 0x800) ^
+ (bool)(sample & 0x2000) ^ (bool)(sample & 0x8000) ^ (bool)(sample & 0x20000) ^
+ (bool)(sample & 0x80000) ^ (bool)(sample & 0x200000) ^ (bool)(sample & 0x800000) ^
+ (bool)(sample & 0x2000000) ^ (bool)(sample & 0x8000000) ^ (bool)(sample & 0x20000000) ^
+ (bool)(sample & 0x80000000);
+ }
+ else {
+ /* TODO: Are there reliable ways of dividing CMJ and Sobol into two classes? */
+ return sample & 0x1;
+ }
+}
+
CCL_NAMESPACE_END
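
Note on the PMJ lookup above: the sample pattern LUT is assumed to store each coordinate as the bit pattern of a float in [1, 2), so XOR-ing its 23 mantissa bits with a per-dimension hash (the "tmp_rng & 0x007fffff" term) scrambles the fraction without leaving [1, 2); subtracting 1.0f then yields a value in [0, 1). The sample_is_even XOR chain is simply the parity of the odd-numbered bits of the sample index. A minimal standalone sketch of both tricks (hypothetical helper names, plain C, not part of the patch):

    #include <stdint.h>
    #include <string.h>

    /* Mantissa scramble: lut_bits encodes a float in [1, 2); XOR-ing hash bits
     * into the 23-bit mantissa keeps the value in [1, 2), and subtracting 1.0f
     * maps it to [0, 1).  Mirrors the __uint_as_float(...) - 1.0f expression. */
    static float scrambled_sample(uint32_t lut_bits, uint32_t hash)
    {
      uint32_t bits = lut_bits ^ (hash & 0x007fffffu);
      float f;
      memcpy(&f, &bits, sizeof(f)); /* portable stand-in for __uint_as_float */
      return f - 1.0f;
    }

    /* Equivalent of the sample_is_even XOR chain: bits 1, 3, 5, ..., 31 are
     * masked by 0xaaaaaaaa and their parity picks the class.  Assumes a
     * GCC/Clang-style popcount builtin. */
    static int pmj_sample_is_even(uint32_t sample)
    {
      return __builtin_popcount(sample & 0xaaaaaaaau) & 1;
    }
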
diff -Naur a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h 2020-01-10 20:42:43.467590055 +0300
@@ -89,5 +89,9 @@
DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DECLARE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
#undef KERNEL_ARCH
diff -Naur a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h 2020-01-10 20:42:43.467590055 +0300
@@ -58,6 +58,10 @@
# include "kernel/split/kernel_next_iteration_setup.h"
# include "kernel/split/kernel_indirect_subsurface.h"
# include "kernel/split/kernel_buffer_update.h"
+# include "kernel/split/kernel_adaptive_stopping.h"
+# include "kernel/split/kernel_adaptive_filter_x.h"
+# include "kernel/split/kernel_adaptive_filter_y.h"
+# include "kernel/split/kernel_adaptive_adjust_samples.h"
# endif /* __SPLIT_KERNEL__ */
#else
# define STUB_ASSERT(arch, name) \
@@ -204,6 +208,10 @@
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
#endif /* __SPLIT_KERNEL__ */
#undef KERNEL_STUB
diff -Naur a/intern/cycles/kernel/kernels/cuda/kernel.cu b/intern/cycles/kernel/kernels/cuda/kernel.cu
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu 2020-01-10 20:42:43.467590055 +0300
@@ -33,6 +33,7 @@
#include "kernel/kernel_path_branched.h"
#include "kernel/kernel_bake.h"
#include "kernel/kernel_work_stealing.h"
+#include "kernel/kernel_adaptive_sampling.h"
/* kernels */
extern "C" __global__ void
@@ -83,6 +84,75 @@
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_stopping(WorkTile *tile, int sample, uint total_work_size)
+{
+ int work_index = ccl_global_id(0);
+ bool thread_is_active = work_index < total_work_size;
+ KernelGlobals kg;
+  if (thread_is_active && kernel_data.film.pass_adaptive_aux_buffer) {
+ uint x = tile->x + work_index % tile->w;
+ uint y = tile->y + work_index / tile->w;
+ int index = tile->offset + x + y * tile->stride;
+ ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+ kernel_do_adaptive_stopping(&kg, buffer, sample);
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_filter_x(WorkTile *tile, int sample, uint)
+{
+ KernelGlobals kg;
+  if (kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) {
+    if (ccl_global_id(0) < tile->h) {
+ int y = tile->y + ccl_global_id(0);
+ kernel_do_adaptive_filter_x(&kg, y, tile);
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_filter_y(WorkTile *tile, int sample, uint)
+{
+ KernelGlobals kg;
+  if (kernel_data.film.pass_adaptive_aux_buffer && sample > kernel_data.integrator.adaptive_min_samples) {
+    if (ccl_global_id(0) < tile->w) {
+ int x = tile->x + ccl_global_id(0);
+ kernel_do_adaptive_filter_y(&kg, x, tile);
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_adaptive_scale_samples(WorkTile *tile, int start_sample, int sample, uint total_work_size)
+{
+  if (kernel_data.film.pass_adaptive_aux_buffer) {
+    int work_index = ccl_global_id(0);
+    bool thread_is_active = work_index < total_work_size;
+    KernelGlobals kg;
+    if (thread_is_active) {
+      uint x = tile->x + work_index % tile->w;
+      uint y = tile->y + work_index / tile->w;
+      int index = tile->offset + x + y * tile->stride;
+      ccl_global float *buffer = tile->buffer + index * kernel_data.film.pass_stride;
+      if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+        buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+        float sample_multiplier = sample / max((float)start_sample + 1.0f, buffer[kernel_data.film.pass_sample_count]);
+        if (sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(&kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(&kg, buffer, sample / (sample - 1.0f));
+ }
+ }
+ }
+}
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
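
The scale-samples kernel above relies on a sign convention: once a pixel passes the stopping test, its PASS_SAMPLE_COUNT entry is stored negated, so a negative value both flags the pixel as converged and preserves how many samples it actually received. kernel_cuda_adaptive_scale_samples flips the sign back and multiplies the accumulated result up to the nominal sample count, so the uniform divide at display time still produces a correct per-pixel average. A rough standalone sketch of the multiplier (hypothetical helper, not part of the patch):

    /* Multiplier applied by the scale-samples pass for a converged pixel:
     * stored_count arrives negated; the clamp mirrors the kernel's max()
     * against start_sample + 1 so fresh tiles never divide by near-zero. */
    static float adaptive_sample_multiplier(float stored_count, int start_sample, int nominal_sample)
    {
      float actual = -stored_count; /* undo the "converged" sign flip */
      float floor_count = (float)start_sample + 1.0f;
      if (actual < floor_count)
        actual = floor_count;
      return (float)nominal_sample / actual;
    }
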
diff -Naur a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu 2020-01-10 20:42:43.467590055 +0300
@@ -43,6 +43,10 @@
#include "kernel/split/kernel_next_iteration_setup.h"
#include "kernel/split/kernel_indirect_subsurface.h"
#include "kernel/split/kernel_buffer_update.h"
+#include "kernel/split/kernel_adaptive_stopping.h"
+#include "kernel/split/kernel_adaptive_filter_x.h"
+#include "kernel/split/kernel_adaptive_filter_y.h"
+#include "kernel/split/kernel_adaptive_adjust_samples.h"
#include "kernel/kernel_film.h"
@@ -121,6 +125,10 @@
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(next_iteration_setup, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(buffer_update, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_stopping)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_x)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_filter_y)
+DEFINE_SPLIT_KERNEL_FUNCTION(adaptive_adjust_samples)
extern "C" __global__ void
CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl 2020-01-10 20:42:43.467590055 +0300
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_adjust_samples.h"
+
+#define KERNEL_NAME adaptive_adjust_samples
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_x.cl 2020-01-10 20:42:43.467590055 +0300
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_filter_x.h"
+
+#define KERNEL_NAME adaptive_filter_x
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_filter_y.cl 2020-01-10 20:42:43.467590055 +0300
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_filter_y.h"
+
+#define KERNEL_NAME adaptive_filter_y
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl
--- a/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/kernels/opencl/kernel_adaptive_stopping.cl 2020-01-10 20:42:43.467590055 +0300
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_adaptive_stopping.h"
+
+#define KERNEL_NAME adaptive_stopping
+#include "kernel/kernels/opencl/kernel_split_function.h"
+#undef KERNEL_NAME
diff -Naur a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl
--- a/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split_bundle.cl 2020-01-10 20:42:43.467590055 +0300
@@ -28,3 +28,7 @@
#include "kernel/kernels/opencl/kernel_next_iteration_setup.cl"
#include "kernel/kernels/opencl/kernel_indirect_subsurface.cl"
#include "kernel/kernels/opencl/kernel_buffer_update.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_stopping.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_filter_x.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_filter_y.cl"
+#include "kernel/kernels/opencl/kernel_adaptive_adjust_samples.cl"
diff -Naur a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
--- a/intern/cycles/kernel/kernel_textures.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_textures.h 2020-01-10 20:42:43.464256721 +0300
@@ -77,7 +77,7 @@
KERNEL_TEX(float, __lookup_table)
/* sobol */
-KERNEL_TEX(uint, __sobol_directions)
+KERNEL_TEX(uint, __sample_pattern_lut)
/* image textures */
KERNEL_TEX(TextureInfo, __texture_info)
diff -Naur a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
--- a/intern/cycles/kernel/kernel_types.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_types.h 2020-01-10 20:49:57.107597884 +0300
@@ -269,6 +269,7 @@
enum SamplingPattern {
SAMPLING_PATTERN_SOBOL = 0,
SAMPLING_PATTERN_CMJ = 1,
+ SAMPLING_PATTERN_PMJ = 2,
SAMPLING_NUM_PATTERNS,
};
@@ -373,6 +374,8 @@
#endif
PASS_RENDER_TIME,
PASS_CRYPTOMATTE,
+ PASS_ADAPTIVE_AUX_BUFFER,
+ PASS_SAMPLE_COUNT,
PASS_AOV_COLOR,
PASS_AOV_VALUE,
PASS_CATEGORY_MAIN_END = 31,
@@ -1239,6 +1242,9 @@
int cryptomatte_depth;
int pass_cryptomatte;
+ int pass_adaptive_aux_buffer;
+ int pass_sample_count;
+
int pass_mist;
float mist_start;
float mist_inv_depth;
@@ -1273,6 +1279,7 @@
int display_divide_pass_stride;
int use_display_exposure;
int use_display_pass_alpha;
+ int pad1, pad2;
} KernelFilm;
static_assert_align(KernelFilm, 16);
@@ -1354,6 +1361,8 @@
/* sampler */
int sampling_pattern;
int aa_samples;
+ int adaptive_min_samples;
+ float adaptive_threshold;
/* volume render */
int use_volumes;
@@ -1429,7 +1438,7 @@
typedef struct KernelTables {
int beckmann_offset;
- int pad1, pad2, pad3;
+ int pad1;
} KernelTables;
static_assert_align(KernelTables, 16);
@@ -1679,7 +1688,7 @@
uint start_sample;
uint num_samples;
- uint offset;
+ int offset;
uint stride;
ccl_global float *buffer;
diff -Naur a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
--- a/intern/cycles/kernel/kernel_work_stealing.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/kernel/kernel_work_stealing.h 2020-01-10 20:42:43.467590055 +0300
@@ -23,17 +23,41 @@
* Utility functions for work stealing
*/
+/* Map global work index to tile, pixel X/Y and sample. */
+ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
+ uint global_work_index,
+ ccl_private uint *x,
+ ccl_private uint *y,
+ ccl_private uint *sample)
+{
+#ifdef __KERNEL_CUDA__
+ /* Keeping threads for the same pixel together improves performance on CUDA. */
+ uint sample_offset = global_work_index % tile->num_samples;
+ uint pixel_offset = global_work_index / tile->num_samples;
+#else /* __KERNEL_CUDA__ */
+ uint tile_pixels = tile->w * tile->h;
+ uint sample_offset = global_work_index / tile_pixels;
+ uint pixel_offset = global_work_index - sample_offset * tile_pixels;
+#endif /* __KERNEL_CUDA__ */
+ uint y_offset = pixel_offset / tile->w;
+ uint x_offset = pixel_offset - y_offset * tile->w;
+
+ *x = tile->x + x_offset;
+ *y = tile->y + y_offset;
+ *sample = tile->start_sample + sample_offset;
+}
+
#ifdef __KERNEL_OPENCL__
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif
#ifdef __SPLIT_KERNEL__
/* Returns true if there is work */
-ccl_device bool get_next_work(KernelGlobals *kg,
- ccl_global uint *work_pools,
- uint total_work_size,
- uint ray_index,
- ccl_private uint *global_work_index)
+ccl_device bool get_next_work_item(KernelGlobals *kg,
+ ccl_global uint *work_pools,
+ uint total_work_size,
+ uint ray_index,
+ ccl_private uint *global_work_index)
{
/* With a small amount of work there may be more threads than work due to
* rounding up of global size, stop such threads immediately. */
@@ -56,31 +80,37 @@
/* Test if all work for this pool is done. */
return (*global_work_index < total_work_size);
}
-#endif
-/* Map global work index to tile, pixel X/Y and sample. */
-ccl_device_inline void get_work_pixel(ccl_global const WorkTile *tile,
- uint global_work_index,
- ccl_private uint *x,
- ccl_private uint *y,
- ccl_private uint *sample)
+ccl_device bool get_next_work(KernelGlobals *kg,
+ ccl_global uint *work_pools,
+ uint total_work_size,
+ uint ray_index,
+ ccl_private uint *global_work_index)
{
-#ifdef __KERNEL_CUDA__
- /* Keeping threads for the same pixel together improves performance on CUDA. */
- uint sample_offset = global_work_index % tile->num_samples;
- uint pixel_offset = global_work_index / tile->num_samples;
-#else /* __KERNEL_CUDA__ */
- uint tile_pixels = tile->w * tile->h;
- uint sample_offset = global_work_index / tile_pixels;
- uint pixel_offset = global_work_index - sample_offset * tile_pixels;
-#endif /* __KERNEL_CUDA__ */
- uint y_offset = pixel_offset / tile->w;
- uint x_offset = pixel_offset - y_offset * tile->w;
-
- *x = tile->x + x_offset;
- *y = tile->y + y_offset;
- *sample = tile->start_sample + sample_offset;
+ bool got_work = false;
+ if (kernel_data.film.pass_adaptive_aux_buffer) {
+ do {
+ got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
+ if (got_work) {
+ ccl_global WorkTile *tile = &kernel_split_params.tile;
+ uint x, y, sample;
+ get_work_pixel(tile, *global_work_index, &x, &y, &sample);
+ uint buffer_offset = (tile->offset + x + y * tile->stride) * kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ ccl_global float4 *aux = (ccl_global float4 *)(buffer +
+ kernel_data.film.pass_adaptive_aux_buffer);
+ if (aux->w == 0.0f) {
+ break;
+ }
+ }
+ } while (got_work);
+ }
+ else {
+ got_work = get_next_work_item(kg, work_pools, total_work_size, ray_index, global_work_index);
+ }
+ return got_work;
}
+#endif
CCL_NAMESPACE_END
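
With adaptive sampling enabled, get_next_work above becomes a filtering wrapper around get_next_work_item: the auxiliary pass keeps 0.0 in its w component while a pixel is still active, and a non-zero w is written once the pixel converges, so work items belonging to finished pixels are consumed and discarded instead of traced. A distilled sketch of that loop (hypothetical types and names, not part of the patch):

    #include <stdbool.h>
    #include <stddef.h>

    /* Drain a work stream, skipping items whose pixel already converged
     * (flag != 0), and return the first still-active item if any. */
    typedef struct {
      const float *aux_w; /* per-work-item convergence flags (aux->w) */
      size_t next, total; /* cursor into the work pool */
    } WorkStream;

    static bool next_active_work(WorkStream *ws, size_t *out_index)
    {
      while (ws->next < ws->total) {
        size_t i = ws->next++;
        if (ws->aux_w[i] == 0.0f) { /* w == 0: pixel not yet converged */
          *out_index = i;
          return true;
        }
        /* converged pixel: drop this item and keep draining */
      }
      return false;
    }
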
diff -Naur a/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h
--- a/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/split/kernel_adaptive_adjust_samples.h 2020-01-10 20:42:43.467590055 +0300
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_adjust_samples(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h) {
+ int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w;
+ int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w;
+ int buffer_offset = (kernel_split_params.tile.offset + x +
+ y * kernel_split_params.tile.stride) *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ int sample = kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples;
+ if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
+ buffer[kernel_data.film.pass_sample_count] = -buffer[kernel_data.film.pass_sample_count];
+ float sample_multiplier = sample / max((float)kernel_split_params.tile.start_sample + 1.0f,
+ buffer[kernel_data.film.pass_sample_count]);
+ if (sample_multiplier != 1.0f) {
+ kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
+ }
+ }
+ else {
+ kernel_adaptive_post_adjust(kg, buffer, sample / (sample - 1.0f));
+ }
+ }
+}
+
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/kernel/split/kernel_adaptive_filter_x.h b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h
--- a/intern/cycles/kernel/split/kernel_adaptive_filter_x.h 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_x.h 2020-01-10 20:42:43.467590055 +0300
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_filter_x(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.h &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int y = kernel_split_params.tile.y + pixel_index;
+ kernel_do_adaptive_filter_x(kg, y, &kernel_split_params.tile);
+ }
+}
+
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/kernel/split/kernel_adaptive_filter_y.h b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h
--- a/intern/cycles/kernel/split/kernel_adaptive_filter_y.h 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/split/kernel_adaptive_filter_y.h 2020-01-10 20:42:43.470923389 +0300
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_filter_y(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int x = kernel_split_params.tile.x + pixel_index;
+ kernel_do_adaptive_filter_y(kg, x, &kernel_split_params.tile);
+ }
+}
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/kernel/split/kernel_adaptive_stopping.h b/intern/cycles/kernel/split/kernel_adaptive_stopping.h
--- a/intern/cycles/kernel/split/kernel_adaptive_stopping.h 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/kernel/split/kernel_adaptive_stopping.h 2020-01-10 20:42:43.470923389 +0300
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_adaptive_stopping(KernelGlobals *kg)
+{
+ int pixel_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if (pixel_index < kernel_split_params.tile.w * kernel_split_params.tile.h &&
+ kernel_split_params.tile.start_sample + kernel_split_params.tile.num_samples >=
+ kernel_data.integrator.adaptive_min_samples) {
+ int x = kernel_split_params.tile.x + pixel_index % kernel_split_params.tile.w;
+ int y = kernel_split_params.tile.y + pixel_index / kernel_split_params.tile.w;
+ int buffer_offset = (kernel_split_params.tile.offset + x +
+ y * kernel_split_params.tile.stride) *
+ kernel_data.film.pass_stride;
+ ccl_global float *buffer = kernel_split_params.tile.buffer + buffer_offset;
+ kernel_do_adaptive_stopping(kg,
+ buffer,
+ kernel_split_params.tile.start_sample +
+ kernel_split_params.tile.num_samples - 1);
+ }
+}
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
--- a/intern/cycles/render/buffers.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/buffers.cpp 2020-01-10 20:42:43.470923389 +0300
@@ -258,6 +258,22 @@
return false;
}
+ float *sample_count = NULL;
+  if (name == "Combined") {
+ int sample_offset = 0;
+ for (size_t j = 0; j < params.passes.size(); j++) {
+ Pass &pass = params.passes[j];
+ if (pass.type != PASS_SAMPLE_COUNT) {
+ sample_offset += pass.components;
+ continue;
+ }
+ else {
+ sample_count = buffer.data() + sample_offset;
+ break;
+ }
+ }
+ }
+
int pass_offset = 0;
for (size_t j = 0; j < params.passes.size(); j++) {
@@ -418,6 +434,11 @@
}
else {
for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) {
+ if (sample_count && sample_count[i * pass_stride] < 0.0f) {
+ scale = (pass.filter) ? -1.0f / (sample_count[i * pass_stride]) : 1.0f;
+ scale_exposure = (pass.exposure) ? scale * exposure : scale;
+ }
+
float4 f = make_float4(in[0], in[1], in[2], in[3]);
pixels[0] = f.x * scale_exposure;
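
The Combined read-back above can no longer divide every pixel by the global sample number: an early-stopped pixel accumulated fewer samples, and its PASS_SAMPLE_COUNT entry is negative while it remains in the converged state, so the loop swaps in 1/|count| as the filter scale for exactly those pixels. A small sketch of the per-pixel scale selection (hypothetical helper, not part of the patch):

    /* Per-pixel scale for a filtered pass: a negative stored count marks an
     * early-stopped pixel that accumulated |count| samples, so average by
     * |count| instead of the global sample number. */
    static float combined_scale(float stored_count, int global_sample, int filtered)
    {
      if (!filtered)
        return 1.0f;                      /* unfiltered passes are not averaged */
      if (stored_count < 0.0f)
        return -1.0f / stored_count;      /* early-stopped: per-pixel count */
      return 1.0f / (float)global_sample; /* default behaviour */
    }
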
diff -Naur a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt
--- a/intern/cycles/render/CMakeLists.txt 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/CMakeLists.txt 2020-01-10 20:42:43.470923389 +0300
@@ -22,6 +22,7 @@
graph.cpp
image.cpp
integrator.cpp
+ jitter.cpp
light.cpp
merge.cpp
mesh.cpp
@@ -58,6 +59,7 @@
image.h
   integrator.h
+  jitter.h
   light.h
merge.h
mesh.h
nodes.h
diff -Naur a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp
--- a/intern/cycles/render/film.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/film.cpp 2020-01-10 20:42:43.470923389 +0300
@@ -196,6 +196,13 @@
case PASS_AOV_VALUE:
pass.components = 1;
break;
+ case PASS_ADAPTIVE_AUX_BUFFER:
+ pass.components = 4;
+ break;
+ case PASS_SAMPLE_COUNT:
+ pass.components = 1;
+ pass.exposure = false;
+ break;
default:
assert(false);
break;
@@ -318,6 +325,7 @@
SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false);
SOCKET_BOOLEAN(denoising_prefiltered_pass, "Generate Denoising Prefiltered Pass", false);
SOCKET_INT(denoising_flags, "Denoising Flags", 0);
+ SOCKET_BOOLEAN(use_adaptive_sampling, "Use Adaptive Sampling", false);
return type;
}
@@ -507,6 +515,12 @@
have_aov_value = true;
}
break;
+ case PASS_ADAPTIVE_AUX_BUFFER:
+ kfilm->pass_adaptive_aux_buffer = kfilm->pass_stride;
+ break;
+ case PASS_SAMPLE_COUNT:
+ kfilm->pass_sample_count = kfilm->pass_stride;
+ break;
default:
assert(false);
break;
diff -Naur a/intern/cycles/render/film.cpp.orig b/intern/cycles/render/film.cpp.orig
--- a/intern/cycles/render/film.cpp.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/render/film.cpp.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,638 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "render/camera.h"
+#include "device/device.h"
+#include "render/film.h"
+#include "render/integrator.h"
+#include "render/mesh.h"
+#include "render/scene.h"
+#include "render/tables.h"
+
+#include "util/util_algorithm.h"
+#include "util/util_foreach.h"
+#include "util/util_math.h"
+#include "util/util_math_cdf.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Pass */
+
+static bool compare_pass_order(const Pass &a, const Pass &b)
+{
+ if (a.components == b.components)
+ return (a.type < b.type);
+ return (a.components > b.components);
+}
+
+void Pass::add(PassType type, vector<Pass> &passes, const char *name)
+{
+ for (size_t i = 0; i < passes.size(); i++) {
+ if (passes[i].type != type) {
+ continue;
+ }
+
+ /* An empty name is used as a placeholder to signal that any pass of
+ * that type is fine (because the content always is the same).
+ * This is important to support divide_type: If the pass that has a
+ * divide_type is added first, a pass for divide_type with an empty
+ * name will be added. Then, if a matching pass with a name is later
+ * requested, the existing placeholder will be renamed to that.
+ * If the divide_type is explicitly allocated with a name first and
+ * then again as part of another pass, the second one will just be
+ * skipped because that type already exists. */
+
+ /* If no name is specified, any pass of the correct type will match. */
+ if (name == NULL) {
+ return;
+ }
+
+ /* If we already have a placeholder pass, rename that one. */
+ if (passes[i].name.empty()) {
+ passes[i].name = name;
+ return;
+ }
+
+ /* If neither existing nor requested pass have placeholder name, they
+ * must match. */
+ if (name == passes[i].name) {
+ return;
+ }
+ }
+
+ Pass pass;
+
+ pass.type = type;
+ pass.filter = true;
+ pass.exposure = false;
+ pass.divide_type = PASS_NONE;
+ if (name) {
+ pass.name = name;
+ }
+
+ switch (type) {
+ case PASS_NONE:
+ pass.components = 0;
+ break;
+ case PASS_COMBINED:
+ pass.components = 4;
+ pass.exposure = true;
+ break;
+ case PASS_DEPTH:
+ pass.components = 1;
+ pass.filter = false;
+ break;
+ case PASS_MIST:
+ pass.components = 1;
+ break;
+ case PASS_NORMAL:
+ pass.components = 4;
+ break;
+ case PASS_UV:
+ pass.components = 4;
+ break;
+ case PASS_MOTION:
+ pass.components = 4;
+ pass.divide_type = PASS_MOTION_WEIGHT;
+ break;
+ case PASS_MOTION_WEIGHT:
+ pass.components = 1;
+ break;
+ case PASS_OBJECT_ID:
+ case PASS_MATERIAL_ID:
+ pass.components = 1;
+ pass.filter = false;
+ break;
+
+ case PASS_EMISSION:
+ case PASS_BACKGROUND:
+ pass.components = 4;
+ pass.exposure = true;
+ break;
+ case PASS_AO:
+ pass.components = 4;
+ break;
+ case PASS_SHADOW:
+ pass.components = 4;
+ pass.exposure = false;
+ break;
+ case PASS_LIGHT:
+ /* This isn't a real pass, used by baking to see whether
+ * light data is needed or not.
+ *
+ * Set components to 0 so pass sort below happens in a
+ * determined way.
+ */
+ pass.components = 0;
+ break;
+#ifdef WITH_CYCLES_DEBUG
+ case PASS_BVH_TRAVERSED_NODES:
+ case PASS_BVH_TRAVERSED_INSTANCES:
+ case PASS_BVH_INTERSECTIONS:
+ case PASS_RAY_BOUNCES:
+ pass.components = 1;
+ pass.exposure = false;
+ break;
+#endif
+ case PASS_RENDER_TIME:
+ /* This pass is handled entirely on the host side. */
+ pass.components = 0;
+ break;
+
+ case PASS_DIFFUSE_COLOR:
+ case PASS_GLOSSY_COLOR:
+ case PASS_TRANSMISSION_COLOR:
+ case PASS_SUBSURFACE_COLOR:
+ pass.components = 4;
+ break;
+ case PASS_DIFFUSE_DIRECT:
+ case PASS_DIFFUSE_INDIRECT:
+ pass.components = 4;
+ pass.exposure = true;
+ pass.divide_type = PASS_DIFFUSE_COLOR;
+ break;
+ case PASS_GLOSSY_DIRECT:
+ case PASS_GLOSSY_INDIRECT:
+ pass.components = 4;
+ pass.exposure = true;
+ pass.divide_type = PASS_GLOSSY_COLOR;
+ break;
+ case PASS_TRANSMISSION_DIRECT:
+ case PASS_TRANSMISSION_INDIRECT:
+ pass.components = 4;
+ pass.exposure = true;
+ pass.divide_type = PASS_TRANSMISSION_COLOR;
+ break;
+ case PASS_SUBSURFACE_DIRECT:
+ case PASS_SUBSURFACE_INDIRECT:
+ pass.components = 4;
+ pass.exposure = true;
+ pass.divide_type = PASS_SUBSURFACE_COLOR;
+ break;
+ case PASS_VOLUME_DIRECT:
+ case PASS_VOLUME_INDIRECT:
+ pass.components = 4;
+ pass.exposure = true;
+ break;
+ case PASS_CRYPTOMATTE:
+ pass.components = 4;
+ break;
+ case PASS_AOV_COLOR:
+ pass.components = 4;
+ break;
+ case PASS_AOV_VALUE:
+ pass.components = 1;
+ break;
+ default:
+ assert(false);
+ break;
+ }
+
+ passes.push_back(pass);
+
+ /* order from by components, to ensure alignment so passes with size 4
+ * come first and then passes with size 1 */
+ sort(&passes[0], &passes[0] + passes.size(), compare_pass_order);
+
+ if (pass.divide_type != PASS_NONE)
+ Pass::add(pass.divide_type, passes);
+}
+
+bool Pass::equals(const vector<Pass> &A, const vector<Pass> &B)
+{
+ if (A.size() != B.size())
+ return false;
+
+ for (int i = 0; i < A.size(); i++)
+ if (A[i].type != B[i].type || A[i].name != B[i].name)
+ return false;
+
+ return true;
+}
+
+bool Pass::contains(const vector<Pass> &passes, PassType type)
+{
+ for (size_t i = 0; i < passes.size(); i++)
+ if (passes[i].type == type)
+ return true;
+
+ return false;
+}
+
+/* Pixel Filter */
+
+static float filter_func_box(float /*v*/, float /*width*/)
+{
+ return 1.0f;
+}
+
+static float filter_func_gaussian(float v, float width)
+{
+ v *= 6.0f / width;
+ return expf(-2.0f * v * v);
+}
+
+static float filter_func_blackman_harris(float v, float width)
+{
+ v = M_2PI_F * (v / width + 0.5f);
+ return 0.35875f - 0.48829f * cosf(v) + 0.14128f * cosf(2.0f * v) - 0.01168f * cosf(3.0f * v);
+}
+
+static vector<float> filter_table(FilterType type, float width)
+{
+ vector<float> filter_table(FILTER_TABLE_SIZE);
+ float (*filter_func)(float, float) = NULL;
+
+ switch (type) {
+ case FILTER_BOX:
+ filter_func = filter_func_box;
+ break;
+ case FILTER_GAUSSIAN:
+ filter_func = filter_func_gaussian;
+ width *= 3.0f;
+ break;
+ case FILTER_BLACKMAN_HARRIS:
+ filter_func = filter_func_blackman_harris;
+ width *= 2.0f;
+ break;
+ default:
+ assert(0);
+ }
+
+ /* Create importance sampling table. */
+
+ /* TODO(sergey): With the even filter table size resolution we can not
+ * really make it nice symmetric importance map without sampling full range
+ * (meaning, we would need to sample full filter range and not use the
+ * make_symmetric argument).
+ *
+ * Current code matches exactly initial filter table code, but we should
+ * consider either making FILTER_TABLE_SIZE odd value or sample full filter.
+ */
+
+ util_cdf_inverted(FILTER_TABLE_SIZE,
+ 0.0f,
+ width * 0.5f,
+ function_bind(filter_func, _1, width),
+ true,
+ filter_table);
+
+ return filter_table;
+}
+
+/* Film */
+
+NODE_DEFINE(Film)
+{
+ NodeType *type = NodeType::add("film", create);
+
+ SOCKET_FLOAT(exposure, "Exposure", 0.8f);
+ SOCKET_FLOAT(pass_alpha_threshold, "Pass Alpha Threshold", 0.0f);
+
+ static NodeEnum filter_enum;
+ filter_enum.insert("box", FILTER_BOX);
+ filter_enum.insert("gaussian", FILTER_GAUSSIAN);
+ filter_enum.insert("blackman_harris", FILTER_BLACKMAN_HARRIS);
+
+ SOCKET_ENUM(filter_type, "Filter Type", filter_enum, FILTER_BOX);
+ SOCKET_FLOAT(filter_width, "Filter Width", 1.0f);
+
+ SOCKET_FLOAT(mist_start, "Mist Start", 0.0f);
+ SOCKET_FLOAT(mist_depth, "Mist Depth", 100.0f);
+ SOCKET_FLOAT(mist_falloff, "Mist Falloff", 1.0f);
+
+ SOCKET_BOOLEAN(denoising_data_pass, "Generate Denoising Data Pass", false);
+ SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false);
+ SOCKET_BOOLEAN(denoising_prefiltered_pass, "Generate Denoising Prefiltered Pass", false);
+ SOCKET_INT(denoising_flags, "Denoising Flags", 0);
+
+ return type;
+}
+
+Film::Film() : Node(node_type)
+{
+ use_light_visibility = false;
+ filter_table_offset = TABLE_OFFSET_INVALID;
+ cryptomatte_passes = CRYPT_NONE;
+
+ need_update = true;
+}
+
+Film::~Film()
+{
+}
+
+void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
+{
+ if (!need_update)
+ return;
+
+ device_free(device, dscene, scene);
+
+ KernelFilm *kfilm = &dscene->data.film;
+
+ /* update __data */
+ kfilm->exposure = exposure;
+ kfilm->pass_flag = 0;
+
+ kfilm->display_pass_stride = -1;
+ kfilm->display_pass_components = 0;
+ kfilm->display_divide_pass_stride = -1;
+ kfilm->use_display_exposure = false;
+ kfilm->use_display_pass_alpha = (display_pass == PASS_COMBINED);
+
+ kfilm->light_pass_flag = 0;
+ kfilm->pass_stride = 0;
+ kfilm->use_light_pass = use_light_visibility;
+
+ bool have_cryptomatte = false, have_aov_color = false, have_aov_value = false;
+
+ for (size_t i = 0; i < passes.size(); i++) {
+ Pass &pass = passes[i];
+
+ if (pass.type == PASS_NONE) {
+ continue;
+ }
+
+ /* Can't do motion pass if no motion vectors are available. */
+ if (pass.type == PASS_MOTION || pass.type == PASS_MOTION_WEIGHT) {
+ if (scene->need_motion() != Scene::MOTION_PASS) {
+ kfilm->pass_stride += pass.components;
+ continue;
+ }
+ }
+
+ int pass_flag = (1 << (pass.type % 32));
+ if (pass.type <= PASS_CATEGORY_MAIN_END) {
+ kfilm->pass_flag |= pass_flag;
+ }
+ else {
+ assert(pass.type <= PASS_CATEGORY_LIGHT_END);
+ kfilm->use_light_pass = 1;
+ kfilm->light_pass_flag |= pass_flag;
+ }
+
+ switch (pass.type) {
+ case PASS_COMBINED:
+ kfilm->pass_combined = kfilm->pass_stride;
+ break;
+ case PASS_DEPTH:
+ kfilm->pass_depth = kfilm->pass_stride;
+ break;
+ case PASS_NORMAL:
+ kfilm->pass_normal = kfilm->pass_stride;
+ break;
+ case PASS_UV:
+ kfilm->pass_uv = kfilm->pass_stride;
+ break;
+ case PASS_MOTION:
+ kfilm->pass_motion = kfilm->pass_stride;
+ break;
+ case PASS_MOTION_WEIGHT:
+ kfilm->pass_motion_weight = kfilm->pass_stride;
+ break;
+ case PASS_OBJECT_ID:
+ kfilm->pass_object_id = kfilm->pass_stride;
+ break;
+ case PASS_MATERIAL_ID:
+ kfilm->pass_material_id = kfilm->pass_stride;
+ break;
+
+ case PASS_MIST:
+ kfilm->pass_mist = kfilm->pass_stride;
+ break;
+ case PASS_EMISSION:
+ kfilm->pass_emission = kfilm->pass_stride;
+ break;
+ case PASS_BACKGROUND:
+ kfilm->pass_background = kfilm->pass_stride;
+ break;
+ case PASS_AO:
+ kfilm->pass_ao = kfilm->pass_stride;
+ break;
+ case PASS_SHADOW:
+ kfilm->pass_shadow = kfilm->pass_stride;
+ break;
+
+ case PASS_LIGHT:
+ break;
+
+ case PASS_DIFFUSE_COLOR:
+ kfilm->pass_diffuse_color = kfilm->pass_stride;
+ break;
+ case PASS_GLOSSY_COLOR:
+ kfilm->pass_glossy_color = kfilm->pass_stride;
+ break;
+ case PASS_TRANSMISSION_COLOR:
+ kfilm->pass_transmission_color = kfilm->pass_stride;
+ break;
+ case PASS_SUBSURFACE_COLOR:
+ kfilm->pass_subsurface_color = kfilm->pass_stride;
+ break;
+ case PASS_DIFFUSE_INDIRECT:
+ kfilm->pass_diffuse_indirect = kfilm->pass_stride;
+ break;
+ case PASS_GLOSSY_INDIRECT:
+ kfilm->pass_glossy_indirect = kfilm->pass_stride;
+ break;
+ case PASS_TRANSMISSION_INDIRECT:
+ kfilm->pass_transmission_indirect = kfilm->pass_stride;
+ break;
+ case PASS_SUBSURFACE_INDIRECT:
+ kfilm->pass_subsurface_indirect = kfilm->pass_stride;
+ break;
+ case PASS_VOLUME_INDIRECT:
+ kfilm->pass_volume_indirect = kfilm->pass_stride;
+ break;
+ case PASS_DIFFUSE_DIRECT:
+ kfilm->pass_diffuse_direct = kfilm->pass_stride;
+ break;
+ case PASS_GLOSSY_DIRECT:
+ kfilm->pass_glossy_direct = kfilm->pass_stride;
+ break;
+ case PASS_TRANSMISSION_DIRECT:
+ kfilm->pass_transmission_direct = kfilm->pass_stride;
+ break;
+ case PASS_SUBSURFACE_DIRECT:
+ kfilm->pass_subsurface_direct = kfilm->pass_stride;
+ break;
+ case PASS_VOLUME_DIRECT:
+ kfilm->pass_volume_direct = kfilm->pass_stride;
+ break;
+
+#ifdef WITH_CYCLES_DEBUG
+ case PASS_BVH_TRAVERSED_NODES:
+ kfilm->pass_bvh_traversed_nodes = kfilm->pass_stride;
+ break;
+ case PASS_BVH_TRAVERSED_INSTANCES:
+ kfilm->pass_bvh_traversed_instances = kfilm->pass_stride;
+ break;
+ case PASS_BVH_INTERSECTIONS:
+ kfilm->pass_bvh_intersections = kfilm->pass_stride;
+ break;
+ case PASS_RAY_BOUNCES:
+ kfilm->pass_ray_bounces = kfilm->pass_stride;
+ break;
+#endif
+ case PASS_RENDER_TIME:
+ break;
+ case PASS_CRYPTOMATTE:
+ kfilm->pass_cryptomatte = have_cryptomatte ?
+ min(kfilm->pass_cryptomatte, kfilm->pass_stride) :
+ kfilm->pass_stride;
+ have_cryptomatte = true;
+ break;
+ case PASS_AOV_COLOR:
+ if (!have_aov_color) {
+ kfilm->pass_aov_color = kfilm->pass_stride;
+ have_aov_color = true;
+ }
+ break;
+ case PASS_AOV_VALUE:
+ if (!have_aov_value) {
+ kfilm->pass_aov_value = kfilm->pass_stride;
+ have_aov_value = true;
+ }
+ break;
+ default:
+ assert(false);
+ break;
+ }
+
+ if (pass.type == display_pass) {
+ kfilm->display_pass_stride = kfilm->pass_stride;
+ kfilm->display_pass_components = pass.components;
+ kfilm->use_display_exposure = pass.exposure && (kfilm->exposure != 1.0f);
+ }
+ else if (pass.type == PASS_DIFFUSE_COLOR || pass.type == PASS_TRANSMISSION_COLOR ||
+ pass.type == PASS_GLOSSY_COLOR || pass.type == PASS_SUBSURFACE_COLOR) {
+ kfilm->display_divide_pass_stride = kfilm->pass_stride;
+ }
+
+ kfilm->pass_stride += pass.components;
+ }
+
+ kfilm->pass_denoising_data = 0;
+ kfilm->pass_denoising_clean = 0;
+ kfilm->denoising_flags = 0;
+ if (denoising_data_pass) {
+ kfilm->pass_denoising_data = kfilm->pass_stride;
+ kfilm->pass_stride += DENOISING_PASS_SIZE_BASE;
+ kfilm->denoising_flags = denoising_flags;
+ if (denoising_clean_pass) {
+ kfilm->pass_denoising_clean = kfilm->pass_stride;
+ kfilm->pass_stride += DENOISING_PASS_SIZE_CLEAN;
+ kfilm->use_light_pass = 1;
+ }
+ if (denoising_prefiltered_pass) {
+ kfilm->pass_stride += DENOISING_PASS_SIZE_PREFILTERED;
+ }
+ }
+
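+  /* Keep the total stride 4-aligned, presumably so float4 reads in the
+   * kernel stay on aligned addresses. */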
+ kfilm->pass_stride = align_up(kfilm->pass_stride, 4);
+
+  /* When displaying the normal/uv pass in the viewport we need to disable
+   * transparency.
+   *
+   * We also don't need to perform light accumulation; later this can be
+   * optimized to skip the light calculations entirely. */
+ if (display_pass == PASS_NORMAL || display_pass == PASS_UV) {
+ kfilm->use_light_pass = 0;
+ }
+ else {
+ kfilm->pass_alpha_threshold = pass_alpha_threshold;
+ }
+
+ /* update filter table */
+ vector<float> table = filter_table(filter_type, filter_width);
+ scene->lookup_tables->remove_table(&filter_table_offset);
+ filter_table_offset = scene->lookup_tables->add_table(dscene, table);
+ kfilm->filter_table_offset = (int)filter_table_offset;
+
+ /* mist pass parameters */
+ kfilm->mist_start = mist_start;
+ kfilm->mist_inv_depth = (mist_depth > 0.0f) ? 1.0f / mist_depth : 0.0f;
+ kfilm->mist_falloff = mist_falloff;
+
+ kfilm->cryptomatte_passes = cryptomatte_passes;
+ kfilm->cryptomatte_depth = cryptomatte_depth;
+
+ pass_stride = kfilm->pass_stride;
+ denoising_data_offset = kfilm->pass_denoising_data;
+ denoising_clean_offset = kfilm->pass_denoising_clean;
+
+ need_update = false;
+}
+
+void Film::device_free(Device * /*device*/, DeviceScene * /*dscene*/, Scene *scene)
+{
+ scene->lookup_tables->remove_table(&filter_table_offset);
+}
+
+bool Film::modified(const Film &film)
+{
+ return !Node::equals(film) || !Pass::equals(passes, film.passes);
+}
+
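+/* Adding or removing some passes invalidates other device data: UV and
+ * motion passes are generated from mesh attributes, and an AO pass changes
+ * the integrator setup. */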
+void Film::tag_passes_update(Scene *scene, const vector<Pass> &passes_, bool update_passes)
+{
+ if (Pass::contains(passes, PASS_UV) != Pass::contains(passes_, PASS_UV)) {
+ scene->mesh_manager->tag_update(scene);
+
+ foreach (Shader *shader, scene->shaders)
+ shader->need_update_mesh = true;
+ }
+ else if (Pass::contains(passes, PASS_MOTION) != Pass::contains(passes_, PASS_MOTION)) {
+ scene->mesh_manager->tag_update(scene);
+ }
+ else if (Pass::contains(passes, PASS_AO) != Pass::contains(passes_, PASS_AO)) {
+ scene->integrator->tag_update(scene);
+ }
+
+ if (update_passes) {
+ passes = passes_;
+ }
+}
+
+void Film::tag_update(Scene * /*scene*/)
+{
+ need_update = true;
+}
+
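+/* Return the index of the named AOV among the AOVs of its own type (color
+ * and value AOVs are numbered independently), or -1 if it is not found. */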
+int Film::get_aov_offset(string name, bool &is_color)
+{
+ int num_color = 0, num_value = 0;
+ foreach (const Pass &pass, passes) {
+ if (pass.type == PASS_AOV_COLOR) {
+ num_color++;
+ }
+ else if (pass.type == PASS_AOV_VALUE) {
+ num_value++;
+ }
+ else {
+ continue;
+ }
+
+ if (pass.name == name) {
+ is_color = (pass.type == PASS_AOV_COLOR);
+ return (is_color ? num_color : num_value) - 1;
+ }
+ }
+
+ return -1;
+}
+
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/render/film.h b/intern/cycles/render/film.h
--- a/intern/cycles/render/film.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/film.h 2020-01-10 20:42:43.470923389 +0300
@@ -81,6 +81,8 @@
CryptomatteType cryptomatte_passes;
int cryptomatte_depth;
+ bool use_adaptive_sampling;
+
bool need_update;
Film();
diff -Naur a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
--- a/intern/cycles/render/integrator.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/integrator.cpp 2020-01-10 20:42:43.470923389 +0300
@@ -18,12 +18,14 @@
#include "render/background.h"
#include "render/integrator.h"
#include "render/film.h"
+#include "render/jitter.h"
#include "render/light.h"
#include "render/scene.h"
#include "render/shader.h"
#include "render/sobol.h"
#include "util/util_foreach.h"
+#include "util/util_logging.h"
#include "util/util_hash.h"
CCL_NAMESPACE_BEGIN
@@ -66,6 +68,9 @@
SOCKET_INT(volume_samples, "Volume Samples", 1);
SOCKET_INT(start_sample, "Start Sample", 0);
+ SOCKET_FLOAT(adaptive_threshold, "Adaptive Threshold", 0.0f);
+ SOCKET_INT(adaptive_min_samples, "Adaptive Min Samples", 0);
+
SOCKET_BOOLEAN(sample_all_lights_direct, "Sample All Lights Direct", true);
SOCKET_BOOLEAN(sample_all_lights_indirect, "Sample All Lights Indirect", true);
SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.05f);
@@ -78,6 +83,7 @@
static NodeEnum sampling_pattern_enum;
sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL);
sampling_pattern_enum.insert("cmj", SAMPLING_PATTERN_CMJ);
+ sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ);
SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL);
return type;
@@ -174,6 +180,22 @@
kintegrator->sampling_pattern = sampling_pattern;
kintegrator->aa_samples = aa_samples;
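+  /* Heuristic defaults for adaptive sampling: refine each pixel for at
+   * least sqrt(total AA samples) samples, and tighten the noise threshold
+   * as the total sample count grows. */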
+ if (aa_samples > 0 && adaptive_min_samples == 0) {
+ kintegrator->adaptive_min_samples = max(4, (int)sqrtf(aa_samples));
+ VLOG(1) << "Cycles adaptive sampling: automatic min samples = "
+ << kintegrator->adaptive_min_samples;
+ }
+ else {
+ kintegrator->adaptive_min_samples = max(4, adaptive_min_samples);
+ }
+ if (aa_samples > 0 && adaptive_threshold == 0.0f) {
+ kintegrator->adaptive_threshold = max(0.001f, 1.0f / (float)aa_samples);
+ VLOG(1) << "Cycles adaptive sampling: automatic threshold = "
+ << kintegrator->adaptive_threshold;
+ }
+ else {
+ kintegrator->adaptive_threshold = adaptive_threshold;
+ }
if (light_sampling_threshold > 0.0f) {
kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold;
@@ -203,18 +225,34 @@
int dimensions = PRNG_BASE_NUM + max_samples * PRNG_BOUNCE_NUM;
dimensions = min(dimensions, SOBOL_MAX_DIMENSIONS);
- uint *directions = dscene->sobol_directions.alloc(SOBOL_BITS * dimensions);
+ if (sampling_pattern == SAMPLING_PATTERN_SOBOL) {
+ uint *directions = dscene->sample_pattern_lut.alloc(SOBOL_BITS * dimensions);
- sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions);
+ sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions);
- dscene->sobol_directions.copy_to_device();
+ dscene->sample_pattern_lut.copy_to_device();
+ }
+ else {
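+    /* For PMJ the same LUT holds 48 independent pre-generated sequences of
+     * 64 * 64 2D samples each, stored back to back; every float2 occupies
+     * two uint slots of sample_pattern_lut. */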
+ constexpr int sequence_size = 64 * 64;
+ constexpr int num_sequences = 48;
+ float2 *directions = (float2 *)dscene->sample_pattern_lut.alloc(sequence_size * num_sequences *
+ 2);
+ TaskPool pool;
+ for (int j = 0; j < num_sequences; ++j) {
+ float2 *sequence = directions + j * sequence_size;
+ pool.push(
+ function_bind(&progressive_multi_jitter_02_generate_2D, sequence, sequence_size, j));
+ }
+ pool.wait_work();
+ dscene->sample_pattern_lut.copy_to_device();
+ }
need_update = false;
}
void Integrator::device_free(Device *, DeviceScene *dscene)
{
- dscene->sobol_directions.free();
+ dscene->sample_pattern_lut.free();
}
bool Integrator::modified(const Integrator &integrator)
diff -Naur a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h
--- a/intern/cycles/render/integrator.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/integrator.h 2020-01-10 20:42:43.470923389 +0300
@@ -75,6 +75,9 @@
bool sample_all_lights_indirect;
float light_sampling_threshold;
+ int adaptive_min_samples;
+ float adaptive_threshold;
+
enum Method {
BRANCHED_PATH = 0,
PATH = 1,
diff -Naur a/intern/cycles/render/jitter.cpp b/intern/cycles/render/jitter.cpp
--- a/intern/cycles/render/jitter.cpp 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/render/jitter.cpp 2020-01-10 20:42:43.470923389 +0300
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This file is based on "Progressive Multi-Jittered Sample Sequences"
+ * by Per Christensen, Andrew Kensler and Charlie Kilpatrick.
+ * http://graphics.pixar.com/library/ProgressiveMultiJitteredSampling/paper.pdf
+ *
+ * Performance can be improved in the future by implementing the new
+ * algorithm from Matt Pharr in http://jcgt.org/published/0008/01/04/
+ * "Efficient Generation of Points that Satisfy Two-Dimensional Elementary Intervals"
+ */
+
+#include "render/jitter.h"
+
+#include <math.h>
+#include <vector>
+
+CCL_NAMESPACE_BEGIN
+
+static uint cmj_hash(uint i, uint p)
+{
+ i ^= p;
+ i ^= i >> 17;
+ i ^= i >> 10;
+ i *= 0xb36534e5;
+ i ^= i >> 12;
+ i ^= i >> 21;
+ i *= 0x93fc4795;
+ i ^= 0xdf6e307f;
+ i ^= i >> 17;
+ i *= 1 | p >> 18;
+
+ return i;
+}
+
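+/* Map a hash to [0, 1); the divisor is slightly larger than 2^32 so the
+ * result stays strictly below 1.0 after float rounding. */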
+static float cmj_randfloat(uint i, uint p)
+{
+ return cmj_hash(i, p) * (1.0f / 4294967808.0f);
+}
+
+class PMJ_Generator {
+ public:
+ static void generate_2D(float2 points[], int size, int rng_seed_in)
+ {
+ PMJ_Generator g(rng_seed_in);
+ points[0].x = g.rnd();
+ points[0].y = g.rnd();
+ int N = 1;
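+    /* Each loop iteration quadruples the sample count: the even pass fills
+     * the subquadrant diagonally opposite each existing point, then the odd
+     * pass fills the two remaining subquadrants. */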
+ while (N < size) {
+ g.extend_sequence_even(points, N);
+ g.extend_sequence_odd(points, 2 * N);
+ N = 4 * N;
+ }
+ }
+
+ protected:
+ PMJ_Generator(int rnd_seed_in) : num_samples(1), rnd_index(2), rnd_seed(rnd_seed_in)
+ {
+ }
+
+ float rnd()
+ {
+ return cmj_randfloat(++rnd_index, rnd_seed);
+ }
+
+ virtual void mark_occupied_strata(float2 points[], int N)
+ {
+ int NN = 2 * N;
+ for (int s = 0; s < NN; ++s) {
+ occupied1Dx[s] = occupied1Dy[s] = false;
+ }
+ for (int s = 0; s < N; ++s) {
+ int xstratum = (int)(NN * points[s].x);
+ int ystratum = (int)(NN * points[s].y);
+ occupied1Dx[xstratum] = true;
+ occupied1Dy[ystratum] = true;
+ }
+ }
+
+ virtual void generate_sample_point(
+ float2 points[], float i, float j, float xhalf, float yhalf, int n, int N)
+ {
+ int NN = 2 * N;
+ float2 pt;
+ int xstratum, ystratum;
+ do {
+ pt.x = (i + 0.5f * (xhalf + rnd())) / n;
+ xstratum = (int)(NN * pt.x);
+ } while (occupied1Dx[xstratum]);
+ do {
+ pt.y = (j + 0.5f * (yhalf + rnd())) / n;
+ ystratum = (int)(NN * pt.y);
+ } while (occupied1Dy[ystratum]);
+ occupied1Dx[xstratum] = true;
+ occupied1Dy[ystratum] = true;
+ points[num_samples] = pt;
+ ++num_samples;
+ }
+
+ void extend_sequence_even(float2 points[], int N)
+ {
+ int n = (int)sqrtf(N);
+ occupied1Dx.resize(2 * N);
+ occupied1Dy.resize(2 * N);
+ mark_occupied_strata(points, N);
+ for (int s = 0; s < N; ++s) {
+ float2 oldpt = points[s];
+ float i = floorf(n * oldpt.x);
+ float j = floorf(n * oldpt.y);
+ float xhalf = floorf(2.0f * (n * oldpt.x - i));
+ float yhalf = floorf(2.0f * (n * oldpt.y - j));
+ xhalf = 1.0f - xhalf;
+ yhalf = 1.0f - yhalf;
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+ }
+ }
+
+ void extend_sequence_odd(float2 points[], int N)
+ {
+ int n = (int)sqrtf(N / 2);
+ occupied1Dx.resize(2 * N);
+ occupied1Dy.resize(2 * N);
+ mark_occupied_strata(points, N);
+ std::vector<float> xhalves(N / 2);
+ std::vector<float> yhalves(N / 2);
+ for (int s = 0; s < N / 2; ++s) {
+ float2 oldpt = points[s];
+ float i = floorf(n * oldpt.x);
+ float j = floorf(n * oldpt.y);
+ float xhalf = floorf(2.0f * (n * oldpt.x - i));
+ float yhalf = floorf(2.0f * (n * oldpt.y - j));
+ if (rnd() > 0.5f) {
+ xhalf = 1.0f - xhalf;
+ }
+ else {
+ yhalf = 1.0f - yhalf;
+ }
+ xhalves[s] = xhalf;
+ yhalves[s] = yhalf;
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+ }
+ for (int s = 0; s < N / 2; ++s) {
+ float2 oldpt = points[s];
+ float i = floorf(n * oldpt.x);
+ float j = floorf(n * oldpt.y);
+ float xhalf = 1.0f - xhalves[s];
+ float yhalf = 1.0f - yhalves[s];
+ generate_sample_point(points, i, j, xhalf, yhalf, n, N);
+ }
+ }
+
+ std::vector<bool> occupied1Dx, occupied1Dy;
+ int num_samples;
+ int rnd_index, rnd_seed;
+};
+
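+/* The (0,2) variant additionally keeps every power-of-two elementary
+ * interval occupied by at most one point, which is what gives the sequence
+ * its low-discrepancy properties. */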
+class PMJ02_Generator : public PMJ_Generator {
+ protected:
+ void generate_sample_point(
+ float2 points[], float i, float j, float xhalf, float yhalf, int n, int N) override
+ {
+ int NN = 2 * N;
+ float2 pt;
+ do {
+ pt.x = (i + 0.5f * (xhalf + rnd())) / n;
+ pt.y = (j + 0.5f * (yhalf + rnd())) / n;
+ } while (is_occupied(pt, NN));
+ mark_occupied_strata1(pt, NN);
+ points[num_samples] = pt;
+ ++num_samples;
+ }
+
+ void mark_occupied_strata(float2 points[], int N) override
+ {
+ int NN = 2 * N;
+ int num_shapes = (int)log2f(NN) + 1;
+ occupiedStrata.resize(num_shapes);
+ for (int shape = 0; shape < num_shapes; ++shape) {
+ occupiedStrata[shape].resize(NN);
+ for (int n = 0; n < NN; ++n) {
+ occupiedStrata[shape][n] = false;
+ }
+ }
+ for (int s = 0; s < N; ++s) {
+ mark_occupied_strata1(points[s], NN);
+ }
+ }
+
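+  /* Walk all log2(NN) + 1 stratifications, from NN x 1 columns down to
+   * 1 x NN rows, and flag the cell containing the point in each of them. */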
+ void mark_occupied_strata1(float2 pt, int NN)
+ {
+ int shape = 0;
+ int xdivs = NN;
+ int ydivs = 1;
+ do {
+ int xstratum = (int)(xdivs * pt.x);
+ int ystratum = (int)(ydivs * pt.y);
+ size_t index = ystratum * xdivs + xstratum;
+ assert(index < NN);
+ occupiedStrata[shape][index] = true;
+ shape = shape + 1;
+ xdivs = xdivs / 2;
+ ydivs = ydivs * 2;
+ } while (xdivs > 0);
+ }
+
+ bool is_occupied(float2 pt, int NN)
+ {
+ int shape = 0;
+ int xdivs = NN;
+ int ydivs = 1;
+ do {
+ int xstratum = (int)(xdivs * pt.x);
+ int ystratum = (int)(ydivs * pt.y);
+ size_t index = ystratum * xdivs + xstratum;
+ assert(index < NN);
+ if (occupiedStrata[shape][index]) {
+ return true;
+ }
+ shape = shape + 1;
+ xdivs = xdivs / 2;
+ ydivs = ydivs * 2;
+ } while (xdivs > 0);
+ return false;
+ }
+
+ private:
+ std::vector<std::vector<bool>> occupiedStrata;
+};
+
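+/* Seeded Fisher-Yates shuffle applied separately to two fixed interleaved
+ * index sets within every block of 16 points; sequence 0 is left
+ * unshuffled. */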
+static void shuffle(float2 points[], int size, int rng_seed)
+{
+ /* Offset samples by 1.0 for faster scrambling in kernel_random.h */
+ for (int i = 0; i < size; ++i) {
+ points[i].x += 1.0f;
+ points[i].y += 1.0f;
+ }
+
+ if (rng_seed == 0) {
+ return;
+ }
+
+ constexpr int odd[8] = {0, 1, 4, 5, 10, 11, 14, 15};
+ constexpr int even[8] = {2, 3, 6, 7, 8, 9, 12, 13};
+
+ int rng_index = 0;
+ for (int yy = 0; yy < size / 16; ++yy) {
+ for (int xx = 0; xx < 8; ++xx) {
+ int other = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - xx) + xx);
+ float2 tmp = points[odd[other] + yy * 16];
+ points[odd[other] + yy * 16] = points[odd[xx] + yy * 16];
+ points[odd[xx] + yy * 16] = tmp;
+ }
+ for (int xx = 0; xx < 8; ++xx) {
+ int other = (int)(cmj_randfloat(++rng_index, rng_seed) * (8.0f - xx) + xx);
+ float2 tmp = points[even[other] + yy * 16];
+ points[even[other] + yy * 16] = points[even[xx] + yy * 16];
+ points[even[xx] + yy * 16] = tmp;
+ }
+ }
+}
+
+void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed)
+{
+ PMJ_Generator::generate_2D(points, size, rng_seed);
+ shuffle(points, size, rng_seed);
+}
+
+void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed)
+{
+ PMJ02_Generator::generate_2D(points, size, rng_seed);
+ shuffle(points, size, rng_seed);
+}
+
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/render/jitter.h b/intern/cycles/render/jitter.h
--- a/intern/cycles/render/jitter.h 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/render/jitter.h 2020-01-10 20:42:43.470923389 +0300
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __JITTER_H__
+#define __JITTER_H__
+
+#include "util/util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
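+/* Both generators fill a caller-allocated array of "size" points, where
+ * size is expected to be a power of four (the integrator uses 64 * 64),
+ * for example:
+ *
+ *   float2 points[64 * 64];
+ *   progressive_multi_jitter_02_generate_2D(points, 64 * 64, seed);
+ */
+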
+void progressive_multi_jitter_generate_2D(float2 points[], int size, int rng_seed);
+void progressive_multi_jitter_02_generate_2D(float2 points[], int size, int rng_seed);
+
+CCL_NAMESPACE_END
+
+#endif /* __JITTER_H__ */
diff -Naur a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp
--- a/intern/cycles/render/scene.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/scene.cpp 2020-01-10 20:42:43.474256721 +0300
@@ -77,7 +77,7 @@
svm_nodes(device, "__svm_nodes", MEM_TEXTURE),
shaders(device, "__shaders", MEM_TEXTURE),
lookup_table(device, "__lookup_table", MEM_TEXTURE),
- sobol_directions(device, "__sobol_directions", MEM_TEXTURE),
+ sample_pattern_lut(device, "__sample_pattern_lut", MEM_TEXTURE),
ies_lights(device, "__ies", MEM_TEXTURE)
{
memset((void *)&data, 0, sizeof(data));
diff -Naur a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
--- a/intern/cycles/render/scene.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/scene.h 2020-01-10 20:42:43.474256721 +0300
@@ -119,7 +119,7 @@
device_vector<float> lookup_table;
/* integrator */
- device_vector<uint> sobol_directions;
+ device_vector<uint> sample_pattern_lut;
/* ies lights */
device_vector<float> ies_lights;
diff -Naur a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
--- a/intern/cycles/render/session.cpp 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/session.cpp 2020-01-10 20:42:43.474256721 +0300
@@ -900,7 +900,7 @@
Integrator *integrator = scene->integrator;
BakeManager *bake_manager = scene->bake_manager;
- if (integrator->sampling_pattern == SAMPLING_PATTERN_CMJ || bake_manager->get_baking()) {
+ if (integrator->sampling_pattern != SAMPLING_PATTERN_SOBOL || bake_manager->get_baking()) {
int aa_samples = tile_manager.num_samples;
if (aa_samples != integrator->aa_samples) {
@@ -1022,6 +1022,7 @@
task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
task.need_finish_queue = params.progressive_refine;
task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
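+  /* In this patch adaptive sampling is tied to the PMJ pattern, so the
+   * device task can key off the sampling pattern alone. */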
+ task.integrator_adaptive = scene->integrator->sampling_pattern == SAMPLING_PATTERN_PMJ;
task.requested_tile_size = params.tile_size;
task.passes_size = tile_manager.params.get_passes_size();
diff -Naur a/intern/cycles/render/session.cpp.orig b/intern/cycles/render/session.cpp.orig
--- a/intern/cycles/render/session.cpp.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/render/session.cpp.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,1163 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <limits.h>
+
+#include "render/buffers.h"
+#include "render/camera.h"
+#include "device/device.h"
+#include "render/graph.h"
+#include "render/integrator.h"
+#include "render/light.h"
+#include "render/mesh.h"
+#include "render/object.h"
+#include "render/scene.h"
+#include "render/session.h"
+#include "render/bake.h"
+
+#include "util/util_foreach.h"
+#include "util/util_function.h"
+#include "util/util_logging.h"
+#include "util/util_math.h"
+#include "util/util_opengl.h"
+#include "util/util_task.h"
+#include "util/util_time.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Note about the preserve_tile_device option for the tile manager:
+ * progressive refine and viewport rendering require tiles to always be
+ * allocated for the same device.
+ */
+Session::Session(const SessionParams &params_)
+ : params(params_),
+ tile_manager(params.progressive,
+ params.samples,
+ params.tile_size,
+ params.start_resolution,
+ params.background == false || params.progressive_refine,
+ params.background,
+ params.tile_order,
+ max(params.device.multi_devices.size(), 1),
+ params.pixel_size),
+ stats(),
+ profiler()
+{
+ device_use_gl = ((params.device.type != DEVICE_CPU) && !params.background);
+
+ TaskScheduler::init(params.threads);
+
+ device = Device::create(params.device, stats, profiler, params.background);
+
+ if (params.background && !params.write_render_cb) {
+ buffers = NULL;
+ display = NULL;
+ }
+ else {
+ buffers = new RenderBuffers(device);
+ display = new DisplayBuffer(device, params.display_buffer_linear);
+ }
+
+ session_thread = NULL;
+ scene = NULL;
+
+ reset_time = 0.0;
+ last_update_time = 0.0;
+
+ delayed_reset.do_reset = false;
+ delayed_reset.samples = 0;
+
+ display_outdated = false;
+ gpu_draw_ready = false;
+ gpu_need_display_buffer_update = false;
+ pause = false;
+ kernels_loaded = false;
+
+  /* TODO(sergey): Check if it's indeed an optimal value for the split kernel. */
+ max_closure_global = 1;
+}
+
+Session::~Session()
+{
+ if (session_thread) {
+ /* wait for session thread to end */
+ progress.set_cancel("Exiting");
+
+ gpu_need_display_buffer_update = false;
+ gpu_need_display_buffer_update_cond.notify_all();
+
+ {
+ thread_scoped_lock pause_lock(pause_mutex);
+ pause = false;
+ }
+ pause_cond.notify_all();
+
+ wait();
+ }
+
+ if (params.write_render_cb) {
+ /* Copy to display buffer and write out image if requested */
+ delete display;
+
+ display = new DisplayBuffer(device, false);
+ display->reset(buffers->params);
+ copy_to_display_buffer(params.samples);
+
+ int w = display->draw_width;
+ int h = display->draw_height;
+ uchar4 *pixels = display->rgba_byte.copy_from_device(0, w, h);
+ params.write_render_cb((uchar *)pixels, w, h, 4);
+ }
+
+ /* clean up */
+ tile_manager.device_free();
+
+ delete buffers;
+ delete display;
+ delete scene;
+ delete device;
+
+ TaskScheduler::exit();
+}
+
+void Session::start()
+{
+ if (!session_thread) {
+ session_thread = new thread(function_bind(&Session::run, this));
+ }
+}
+
+bool Session::ready_to_reset()
+{
+ double dt = time_dt() - reset_time;
+
+ if (!display_outdated)
+ return (dt > params.reset_timeout);
+ else
+ return (dt > params.cancel_timeout);
+}
+
+/* GPU Session */
+
+void Session::reset_gpu(BufferParams &buffer_params, int samples)
+{
+ thread_scoped_lock pause_lock(pause_mutex);
+
+ /* block for buffer access and reset immediately. we can't do this
+ * in the thread, because we need to allocate an OpenGL buffer, and
+ * that only works in the main thread */
+ thread_scoped_lock display_lock(display_mutex);
+ thread_scoped_lock buffers_lock(buffers_mutex);
+
+ display_outdated = true;
+ reset_time = time_dt();
+
+ reset_(buffer_params, samples);
+
+ gpu_need_display_buffer_update = false;
+ gpu_need_display_buffer_update_cond.notify_all();
+
+ pause_cond.notify_all();
+}
+
+bool Session::draw_gpu(BufferParams &buffer_params, DeviceDrawParams &draw_params)
+{
+ /* block for buffer access */
+ thread_scoped_lock display_lock(display_mutex);
+
+  /* first check that we already rendered something */
+ if (gpu_draw_ready) {
+ /* then verify the buffers have the expected size, so we don't
+ * draw previous results in a resized window */
+ if (!buffer_params.modified(display->params)) {
+ /* for CUDA we need to do tone-mapping still, since we can
+ * only access GL buffers from the main thread. */
+ if (gpu_need_display_buffer_update) {
+ thread_scoped_lock buffers_lock(buffers_mutex);
+ copy_to_display_buffer(tile_manager.state.sample);
+ gpu_need_display_buffer_update = false;
+ gpu_need_display_buffer_update_cond.notify_all();
+ }
+
+ display->draw(device, draw_params);
+
+ if (display_outdated && (time_dt() - reset_time) > params.text_timeout)
+ return false;
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void Session::run_gpu()
+{
+ bool tiles_written = false;
+
+ reset_time = time_dt();
+ last_update_time = time_dt();
+
+ progress.set_render_start_time();
+
+ while (!progress.get_cancel()) {
+ /* advance to next tile */
+ bool no_tiles = !tile_manager.next();
+
+ DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN;
+ if (no_tiles) {
+ kernel_state = device->get_active_kernel_switch_state();
+ }
+
+ if (params.background) {
+ /* if no work left and in background mode, we can stop immediately */
+ if (no_tiles) {
+ progress.set_status("Finished");
+ break;
+ }
+ }
+
+    /* Don't go into pause mode when the image was rendered with preview
+     * kernels. When feature kernels become available the session will be
+     * reset. */
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+ time_sleep(0.1);
+ }
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
+ reset_gpu(tile_manager.params, params.samples);
+ }
+
+ else {
+ /* if in interactive mode, and we are either paused or done for now,
+ * wait for pause condition notify to wake up again */
+ thread_scoped_lock pause_lock(pause_mutex);
+
+ if (!pause && !tile_manager.done()) {
+        /* A reset could have happened after no_tiles was set, before this
+         * lock. In this case we shall not wait for the pause condition. */
+ }
+ else if (pause || no_tiles) {
+ update_status_time(pause, no_tiles);
+
+ while (1) {
+ scoped_timer pause_timer;
+ pause_cond.wait(pause_lock);
+ if (pause) {
+ progress.add_skip_time(pause_timer, params.background);
+ }
+
+ update_status_time(pause, no_tiles);
+ progress.set_update();
+
+ if (!pause)
+ break;
+ }
+ }
+
+ if (progress.get_cancel())
+ break;
+ }
+
+ if (!no_tiles) {
+ /* update scene */
+ scoped_timer update_timer;
+ if (update_scene()) {
+ profiler.reset(scene->shaders.size(), scene->objects.size());
+ }
+ progress.add_skip_time(update_timer, params.background);
+
+ if (!device->error_message().empty())
+ progress.set_error(device->error_message());
+
+ if (progress.get_cancel())
+ break;
+
+ /* buffers mutex is locked entirely while rendering each
+ * sample, and released/reacquired on each iteration to allow
+ * reset and draw in between */
+ thread_scoped_lock buffers_lock(buffers_mutex);
+
+ /* update status and timing */
+ update_status_time();
+
+ /* render */
+ render();
+
+ device->task_wait();
+
+ if (!device->error_message().empty())
+ progress.set_cancel(device->error_message());
+
+ /* update status and timing */
+ update_status_time();
+
+ gpu_need_display_buffer_update = true;
+ gpu_draw_ready = true;
+ progress.set_update();
+
+      /* wait until the display buffer is updated */
+ if (!params.background) {
+ while (gpu_need_display_buffer_update) {
+ if (progress.get_cancel())
+ break;
+
+ gpu_need_display_buffer_update_cond.wait(buffers_lock);
+ }
+ }
+
+ if (!device->error_message().empty())
+ progress.set_error(device->error_message());
+
+ tiles_written = update_progressive_refine(progress.get_cancel());
+
+ if (progress.get_cancel())
+ break;
+ }
+ }
+
+ if (!tiles_written)
+ update_progressive_refine(true);
+}
+
+/* CPU Session */
+
+void Session::reset_cpu(BufferParams &buffer_params, int samples)
+{
+ thread_scoped_lock reset_lock(delayed_reset.mutex);
+ thread_scoped_lock pause_lock(pause_mutex);
+
+ display_outdated = true;
+ reset_time = time_dt();
+
+ delayed_reset.params = buffer_params;
+ delayed_reset.samples = samples;
+ delayed_reset.do_reset = true;
+ device->task_cancel();
+
+ pause_cond.notify_all();
+}
+
+bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_params)
+{
+ thread_scoped_lock display_lock(display_mutex);
+
+  /* first check that we already rendered something */
+ if (display->draw_ready()) {
+ /* then verify the buffers have the expected size, so we don't
+ * draw previous results in a resized window */
+ if (!buffer_params.modified(display->params)) {
+ display->draw(device, draw_params);
+
+ if (display_outdated && (time_dt() - reset_time) > params.text_timeout)
+ return false;
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool Session::acquire_tile(Device *tile_device, RenderTile &rtile)
+{
+ if (progress.get_cancel()) {
+ if (params.progressive_refine == false) {
+ /* for progressive refine current sample should be finished for all tiles */
+ return false;
+ }
+ }
+
+ thread_scoped_lock tile_lock(tile_mutex);
+
+ /* get next tile from manager */
+ Tile *tile;
+ int device_num = device->device_number(tile_device);
+
+ if (!tile_manager.next_tile(tile, device_num))
+ return false;
+
+ /* fill render tile */
+ rtile.x = tile_manager.state.buffer.full_x + tile->x;
+ rtile.y = tile_manager.state.buffer.full_y + tile->y;
+ rtile.w = tile->w;
+ rtile.h = tile->h;
+ rtile.start_sample = tile_manager.state.sample;
+ rtile.num_samples = tile_manager.state.num_samples;
+ rtile.resolution = tile_manager.state.resolution_divider;
+ rtile.tile_index = tile->index;
+ rtile.task = (tile->state == Tile::DENOISE) ? RenderTile::DENOISE : RenderTile::PATH_TRACE;
+
+ tile_lock.unlock();
+
+ /* in case of a permanent buffer, return it, otherwise we will allocate
+ * a new temporary buffer */
+ if (buffers) {
+ tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride);
+
+ rtile.buffer = buffers->buffer.device_pointer;
+ rtile.buffers = buffers;
+
+ device->map_tile(tile_device, rtile);
+
+ return true;
+ }
+
+ if (tile->buffers == NULL) {
+ /* fill buffer parameters */
+ BufferParams buffer_params = tile_manager.params;
+ buffer_params.full_x = rtile.x;
+ buffer_params.full_y = rtile.y;
+ buffer_params.width = rtile.w;
+ buffer_params.height = rtile.h;
+
+ /* allocate buffers */
+ tile->buffers = new RenderBuffers(tile_device);
+ tile->buffers->reset(buffer_params);
+ }
+
+ tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride);
+
+ rtile.buffer = tile->buffers->buffer.device_pointer;
+ rtile.buffers = tile->buffers;
+ rtile.sample = tile_manager.state.sample;
+
+  /* This will tag the tile as IN PROGRESS in the Blender-side render
+   * pipeline, which is needed to highlight the currently rendering tile
+   * before its first sample has been processed.
+   */
+ update_tile_sample(rtile);
+
+ return true;
+}
+
+void Session::update_tile_sample(RenderTile &rtile)
+{
+ thread_scoped_lock tile_lock(tile_mutex);
+
+ if (update_render_tile_cb) {
+ if (params.progressive_refine == false) {
+ /* todo: optimize this by making it thread safe and removing lock */
+
+ update_render_tile_cb(rtile, true);
+ }
+ }
+
+ update_status_time();
+}
+
+void Session::release_tile(RenderTile &rtile)
+{
+ thread_scoped_lock tile_lock(tile_mutex);
+
+ progress.add_finished_tile(rtile.task == RenderTile::DENOISE);
+
+ bool delete_tile;
+
+ if (tile_manager.finish_tile(rtile.tile_index, delete_tile)) {
+ if (write_render_tile_cb && params.progressive_refine == false) {
+ write_render_tile_cb(rtile);
+ }
+
+ if (delete_tile) {
+ delete rtile.buffers;
+ tile_manager.state.tiles[rtile.tile_index].buffers = NULL;
+ }
+ }
+ else {
+ if (update_render_tile_cb && params.progressive_refine == false) {
+ update_render_tile_cb(rtile, false);
+ }
+ }
+
+ update_status_time();
+}
+
+void Session::map_neighbor_tiles(RenderTile *tiles, Device *tile_device)
+{
+ thread_scoped_lock tile_lock(tile_mutex);
+
+ int center_idx = tiles[4].tile_index;
+ assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
+ BufferParams buffer_params = tile_manager.params;
+ int4 image_region = make_int4(buffer_params.full_x,
+ buffer_params.full_y,
+ buffer_params.full_x + buffer_params.width,
+ buffer_params.full_y + buffer_params.height);
+
+ for (int dy = -1, i = 0; dy <= 1; dy++) {
+ for (int dx = -1; dx <= 1; dx++, i++) {
+ int px = tiles[4].x + dx * params.tile_size.x;
+ int py = tiles[4].y + dy * params.tile_size.y;
+ if (px >= image_region.x && py >= image_region.y && px < image_region.z &&
+ py < image_region.w) {
+ int tile_index = center_idx + dy * tile_manager.state.tile_stride + dx;
+ Tile *tile = &tile_manager.state.tiles[tile_index];
+ assert(tile->buffers);
+
+ tiles[i].buffer = tile->buffers->buffer.device_pointer;
+ tiles[i].x = tile_manager.state.buffer.full_x + tile->x;
+ tiles[i].y = tile_manager.state.buffer.full_y + tile->y;
+ tiles[i].w = tile->w;
+ tiles[i].h = tile->h;
+ tiles[i].buffers = tile->buffers;
+
+ tile->buffers->params.get_offset_stride(tiles[i].offset, tiles[i].stride);
+ }
+ else {
+ tiles[i].buffer = (device_ptr)NULL;
+ tiles[i].buffers = NULL;
+ tiles[i].x = clamp(px, image_region.x, image_region.z);
+ tiles[i].y = clamp(py, image_region.y, image_region.w);
+ tiles[i].w = tiles[i].h = 0;
+ }
+ }
+ }
+
+ assert(tiles[4].buffers);
+ device->map_neighbor_tiles(tile_device, tiles);
+
+ /* The denoised result is written back to the original tile. */
+ tiles[9] = tiles[4];
+}
+
+void Session::unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device)
+{
+ thread_scoped_lock tile_lock(tile_mutex);
+ device->unmap_neighbor_tiles(tile_device, tiles);
+}
+
+void Session::run_cpu()
+{
+ bool tiles_written = false;
+
+ last_update_time = time_dt();
+
+ {
+ /* reset once to start */
+ thread_scoped_lock reset_lock(delayed_reset.mutex);
+ thread_scoped_lock buffers_lock(buffers_mutex);
+ thread_scoped_lock display_lock(display_mutex);
+
+ reset_(delayed_reset.params, delayed_reset.samples);
+ delayed_reset.do_reset = false;
+ }
+
+ while (!progress.get_cancel()) {
+ /* advance to next tile */
+ bool no_tiles = !tile_manager.next();
+ bool need_copy_to_display_buffer = false;
+
+ DeviceKernelStatus kernel_state = DEVICE_KERNEL_UNKNOWN;
+ if (no_tiles) {
+ kernel_state = device->get_active_kernel_switch_state();
+ }
+
+ if (params.background) {
+ /* if no work left and in background mode, we can stop immediately */
+ if (no_tiles) {
+ progress.set_status("Finished");
+ break;
+ }
+ }
+
+    /* Don't go into pause mode when preview kernels are used. When feature
+     * kernels become available the session will be reset. */
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+ time_sleep(0.1);
+ }
+ else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) {
+ reset_cpu(tile_manager.params, params.samples);
+ }
+
+ else {
+ /* if in interactive mode, and we are either paused or done for now,
+ * wait for pause condition notify to wake up again */
+ thread_scoped_lock pause_lock(pause_mutex);
+
+ if (!pause && delayed_reset.do_reset) {
+ /* reset once to start */
+ thread_scoped_lock reset_lock(delayed_reset.mutex);
+ thread_scoped_lock buffers_lock(buffers_mutex);
+ thread_scoped_lock display_lock(display_mutex);
+
+ reset_(delayed_reset.params, delayed_reset.samples);
+ delayed_reset.do_reset = false;
+ }
+ else if (pause || no_tiles) {
+ update_status_time(pause, no_tiles);
+
+ while (1) {
+ scoped_timer pause_timer;
+ pause_cond.wait(pause_lock);
+ if (pause) {
+ progress.add_skip_time(pause_timer, params.background);
+ }
+
+ update_status_time(pause, no_tiles);
+ progress.set_update();
+
+ if (!pause)
+ break;
+ }
+ }
+
+ if (progress.get_cancel())
+ break;
+ }
+
+ if (!no_tiles) {
+ /* buffers mutex is locked entirely while rendering each
+ * sample, and released/reacquired on each iteration to allow
+ * reset and draw in between */
+ thread_scoped_lock buffers_lock(buffers_mutex);
+
+ /* update scene */
+ scoped_timer update_timer;
+ if (update_scene()) {
+ profiler.reset(scene->shaders.size(), scene->objects.size());
+ }
+ progress.add_skip_time(update_timer, params.background);
+
+ if (!device->error_message().empty())
+ progress.set_error(device->error_message());
+
+ if (progress.get_cancel())
+ break;
+
+ /* update status and timing */
+ update_status_time();
+
+ /* render */
+ render();
+
+ /* update status and timing */
+ update_status_time();
+
+ if (!params.background)
+ need_copy_to_display_buffer = true;
+
+ if (!device->error_message().empty())
+ progress.set_error(device->error_message());
+ }
+
+ device->task_wait();
+
+ {
+ thread_scoped_lock reset_lock(delayed_reset.mutex);
+ thread_scoped_lock buffers_lock(buffers_mutex);
+ thread_scoped_lock display_lock(display_mutex);
+
+ if (delayed_reset.do_reset) {
+ /* reset rendering if request from main thread */
+ delayed_reset.do_reset = false;
+ reset_(delayed_reset.params, delayed_reset.samples);
+ }
+ else if (need_copy_to_display_buffer) {
+        /* Only copy to display_buffer if we do not reset; we don't want to
+         * show the result of an incomplete sample. */
+ copy_to_display_buffer(tile_manager.state.sample);
+ }
+
+ if (!device->error_message().empty())
+ progress.set_error(device->error_message());
+
+ tiles_written = update_progressive_refine(progress.get_cancel());
+ }
+
+ progress.set_update();
+ }
+
+ if (!tiles_written)
+ update_progressive_refine(true);
+}
+
+DeviceRequestedFeatures Session::get_requested_device_features()
+{
+ /* TODO(sergey): Consider moving this to the Scene level. */
+ DeviceRequestedFeatures requested_features;
+ requested_features.experimental = params.experimental;
+
+ scene->shader_manager->get_requested_features(scene, &requested_features);
+
+  /* These features are not tweaked as often as shaders, so selective
+   * handling could be done for the viewport as well.
+   */
+ bool use_motion = scene->need_motion() == Scene::MotionType::MOTION_BLUR;
+ requested_features.use_hair = false;
+ requested_features.use_object_motion = false;
+ requested_features.use_camera_motion = use_motion && scene->camera->use_motion();
+ foreach (Object *object, scene->objects) {
+ Mesh *mesh = object->mesh;
+ if (mesh->num_curves()) {
+ requested_features.use_hair = true;
+ }
+ if (use_motion) {
+ requested_features.use_object_motion |= object->use_motion() | mesh->use_motion_blur;
+ requested_features.use_camera_motion |= mesh->use_motion_blur;
+ }
+#ifdef WITH_OPENSUBDIV
+ if (mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
+ requested_features.use_patch_evaluation = true;
+ }
+#endif
+ if (object->is_shadow_catcher) {
+ requested_features.use_shadow_tricks = true;
+ }
+ requested_features.use_true_displacement |= mesh->has_true_displacement();
+ }
+
+ requested_features.use_background_light = scene->light_manager->has_background_light(scene);
+
+ BakeManager *bake_manager = scene->bake_manager;
+ requested_features.use_baking = bake_manager->get_baking();
+ requested_features.use_integrator_branched = (scene->integrator->method ==
+ Integrator::BRANCHED_PATH);
+ if (params.run_denoising) {
+ requested_features.use_denoising = true;
+ requested_features.use_shadow_tricks = true;
+ }
+
+ return requested_features;
+}
+
+bool Session::load_kernels(bool lock_scene)
+{
+ thread_scoped_lock scene_lock;
+ if (lock_scene) {
+ scene_lock = thread_scoped_lock(scene->mutex);
+ }
+
+ DeviceRequestedFeatures requested_features = get_requested_device_features();
+
+ if (!kernels_loaded || loaded_kernel_features.modified(requested_features)) {
+ progress.set_status("Loading render kernels (may take a few minutes the first time)");
+
+ scoped_timer timer;
+
+ VLOG(2) << "Requested features:\n" << requested_features;
+ if (!device->load_kernels(requested_features)) {
+ string message = device->error_message();
+ if (message.empty())
+ message = "Failed loading render kernel, see console for errors";
+
+ progress.set_error(message);
+ progress.set_status("Error", message);
+ progress.set_update();
+ return false;
+ }
+
+ progress.add_skip_time(timer, false);
+ VLOG(1) << "Total time spent loading kernels: " << time_dt() - timer.get_start();
+
+ kernels_loaded = true;
+ loaded_kernel_features = requested_features;
+ return true;
+ }
+ return false;
+}
+
+void Session::run()
+{
+ if (params.use_profiling && (params.device.type == DEVICE_CPU)) {
+ profiler.start();
+ }
+
+ /* session thread loop */
+ progress.set_status("Waiting for render to start");
+
+ /* run */
+ if (!progress.get_cancel()) {
+ /* reset number of rendered samples */
+ progress.reset_sample();
+
+ if (device_use_gl)
+ run_gpu();
+ else
+ run_cpu();
+ }
+
+ profiler.stop();
+
+ /* progress update */
+ if (progress.get_cancel())
+ progress.set_status("Cancel", progress.get_cancel_message());
+ else
+ progress.set_update();
+}
+
+bool Session::draw(BufferParams &buffer_params, DeviceDrawParams &draw_params)
+{
+ if (device_use_gl)
+ return draw_gpu(buffer_params, draw_params);
+ else
+ return draw_cpu(buffer_params, draw_params);
+}
+
+void Session::reset_(BufferParams &buffer_params, int samples)
+{
+ if (buffers && buffer_params.modified(tile_manager.params)) {
+ gpu_draw_ready = false;
+ buffers->reset(buffer_params);
+ if (display) {
+ display->reset(buffer_params);
+ }
+ }
+
+ tile_manager.reset(buffer_params, samples);
+ progress.reset_sample();
+
+ bool show_progress = params.background || tile_manager.get_num_effective_samples() != INT_MAX;
+ progress.set_total_pixel_samples(show_progress ? tile_manager.state.total_pixel_samples : 0);
+
+ if (!params.background)
+ progress.set_start_time();
+ progress.set_render_start_time();
+}
+
+void Session::reset(BufferParams &buffer_params, int samples)
+{
+ if (device_use_gl)
+ reset_gpu(buffer_params, samples);
+ else
+ reset_cpu(buffer_params, samples);
+}
+
+void Session::set_samples(int samples)
+{
+ if (samples != params.samples) {
+ params.samples = samples;
+ tile_manager.set_samples(samples);
+
+ {
+ thread_scoped_lock pause_lock(pause_mutex);
+ }
+ pause_cond.notify_all();
+ }
+}
+
+void Session::set_pause(bool pause_)
+{
+ bool notify = false;
+
+ {
+ thread_scoped_lock pause_lock(pause_mutex);
+
+ if (pause != pause_) {
+ pause = pause_;
+ notify = true;
+ }
+ }
+
+ if (notify)
+ pause_cond.notify_all();
+}
+
+void Session::wait()
+{
+ if (session_thread) {
+ session_thread->join();
+ delete session_thread;
+ }
+
+ session_thread = NULL;
+}
+
+bool Session::update_scene()
+{
+ thread_scoped_lock scene_lock(scene->mutex);
+
+ /* update camera if dimensions changed for progressive render. the camera
+ * knows nothing about progressive or cropped rendering, it just gets the
+ * image dimensions passed in */
+ Camera *cam = scene->camera;
+ int width = tile_manager.state.buffer.full_width;
+ int height = tile_manager.state.buffer.full_height;
+ int resolution = tile_manager.state.resolution_divider;
+
+ if (width != cam->width || height != cam->height) {
+ cam->width = width;
+ cam->height = height;
+ cam->resolution = resolution;
+ cam->tag_update();
+ }
+
+  /* the number of samples is needed by the multi-jittered
+   * sampling pattern and by baking */
+ Integrator *integrator = scene->integrator;
+ BakeManager *bake_manager = scene->bake_manager;
+
+ if (integrator->sampling_pattern == SAMPLING_PATTERN_CMJ || bake_manager->get_baking()) {
+ int aa_samples = tile_manager.num_samples;
+
+ if (aa_samples != integrator->aa_samples) {
+ integrator->aa_samples = aa_samples;
+ integrator->tag_update(scene);
+ }
+ }
+
+ /* update scene */
+ if (scene->need_update()) {
+ bool new_kernels_needed = load_kernels(false);
+
+ /* Update max_closures. */
+ KernelIntegrator *kintegrator = &scene->dscene.data.integrator;
+ if (params.background) {
+ kintegrator->max_closures = get_max_closure_count();
+ }
+ else {
+ /* Currently viewport render is faster with higher max_closures, needs investigating. */
+ kintegrator->max_closures = MAX_CLOSURE;
+ }
+
+ progress.set_status("Updating Scene");
+ MEM_GUARDED_CALL(&progress, scene->device_update, device, progress);
+
+ DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state();
+ bool kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE ||
+ kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID;
+ if (kernel_switch_status == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) {
+ progress.set_kernel_status("Compiling render kernels");
+ }
+ if (new_kernels_needed || kernel_switch_needed) {
+ progress.set_kernel_status("Compiling render kernels");
+ device->wait_for_availability(loaded_kernel_features);
+ progress.set_kernel_status("");
+ }
+
+ if (kernel_switch_needed) {
+ reset(tile_manager.params, params.samples);
+ }
+ return true;
+ }
+ return false;
+}
+
+void Session::update_status_time(bool show_pause, bool show_done)
+{
+ int progressive_sample = tile_manager.state.sample;
+ int num_samples = tile_manager.get_num_effective_samples();
+
+ int tile = progress.get_rendered_tiles();
+ int num_tiles = tile_manager.state.num_tiles;
+
+ /* update status */
+ string status, substatus;
+
+ if (!params.progressive) {
+ const bool is_cpu = params.device.type == DEVICE_CPU;
+ const bool rendering_finished = (tile == num_tiles);
+ const bool is_last_tile = (tile + 1) == num_tiles;
+
+ substatus = string_printf("Rendered %d/%d Tiles", tile, num_tiles);
+
+ if (!rendering_finished && (device->show_samples() || (is_cpu && is_last_tile))) {
+ /* Some devices automatically support showing the sample number:
+ * - CUDADevice
+ * - OpenCLDevice when using the megakernel (the split kernel renders multiple
+ * samples at the same time, so the current sample isn't really defined)
+ * - CPUDevice when using one thread
+ * For these devices, the current sample is always shown.
+ *
+ * The other option is when the last tile is currently being rendered by the CPU.
+ */
+ substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples);
+ }
+ if (params.full_denoising || params.optix_denoising) {
+ substatus += string_printf(", Denoised %d tiles", progress.get_denoised_tiles());
+ }
+ else if (params.run_denoising) {
+ substatus += string_printf(", Prefiltered %d tiles", progress.get_denoised_tiles());
+ }
+ }
+ else if (tile_manager.num_samples == Integrator::MAX_SAMPLES)
+ substatus = string_printf("Path Tracing Sample %d", progressive_sample + 1);
+ else
+ substatus = string_printf("Path Tracing Sample %d/%d", progressive_sample + 1, num_samples);
+
+ if (show_pause) {
+ status = "Rendering Paused";
+ }
+ else if (show_done) {
+ status = "Rendering Done";
+ progress.set_end_time(); /* Save end time so that further calls to get_time are accurate. */
+ }
+ else {
+ status = substatus;
+ substatus.clear();
+ }
+
+ progress.set_status(status, substatus);
+}
+
+void Session::render()
+{
+ /* Clear buffers. */
+ if (buffers && tile_manager.state.sample == tile_manager.range_start_sample) {
+ buffers->zero();
+ }
+
+ /* Add path trace task. */
+ DeviceTask task(DeviceTask::RENDER);
+
+ task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2);
+ task.release_tile = function_bind(&Session::release_tile, this, _1);
+ task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2);
+ task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2);
+ task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
+ task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
+ task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
+ task.need_finish_queue = params.progressive_refine;
+ task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
+ task.requested_tile_size = params.tile_size;
+ task.passes_size = tile_manager.params.get_passes_size();
+
+ if (params.run_denoising) {
+ task.denoising = params.denoising;
+
+ assert(!scene->film->need_update);
+ task.pass_stride = scene->film->pass_stride;
+ task.target_pass_stride = task.pass_stride;
+ task.pass_denoising_data = scene->film->denoising_data_offset;
+ task.pass_denoising_clean = scene->film->denoising_clean_offset;
+
+ task.denoising_from_render = true;
+ task.denoising_do_filter = params.full_denoising;
+ task.denoising_use_optix = params.optix_denoising;
+ task.denoising_write_passes = params.write_denoising_passes;
+ }
+
+ device->task_add(task);
+}
+
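+/* function_bind is Cycles' thin wrapper around std::bind (see
+ * util/util_function.h), so task.acquire_tile above resolves to
+ * this->acquire_tile(tile_device, tile) whenever a device worker thread
+ * requests a new tile. */
+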
+void Session::copy_to_display_buffer(int sample)
+{
+ /* add film conversion task */
+ DeviceTask task(DeviceTask::FILM_CONVERT);
+
+ task.x = tile_manager.state.buffer.full_x;
+ task.y = tile_manager.state.buffer.full_y;
+ task.w = tile_manager.state.buffer.width;
+ task.h = tile_manager.state.buffer.height;
+ task.rgba_byte = display->rgba_byte.device_pointer;
+ task.rgba_half = display->rgba_half.device_pointer;
+ task.buffer = buffers->buffer.device_pointer;
+ task.sample = sample;
+ tile_manager.state.buffer.get_offset_stride(task.offset, task.stride);
+
+ if (task.w > 0 && task.h > 0) {
+ device->task_add(task);
+ device->task_wait();
+
+ /* set display to new size */
+ display->draw_set(task.w, task.h);
+ }
+
+ display_outdated = false;
+}
+
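+/* The film convert task above is synchronous: task_wait() blocks until the
+ * conversion finishes, so draw_set() only ever sees a fully converted
+ * buffer, and zero-sized viewports skip the device round-trip entirely. */
+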
+bool Session::update_progressive_refine(bool cancel)
+{
+ int sample = tile_manager.state.sample + 1;
+ bool write = sample == tile_manager.num_samples || cancel;
+
+ double current_time = time_dt();
+
+ if (current_time - last_update_time < params.progressive_update_timeout) {
+    /* Within the timeout, skip the update unless this is the final write or the very first sample. */
+ if (!write && sample != 1)
+ return false;
+ }
+
+ if (params.progressive_refine) {
+ foreach (Tile &tile, tile_manager.state.tiles) {
+ if (!tile.buffers) {
+ continue;
+ }
+
+ RenderTile rtile;
+ rtile.x = tile_manager.state.buffer.full_x + tile.x;
+ rtile.y = tile_manager.state.buffer.full_y + tile.y;
+ rtile.w = tile.w;
+ rtile.h = tile.h;
+ rtile.sample = sample;
+ rtile.buffers = tile.buffers;
+
+ if (write) {
+ if (write_render_tile_cb)
+ write_render_tile_cb(rtile);
+ }
+ else {
+ if (update_render_tile_cb)
+ update_render_tile_cb(rtile, true);
+ }
+ }
+ }
+
+ last_update_time = current_time;
+
+ return write;
+}
+
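+/* Timing sketch: with the default progressive_update_timeout of 1.0 (see
+ * SessionParams in session.h), intermediate tile updates are throttled to
+ * roughly one per second, while the first sample and the final write always
+ * go through. */
+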
+void Session::device_free()
+{
+ scene->device_free();
+
+ tile_manager.device_free();
+
+ /* used from background render only, so no need to
+ * re-create render/display buffers here
+ */
+}
+
+void Session::collect_statistics(RenderStats *render_stats)
+{
+ scene->collect_statistics(render_stats);
+ if (params.use_profiling && (params.device.type == DEVICE_CPU)) {
+ render_stats->collect_profiling(scene, profiler);
+ }
+}
+
+int Session::get_max_closure_count()
+{
+ if (scene->shader_manager->use_osl()) {
+ /* OSL always needs the maximum as we can't predict the
+ * number of closures a shader might generate. */
+ return MAX_CLOSURE;
+ }
+
+ int max_closures = 0;
+ for (int i = 0; i < scene->shaders.size(); i++) {
+ int num_closures = scene->shaders[i]->graph->get_num_closures();
+ max_closures = max(max_closures, num_closures);
+ }
+ max_closure_global = max(max_closure_global, max_closures);
+
+ if (max_closure_global > MAX_CLOSURE) {
+    /* This is usually harmless as more complex shaders tend to get many
+ * closures discarded due to mixing or low weights. We need to limit
+ * to MAX_CLOSURE as this is hardcoded in CPU/mega kernels, and it
+ * avoids excessive memory usage for split kernels. */
+ VLOG(2) << "Maximum number of closures exceeded: " << max_closure_global << " > "
+ << MAX_CLOSURE;
+
+ max_closure_global = MAX_CLOSURE;
+ }
+
+ return max_closure_global;
+}
+
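+/* Worked example (hypothetical shader set): graphs yielding 3, 7 and 5
+ * closures give max_closure_global = 7, so kernels are sized for 7 closures;
+ * with OSL the count cannot be predicted statically, hence MAX_CLOSURE is
+ * always returned. */
+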
+CCL_NAMESPACE_END
diff -Naur a/intern/cycles/render/session.h b/intern/cycles/render/session.h
--- a/intern/cycles/render/session.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/render/session.h 2020-01-10 20:42:43.474256721 +0300
@@ -55,6 +55,7 @@
int start_resolution;
int pixel_size;
int threads;
+ bool adaptive_sampling;
bool use_profiling;
@@ -87,6 +88,7 @@
start_resolution = INT_MAX;
pixel_size = 1;
threads = 0;
+ adaptive_sampling = false;
use_profiling = false;
@@ -114,6 +116,7 @@
&& progressive == params.progressive && experimental == params.experimental &&
tile_size == params.tile_size && start_resolution == params.start_resolution &&
pixel_size == params.pixel_size && threads == params.threads &&
+ adaptive_sampling == params.adaptive_sampling &&
use_profiling == params.use_profiling &&
display_buffer_linear == params.display_buffer_linear &&
cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout &&
diff -Naur a/intern/cycles/render/session.h.orig b/intern/cycles/render/session.h.orig
--- a/intern/cycles/render/session.h.orig 1970-01-01 03:00:00.000000000 +0300
+++ b/intern/cycles/render/session.h.orig 2020-01-10 20:37:06.000000000 +0300
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SESSION_H__
+#define __SESSION_H__
+
+#include "render/buffers.h"
+#include "device/device.h"
+#include "render/shader.h"
+#include "render/stats.h"
+#include "render/tile.h"
+
+#include "util/util_progress.h"
+#include "util/util_stats.h"
+#include "util/util_thread.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BufferParams;
+class Device;
+class DeviceScene;
+class DeviceRequestedFeatures;
+class DisplayBuffer;
+class Progress;
+class RenderBuffers;
+class Scene;
+
+/* Session Parameters */
+
+class SessionParams {
+ public:
+ DeviceInfo device;
+ bool background;
+ bool progressive_refine;
+
+ bool progressive;
+ bool experimental;
+ int samples;
+ int2 tile_size;
+ TileOrder tile_order;
+ int start_resolution;
+ int pixel_size;
+ int threads;
+
+ bool use_profiling;
+
+ bool display_buffer_linear;
+
+ bool run_denoising;
+ bool write_denoising_passes;
+ bool full_denoising;
+ bool optix_denoising;
+ DenoiseParams denoising;
+
+ double cancel_timeout;
+ double reset_timeout;
+ double text_timeout;
+ double progressive_update_timeout;
+
+ ShadingSystem shadingsystem;
+
+ function<bool(const uchar *pixels, int width, int height, int channels)> write_render_cb;
+
+ SessionParams()
+ {
+ background = false;
+ progressive_refine = false;
+
+ progressive = false;
+ experimental = false;
+ samples = 1024;
+ tile_size = make_int2(64, 64);
+ start_resolution = INT_MAX;
+ pixel_size = 1;
+ threads = 0;
+
+ use_profiling = false;
+
+ run_denoising = false;
+ write_denoising_passes = false;
+ full_denoising = false;
+ optix_denoising = false;
+
+ display_buffer_linear = false;
+
+ cancel_timeout = 0.1;
+ reset_timeout = 0.1;
+ text_timeout = 1.0;
+ progressive_update_timeout = 1.0;
+
+ shadingsystem = SHADINGSYSTEM_SVM;
+ tile_order = TILE_CENTER;
+ }
+
+ bool modified(const SessionParams &params)
+ {
+ return !(device == params.device && background == params.background &&
+ progressive_refine == params.progressive_refine
+ /* && samples == params.samples */
+ && progressive == params.progressive && experimental == params.experimental &&
+ tile_size == params.tile_size && start_resolution == params.start_resolution &&
+ pixel_size == params.pixel_size && threads == params.threads &&
+ use_profiling == params.use_profiling &&
+ display_buffer_linear == params.display_buffer_linear &&
+ cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout &&
+ text_timeout == params.text_timeout &&
+ progressive_update_timeout == params.progressive_update_timeout &&
+ tile_order == params.tile_order && shadingsystem == params.shadingsystem);
+ }
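+
+  /* Usage sketch (illustrative, not part of the original header): the host
+   * application typically recreates the session only when the parameters
+   * actually changed:
+   *
+   *   if (session->params.modified(new_params)) {
+   *     delete session;
+   *     session = new Session(new_params);
+   *   }
+   */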
+};
+
+/* Session
+ *
+ * This is the class that contains the session thread, running the render
+ * control loop and dispatching tasks. */
+
+class Session {
+ public:
+ Device *device;
+ Scene *scene;
+ RenderBuffers *buffers;
+ DisplayBuffer *display;
+ Progress progress;
+ SessionParams params;
+ TileManager tile_manager;
+ Stats stats;
+ Profiler profiler;
+
+ function<void(RenderTile &)> write_render_tile_cb;
+ function<void(RenderTile &, bool)> update_render_tile_cb;
+
+ explicit Session(const SessionParams &params);
+ ~Session();
+
+ void start();
+ bool draw(BufferParams &params, DeviceDrawParams &draw_params);
+ void wait();
+
+ bool ready_to_reset();
+ void reset(BufferParams &params, int samples);
+ void set_samples(int samples);
+ void set_pause(bool pause);
+
+ bool update_scene();
+ bool load_kernels(bool lock_scene = true);
+
+ void device_free();
+
+ /* Returns the rendering progress or 0 if no progress can be determined
+ * (for example, when rendering with unlimited samples). */
+ float get_progress();
+
+ void collect_statistics(RenderStats *stats);
+
+ protected:
+ struct DelayedReset {
+ thread_mutex mutex;
+ bool do_reset;
+ BufferParams params;
+ int samples;
+ } delayed_reset;
+
+ void run();
+
+ void update_status_time(bool show_pause = false, bool show_done = false);
+
+ void copy_to_display_buffer(int sample);
+ void render();
+ void reset_(BufferParams &params, int samples);
+
+ void run_cpu();
+ bool draw_cpu(BufferParams &params, DeviceDrawParams &draw_params);
+ void reset_cpu(BufferParams &params, int samples);
+
+ void run_gpu();
+ bool draw_gpu(BufferParams &params, DeviceDrawParams &draw_params);
+ void reset_gpu(BufferParams &params, int samples);
+
+ bool acquire_tile(Device *tile_device, RenderTile &tile);
+ void update_tile_sample(RenderTile &tile);
+ void release_tile(RenderTile &tile);
+
+ void map_neighbor_tiles(RenderTile *tiles, Device *tile_device);
+ void unmap_neighbor_tiles(RenderTile *tiles, Device *tile_device);
+
+ bool device_use_gl;
+
+ thread *session_thread;
+
+ volatile bool display_outdated;
+
+ volatile bool gpu_draw_ready;
+ volatile bool gpu_need_display_buffer_update;
+ thread_condition_variable gpu_need_display_buffer_update_cond;
+
+ bool pause;
+ thread_condition_variable pause_cond;
+ thread_mutex pause_mutex;
+ thread_mutex tile_mutex;
+ thread_mutex buffers_mutex;
+ thread_mutex display_mutex;
+
+ bool kernels_loaded;
+ DeviceRequestedFeatures loaded_kernel_features;
+
+ double reset_time;
+
+ /* progressive refine */
+ double last_update_time;
+ bool update_progressive_refine(bool cancel);
+
+ DeviceRequestedFeatures get_requested_device_features();
+
+ /* ** Split kernel routines ** */
+
+  /* Maximum number of closures during the session lifetime. */
+ int max_closure_global;
+
+ /* Get maximum number of closures to be used in kernel. */
+ int get_max_closure_count();
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __SESSION_H__ */
diff -Naur a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
--- a/intern/cycles/util/util_atomic.h 2020-01-10 20:37:06.000000000 +0300
+++ b/intern/cycles/util/util_atomic.h 2020-01-10 20:42:43.474256721 +0300
@@ -77,6 +77,7 @@
# define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x))
# define atomic_fetch_and_inc_uint32(p) atomic_inc((p))
# define atomic_fetch_and_dec_uint32(p) atomic_dec((p))
+# define atomic_fetch_and_or_uint32(p, x) atomic_or((p), (x))
# define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE
# define ccl_barrier(flags) barrier(flags)
@@ -91,6 +92,7 @@
# define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x))
# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1)
+# define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x))
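+/* This OR variant is introduced by this patch, presumably so the adaptive
+ * sampling kernels can set shared flag bits without races. Usage sketch with
+ * an illustrative flags word:
+ *
+ *   atomic_fetch_and_or_uint32(&flags, 1u << bit);
+ */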
ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest,
const float old_val,