Added GPU based cluster builder

Clustering is now GPU-based and uses an implementation based on the Activision algorithm.
This commit is contained in:
reduz 2021-01-17 13:25:38 -03:00 committed by Juan Linietsky
parent 7008e3c6ea
commit 099dee35f4
35 changed files with 2753 additions and 1400 deletions

View file

@ -74,6 +74,15 @@ Plane CameraMatrix::xform4(const Plane &p_vec4) const {
return ret;
}
// Rewrites the depth terms of this projection so that it uses p_new_znear as
// its near plane distance. The far plane distance is read back from the
// current matrix before any element is overwritten.
void CameraMatrix::adjust_perspective_znear(real_t p_new_znear) {
	const real_t far_plane = get_z_far();
	const real_t depth_range = far_plane - p_new_znear;

	// Only elements [2][2] and [3][2] are touched; everything else is kept.
	matrix[2][2] = -(far_plane + p_new_znear) / depth_range;
	matrix[3][2] = -2 * p_new_znear * far_plane / depth_range;
}
void CameraMatrix::set_perspective(real_t p_fovy_degrees, real_t p_aspect, real_t p_z_near, real_t p_z_far, bool p_flip_fov) {
if (p_flip_fov) {
p_fovy_degrees = get_fovy(p_fovy_degrees, 1.0 / p_aspect);

View file

@ -59,6 +59,7 @@ struct CameraMatrix {
void set_orthogonal(real_t p_size, real_t p_aspect, real_t p_znear, real_t p_zfar, bool p_flip_fov = false);
void set_frustum(real_t p_left, real_t p_right, real_t p_bottom, real_t p_top, real_t p_near, real_t p_far);
void set_frustum(real_t p_size, real_t p_aspect, Vector2 p_offset, real_t p_near, real_t p_far, bool p_flip_fov = false);
void adjust_perspective_znear(real_t p_new_znear);
static real_t get_fovy(real_t p_fovx, real_t p_aspect) {
return Math::rad2deg(Math::atan(p_aspect * Math::tan(Math::deg2rad(p_fovx) * 0.5)) * 2.0);

View file

@ -3323,11 +3323,8 @@ RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_c
return id;
}
RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_create_empty(const Size2i &p_size) {
ERR_FAIL_COND_V(p_size.width <= 0 || p_size.height <= 0, INVALID_FORMAT_ID);
RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_create_empty(TextureSamples p_samples) {
FramebufferFormatKey key;
key.empty_size = p_size;
const Map<FramebufferFormatKey, FramebufferFormatID>::Element *E = framebuffer_format_cache.find(key);
if (E) {
@ -3375,7 +3372,7 @@ RenderingDevice::FramebufferFormatID RenderingDeviceVulkan::framebuffer_format_c
fb_format.E = E;
fb_format.color_attachments = 0;
fb_format.render_pass = render_pass;
fb_format.samples = TEXTURE_SAMPLES_1;
fb_format.samples = p_samples;
framebuffer_formats[id] = fb_format;
return id;
}
@ -3391,10 +3388,10 @@ RenderingDevice::TextureSamples RenderingDeviceVulkan::framebuffer_format_get_te
/**** RENDER TARGET ****/
/***********************/
RID RenderingDeviceVulkan::framebuffer_create_empty(const Size2i &p_size, FramebufferFormatID p_format_check) {
RID RenderingDeviceVulkan::framebuffer_create_empty(const Size2i &p_size, TextureSamples p_samples, FramebufferFormatID p_format_check) {
_THREAD_SAFE_METHOD_
Framebuffer framebuffer;
framebuffer.format_id = framebuffer_format_create_empty(p_size);
framebuffer.format_id = framebuffer_format_create_empty(p_samples);
ERR_FAIL_COND_V(p_format_check != INVALID_FORMAT_ID && framebuffer.format_id != p_format_check, RID());
framebuffer.size = p_size;
@ -5074,6 +5071,40 @@ Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint
return err;
}
// Fills p_size bytes of p_buffer, starting at byte p_offset, with zeros by
// recording a vkCmdFillBuffer command.
//
// p_buffer: any buffer type accepted by _get_buffer_from_owner().
// p_offset, p_size: byte range to clear. Both must be multiples of four
//     (vkCmdFillBuffer requires this) and the range must lie inside the buffer.
// p_sync_with_draw: when true the fill is recorded in the frame's draw command
//     buffer, otherwise in the setup command buffer. The synchronized mode is
//     rejected while a draw list or compute list is being created.
//
// Returns OK on success and ERR_INVALID_PARAMETER when validation fails.
Error RenderingDeviceVulkan::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, bool p_sync_with_draw) {
	_THREAD_SAFE_METHOD_

	ERR_FAIL_COND_V_MSG((p_size % 4) != 0, ERR_INVALID_PARAMETER,
			"Size must be a multiple of four");
	// vkCmdFillBuffer places the same alignment requirement on the destination offset.
	ERR_FAIL_COND_V_MSG((p_offset % 4) != 0, ERR_INVALID_PARAMETER,
			"Offset must be a multiple of four");
	ERR_FAIL_COND_V_MSG(draw_list && p_sync_with_draw, ERR_INVALID_PARAMETER,
			"Updating buffers in 'sync to draw' mode is forbidden during creation of a draw list");
	ERR_FAIL_COND_V_MSG(compute_list && p_sync_with_draw, ERR_INVALID_PARAMETER,
			"Updating buffers in 'sync to draw' mode is forbidden during creation of a compute list");

	// Protect subsequent updates...
	VkPipelineStageFlags dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
	VkAccessFlags dst_access = VK_ACCESS_TRANSFER_WRITE_BIT;

	// The lookup may adjust dst_stage_mask / dst_access (they are passed as
	// non-const locals and reused by the barriers below).
	Buffer *buffer = _get_buffer_from_owner(p_buffer, dst_stage_mask, dst_access);
	if (!buffer) {
		ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type.");
	}

	// Compute the end of the range in 64 bits so that a large offset + size
	// cannot wrap around and slip past the bounds check.
	const uint64_t clear_end = uint64_t(p_offset) + uint64_t(p_size);
	ERR_FAIL_COND_V_MSG(clear_end > uint64_t(buffer->size), ERR_INVALID_PARAMETER,
			"Attempted to write buffer (" + itos(int64_t(clear_end - uint64_t(buffer->size))) + " bytes) past the end.");

	// Barrier before the transfer write, the fill itself, then a barrier after it.
	_buffer_memory_barrier(buffer->buffer, p_offset, p_size, dst_stage_mask, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_access, VK_ACCESS_TRANSFER_WRITE_BIT, p_sync_with_draw);

	vkCmdFillBuffer(p_sync_with_draw ? frames[frame].draw_command_buffer : frames[frame].setup_command_buffer, buffer->buffer, p_offset, p_size, 0);

#ifdef FORCE_FULL_BARRIER
	_full_barrier(p_sync_with_draw);
#else
	_buffer_memory_barrier(buffer->buffer, p_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, p_sync_with_draw);
#endif

	return OK;
}
Vector<uint8_t> RenderingDeviceVulkan::buffer_get_data(RID p_buffer) {
_THREAD_SAFE_METHOD_

View file

@ -228,13 +228,8 @@ class RenderingDeviceVulkan : public RenderingDevice {
// used for the render pipelines.
struct FramebufferFormatKey {
Size2i empty_size;
Vector<AttachmentFormat> attachments;
bool operator<(const FramebufferFormatKey &p_key) const {
if (empty_size != p_key.empty_size) {
return empty_size < p_key.empty_size;
}
int as = attachments.size();
int bs = p_key.attachments.size();
if (as != bs) {
@ -934,11 +929,11 @@ public:
/*********************/
virtual FramebufferFormatID framebuffer_format_create(const Vector<AttachmentFormat> &p_format);
virtual FramebufferFormatID framebuffer_format_create_empty(const Size2i &p_size);
virtual FramebufferFormatID framebuffer_format_create_empty(TextureSamples p_samples = TEXTURE_SAMPLES_1);
virtual TextureSamples framebuffer_format_get_texture_samples(FramebufferFormatID p_format);
virtual RID framebuffer_create(const Vector<RID> &p_texture_attachments, FramebufferFormatID p_format_check = INVALID_ID);
virtual RID framebuffer_create_empty(const Size2i &p_size, FramebufferFormatID p_format_check = INVALID_ID);
virtual RID framebuffer_create_empty(const Size2i &p_size, TextureSamples p_samples = TEXTURE_SAMPLES_1, FramebufferFormatID p_format_check = INVALID_ID);
virtual FramebufferFormatID framebuffer_get_format(RID p_framebuffer);
@ -981,6 +976,7 @@ public:
virtual bool uniform_set_is_valid(RID p_uniform_set);
virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, bool p_sync_with_draw = false); //works for any buffer
virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, bool p_sync_with_draw = false);
virtual Vector<uint8_t> buffer_get_data(RID p_buffer);
/*************************/

View file

@ -3043,7 +3043,11 @@ void Node3DEditorViewport::_menu_option(int p_option) {
case VIEW_DISPLAY_DEBUG_SDFGI:
case VIEW_DISPLAY_DEBUG_SDFGI_PROBES:
case VIEW_DISPLAY_DEBUG_GI_BUFFER:
case VIEW_DISPLAY_DEBUG_DISABLE_LOD: {
case VIEW_DISPLAY_DEBUG_DISABLE_LOD:
case VIEW_DISPLAY_DEBUG_CLUSTER_OMNI_LIGHTS:
case VIEW_DISPLAY_DEBUG_CLUSTER_SPOT_LIGHTS:
case VIEW_DISPLAY_DEBUG_CLUSTER_DECALS:
case VIEW_DISPLAY_DEBUG_CLUSTER_REFLECTION_PROBES: {
static const int display_options[] = {
VIEW_DISPLAY_NORMAL,
VIEW_DISPLAY_WIREFRAME,
@ -3065,6 +3069,10 @@ void Node3DEditorViewport::_menu_option(int p_option) {
VIEW_DISPLAY_DEBUG_DECAL_ATLAS,
VIEW_DISPLAY_DEBUG_SDFGI,
VIEW_DISPLAY_DEBUG_SDFGI_PROBES,
VIEW_DISPLAY_DEBUG_CLUSTER_OMNI_LIGHTS,
VIEW_DISPLAY_DEBUG_CLUSTER_SPOT_LIGHTS,
VIEW_DISPLAY_DEBUG_CLUSTER_DECALS,
VIEW_DISPLAY_DEBUG_CLUSTER_REFLECTION_PROBES,
VIEW_MAX
};
static const Viewport::DebugDraw debug_draw_modes[] = {
@ -3088,6 +3096,10 @@ void Node3DEditorViewport::_menu_option(int p_option) {
Viewport::DEBUG_DRAW_DECAL_ATLAS,
Viewport::DEBUG_DRAW_SDFGI,
Viewport::DEBUG_DRAW_SDFGI_PROBES,
Viewport::DEBUG_DRAW_CLUSTER_OMNI_LIGHTS,
Viewport::DEBUG_DRAW_CLUSTER_SPOT_LIGHTS,
Viewport::DEBUG_DRAW_CLUSTER_DECALS,
Viewport::DEBUG_DRAW_CLUSTER_REFLECTION_PROBES,
};
int idx = 0;
@ -3991,6 +4003,12 @@ Node3DEditorViewport::Node3DEditorViewport(Node3DEditor *p_spatial_editor, Edito
display_submenu->add_radio_check_item(TTR("GI Buffer"), VIEW_DISPLAY_DEBUG_GI_BUFFER);
display_submenu->add_separator();
display_submenu->add_radio_check_item(TTR("Disable LOD"), VIEW_DISPLAY_DEBUG_DISABLE_LOD);
display_submenu->add_separator();
display_submenu->add_radio_check_item(TTR("Omni Light Cluster"), VIEW_DISPLAY_DEBUG_CLUSTER_OMNI_LIGHTS);
display_submenu->add_radio_check_item(TTR("Spot Light Cluster"), VIEW_DISPLAY_DEBUG_CLUSTER_SPOT_LIGHTS);
display_submenu->add_radio_check_item(TTR("Decal Cluster"), VIEW_DISPLAY_DEBUG_CLUSTER_DECALS);
display_submenu->add_radio_check_item(TTR("Reflection Probe Cluster"), VIEW_DISPLAY_DEBUG_CLUSTER_REFLECTION_PROBES);
display_submenu->set_name("display_advanced");
view_menu->get_popup()->add_submenu_item(TTR("Display Advanced..."), "display_advanced", VIEW_DISPLAY_ADVANCED);
view_menu->get_popup()->add_separator();

View file

@ -213,6 +213,11 @@ class Node3DEditorViewport : public Control {
VIEW_DISPLAY_DEBUG_SDFGI_PROBES,
VIEW_DISPLAY_DEBUG_GI_BUFFER,
VIEW_DISPLAY_DEBUG_DISABLE_LOD,
VIEW_DISPLAY_DEBUG_CLUSTER_OMNI_LIGHTS,
VIEW_DISPLAY_DEBUG_CLUSTER_SPOT_LIGHTS,
VIEW_DISPLAY_DEBUG_CLUSTER_DECALS,
VIEW_DISPLAY_DEBUG_CLUSTER_REFLECTION_PROBES,
VIEW_LOCK_ROTATION,
VIEW_CINEMATIC_PREVIEW,
VIEW_AUTO_ORTHOGONAL,

View file

@ -53,7 +53,7 @@ static Vector<uint8_t> _compile_shader_glsl(RenderingDevice::ShaderStage p_stage
int ClientInputSemanticsVersion = 100; // maps to, say, #define VULKAN 100
glslang::EShTargetClientVersion VulkanClientVersion = glslang::EShTargetVulkan_1_0;
glslang::EShTargetLanguageVersion TargetVersion = glslang::EShTargetSpv_1_0;
glslang::EShTargetLanguageVersion TargetVersion = glslang::EShTargetSpv_1_3;
glslang::TShader::ForbidIncluder includer;
glslang::TShader shader(stages[p_stage]);

View file

@ -212,6 +212,10 @@ void Light3D::_validate_property(PropertyInfo &property) const {
property.usage = 0;
}
if (get_light_type() == RS::LIGHT_DIRECTIONAL && property.name == "light_specular") {
property.usage = 0;
}
if (get_light_type() == RS::LIGHT_DIRECTIONAL && property.name == "light_projector") {
property.usage = 0;
}

View file

@ -3605,6 +3605,10 @@ void Viewport::_bind_methods() {
BIND_ENUM_CONSTANT(DEBUG_DRAW_SDFGI_PROBES);
BIND_ENUM_CONSTANT(DEBUG_DRAW_GI_BUFFER);
BIND_ENUM_CONSTANT(DEBUG_DRAW_DISABLE_LOD);
BIND_ENUM_CONSTANT(DEBUG_DRAW_CLUSTER_OMNI_LIGHTS);
BIND_ENUM_CONSTANT(DEBUG_DRAW_CLUSTER_SPOT_LIGHTS);
BIND_ENUM_CONSTANT(DEBUG_DRAW_CLUSTER_DECALS);
BIND_ENUM_CONSTANT(DEBUG_DRAW_CLUSTER_REFLECTION_PROBES);
BIND_ENUM_CONSTANT(DEFAULT_CANVAS_ITEM_TEXTURE_FILTER_NEAREST);
BIND_ENUM_CONSTANT(DEFAULT_CANVAS_ITEM_TEXTURE_FILTER_LINEAR);

View file

@ -143,6 +143,10 @@ public:
DEBUG_DRAW_SDFGI_PROBES,
DEBUG_DRAW_GI_BUFFER,
DEBUG_DRAW_DISABLE_LOD,
DEBUG_DRAW_CLUSTER_OMNI_LIGHTS,
DEBUG_DRAW_CLUSTER_SPOT_LIGHTS,
DEBUG_DRAW_CLUSTER_DECALS,
DEBUG_DRAW_CLUSTER_REFLECTION_PROBES,
};
enum DefaultCanvasItemTextureFilter {

View file

@ -0,0 +1,550 @@
/*************************************************************************/
/* cluster_builder_rd.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "cluster_builder_rd.h"
#include "servers/rendering/rendering_device.h"
#include "servers/rendering/rendering_server_globals.h"
// Creates the GPU resources used by the cluster builders:
//  - the cluster render shader with a normal and a 4x MSAA render pipeline,
//  - the cluster store and cluster debug compute pipelines,
//  - vertex/index buffers and arrays for the sphere, cone and box meshes,
//  - the sphere_overfit and cone_overfit factors derived from those meshes.
ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() {
	RD::VertexFormatID vertex_format;

	{
		// Every mesh uses a single attribute: three packed 32-bit floats.
		Vector<RD::VertexAttribute> attributes;
		{
			RD::VertexAttribute va;
			va.format = RD::DATA_FORMAT_R32G32B32_SFLOAT;
			va.stride = sizeof(float) * 3;
			attributes.push_back(va);
		}
		vertex_format = RD::get_singleton()->vertex_format_create(attributes);
	}

	{
		// Render shader: one pipeline without MSAA and one with 4 samples.
		Vector<String> versions;
		versions.push_back("");
		cluster_render.cluster_render_shader.initialize(versions);
		cluster_render.shader_version = cluster_render.cluster_render_shader.version_create();
		cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, 0);
		cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0);
		RD::PipelineMultisampleState ms;
		ms.sample_count = RD::TEXTURE_SAMPLES_4;
		cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0);
	}

	{
		// Store shader (compute).
		Vector<String> versions;
		versions.push_back("");
		cluster_store.cluster_store_shader.initialize(versions);
		cluster_store.shader_version = cluster_store.cluster_store_shader.version_create();
		cluster_store.shader = cluster_store.cluster_store_shader.version_get_shader(cluster_store.shader_version, 0);
		cluster_store.shader_pipeline = RD::get_singleton()->compute_pipeline_create(cluster_store.shader);
	}

	{
		// Debug shader (compute).
		Vector<String> versions;
		versions.push_back("");
		cluster_debug.cluster_debug_shader.initialize(versions);
		cluster_debug.shader_version = cluster_debug.cluster_debug_shader.version_create();
		cluster_debug.shader = cluster_debug.cluster_debug_shader.version_get_shader(cluster_debug.shader_version, 0);
		cluster_debug.shader_pipeline = RD::get_singleton()->compute_pipeline_create(cluster_debug.shader);
	}

	{ // SPHERE
		static const uint32_t icosphere_vertex_count = 42;
		static const float icosphere_vertices[icosphere_vertex_count * 3] = {
			0, 0, -1, 0.7236073, -0.5257253, -0.4472195, -0.276388, -0.8506492, -0.4472199, -0.8944262, 0, -0.4472156, -0.276388, 0.8506492, -0.4472199, 0.7236073, 0.5257253, -0.4472195, 0.276388, -0.8506492, 0.4472199, -0.7236073, -0.5257253, 0.4472195, -0.7236073, 0.5257253, 0.4472195, 0.276388, 0.8506492, 0.4472199, 0.8944262, 0, 0.4472156, 0, 0, 1, -0.1624555, -0.4999952, -0.8506544, 0.4253227, -0.3090114, -0.8506542, 0.2628688, -0.8090116, -0.5257377, 0.8506479, 0, -0.5257359, 0.4253227, 0.3090114, -0.8506542, -0.5257298, 0, -0.8506517, -0.6881894, -0.4999969, -0.5257362, -0.1624555, 0.4999952, -0.8506544, -0.6881894, 0.4999969, -0.5257362, 0.2628688, 0.8090116, -0.5257377, 0.9510579, -0.3090126, 0, 0.9510579, 0.3090126, 0, 0, -1, 0, 0.5877856, -0.8090167, 0, -0.9510579, -0.3090126, 0, -0.5877856, -0.8090167, 0, -0.5877856, 0.8090167, 0, -0.9510579, 0.3090126, 0, 0.5877856, 0.8090167, 0, 0, 1, 0, 0.6881894, -0.4999969, 0.5257362, -0.2628688, -0.8090116, 0.5257377, -0.8506479, 0, 0.5257359, -0.2628688, 0.8090116, 0.5257377, 0.6881894, 0.4999969, 0.5257362, 0.1624555, -0.4999952, 0.8506544, 0.5257298, 0, 0.8506517, -0.4253227, -0.3090114, 0.8506542, -0.4253227, 0.3090114, 0.8506542, 0.1624555, 0.4999952, 0.8506544
		};
		static const uint32_t icosphere_triangle_count = 80;
		static const uint32_t icosphere_triangle_indices[icosphere_triangle_count * 3] = {
			0, 13, 12, 1, 13, 15, 0, 12, 17, 0, 17, 19, 0, 19, 16, 1, 15, 22, 2, 14, 24, 3, 18, 26, 4, 20, 28, 5, 21, 30, 1, 22, 25, 2, 24, 27, 3, 26, 29, 4, 28, 31, 5, 30, 23, 6, 32, 37, 7, 33, 39, 8, 34, 40, 9, 35, 41, 10, 36, 38, 38, 41, 11, 38, 36, 41, 36, 9, 41, 41, 40, 11, 41, 35, 40, 35, 8, 40, 40, 39, 11, 40, 34, 39, 34, 7, 39, 39, 37, 11, 39, 33, 37, 33, 6, 37, 37, 38, 11, 37, 32, 38, 32, 10, 38, 23, 36, 10, 23, 30, 36, 30, 9, 36, 31, 35, 9, 31, 28, 35, 28, 8, 35, 29, 34, 8, 29, 26, 34, 26, 7, 34, 27, 33, 7, 27, 24, 33, 24, 6, 33, 25, 32, 6, 25, 22, 32, 22, 10, 32, 30, 31, 9, 30, 21, 31, 21, 4, 31, 28, 29, 8, 28, 20, 29, 20, 3, 29, 26, 27, 7, 26, 18, 27, 18, 2, 27, 24, 25, 6, 24, 14, 25, 14, 1, 25, 22, 23, 10, 22, 15, 23, 15, 5, 23, 16, 21, 5, 16, 19, 21, 19, 4, 21, 19, 20, 4, 19, 17, 20, 17, 3, 20, 17, 18, 3, 17, 12, 18, 12, 2, 18, 15, 16, 5, 15, 13, 16, 13, 0, 16, 12, 14, 2, 12, 13, 14, 13, 1, 14
		};

		Vector<uint8_t> vertex_data;
		vertex_data.resize(sizeof(float) * icosphere_vertex_count * 3);
		copymem(vertex_data.ptrw(), icosphere_vertices, vertex_data.size());

		sphere_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data);

		Vector<uint8_t> index_data;
		index_data.resize(sizeof(uint32_t) * icosphere_triangle_count * 3);
		copymem(index_data.ptrw(), icosphere_triangle_indices, index_data.size());

		sphere_index_buffer = RD::get_singleton()->index_buffer_create(icosphere_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data);

		Vector<RID> buffers;
		buffers.push_back(sphere_vertex_buffer);

		sphere_vertex_array = RD::get_singleton()->vertex_array_create(icosphere_vertex_count, vertex_format, buffers);

		sphere_index_array = RD::get_singleton()->index_array_create(sphere_index_buffer, 0, icosphere_triangle_count * 3);

		// Find the smallest |d| among the planes of all triangles and store its
		// reciprocal. NOTE(review): presumably the scale that makes the faceted
		// mesh fully enclose a unit sphere - confirm against the shader.
		float min_d = 1e20;
		for (uint32_t i = 0; i < icosphere_triangle_count; i++) {
			Vector3 vertices[3];
			for (uint32_t j = 0; j < 3; j++) {
				uint32_t index = icosphere_triangle_indices[i * 3 + j];
				for (uint32_t k = 0; k < 3; k++) {
					vertices[j][k] = icosphere_vertices[index * 3 + k];
				}
			}
			Plane p(vertices[0], vertices[1], vertices[2]);
			min_d = MIN(Math::abs(p.d), min_d);
		}
		sphere_overfit = 1.0 / min_d;
	}

	{ // CONE
		// The table below holds 33 vertices (99 floats); the index list only
		// references vertices 0..32. The count is therefore 33, not the number
		// of floats, so no zero-filled padding vertices are uploaded.
		static const uint32_t cone_vertex_count = 33;
		static const float cone_vertices[cone_vertex_count * 3] = {
			0, 1, -1, 0.1950903, 0.9807853, -1, 0.3826835, 0.9238795, -1, 0.5555703, 0.8314696, -1, 0.7071068, 0.7071068, -1, 0.8314697, 0.5555702, -1, 0.9238795, 0.3826834, -1, 0.9807853, 0.1950903, -1, 1, 0, -1, 0.9807853, -0.1950902, -1, 0.9238796, -0.3826833, -1, 0.8314697, -0.5555702, -1, 0.7071068, -0.7071068, -1, 0.5555702, -0.8314697, -1, 0.3826833, -0.9238796, -1, 0.1950901, -0.9807853, -1, -3.25841e-7, -1, -1, -0.1950907, -0.9807852, -1, -0.3826839, -0.9238793, -1, -0.5555707, -0.8314693, -1, -0.7071073, -0.7071063, -1, -0.83147, -0.5555697, -1, -0.9238799, -0.3826827, -1, 0, 0, 0, -0.9807854, -0.1950894, -1, -1, 9.65599e-7, -1, -0.9807851, 0.1950913, -1, -0.9238791, 0.3826845, -1, -0.8314689, 0.5555713, -1, -0.7071059, 0.7071077, -1, -0.5555691, 0.8314704, -1, -0.3826821, 0.9238801, -1, -0.1950888, 0.9807856, -1
		};
		static const uint32_t cone_triangle_count = 62;
		static const uint32_t cone_triangle_indices[cone_triangle_count * 3] = {
			0, 23, 1, 1, 23, 2, 2, 23, 3, 3, 23, 4, 4, 23, 5, 5, 23, 6, 6, 23, 7, 7, 23, 8, 8, 23, 9, 9, 23, 10, 10, 23, 11, 11, 23, 12, 12, 23, 13, 13, 23, 14, 14, 23, 15, 15, 23, 16, 16, 23, 17, 17, 23, 18, 18, 23, 19, 19, 23, 20, 20, 23, 21, 21, 23, 22, 22, 23, 24, 24, 23, 25, 25, 23, 26, 26, 23, 27, 27, 23, 28, 28, 23, 29, 29, 23, 30, 30, 23, 31, 31, 23, 32, 32, 23, 0, 7, 15, 24, 32, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 3, 6, 7, 3, 7, 8, 9, 9, 10, 7, 10, 11, 7, 11, 12, 15, 12, 13, 15, 13, 14, 15, 15, 16, 17, 17, 18, 19, 19, 20, 24, 20, 21, 24, 21, 22, 24, 24, 25, 26, 26, 27, 28, 28, 29, 30, 30, 31, 32, 32, 1, 3, 15, 17, 24, 17, 19, 24, 24, 26, 32, 26, 28, 32, 28, 30, 32, 32, 3, 7, 7, 11, 15, 32, 7, 24
		};

		Vector<uint8_t> vertex_data;
		vertex_data.resize(sizeof(float) * cone_vertex_count * 3);
		copymem(vertex_data.ptrw(), cone_vertices, vertex_data.size());

		cone_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data);

		Vector<uint8_t> index_data;
		index_data.resize(sizeof(uint32_t) * cone_triangle_count * 3);
		copymem(index_data.ptrw(), cone_triangle_indices, index_data.size());

		cone_index_buffer = RD::get_singleton()->index_buffer_create(cone_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data);

		Vector<RID> buffers;
		buffers.push_back(cone_vertex_buffer);

		cone_vertex_array = RD::get_singleton()->vertex_array_create(cone_vertex_count, vertex_format, buffers);

		cone_index_array = RD::get_singleton()->index_array_create(cone_index_buffer, 0, cone_triangle_count * 3);

		// Only triangles that touch the (0, 0, 0) vertex are considered. For
		// each of them a plane is built through its other two vertices and a
		// point offset by +Z from the first one; the smallest |d| of those
		// planes is tracked and its reciprocal stored.
		float min_d = 1e20;
		for (uint32_t i = 0; i < cone_triangle_count; i++) {
			Vector3 vertices[3];
			int32_t zero_index = -1;
			for (uint32_t j = 0; j < 3; j++) {
				uint32_t index = cone_triangle_indices[i * 3 + j];
				for (uint32_t k = 0; k < 3; k++) {
					vertices[j][k] = cone_vertices[index * 3 + k];
				}
				if (vertices[j] == Vector3()) {
					zero_index = j;
				}
			}

			if (zero_index != -1) {
				Vector3 a = vertices[(zero_index + 1) % 3];
				Vector3 b = vertices[(zero_index + 2) % 3];
				Vector3 c = a + Vector3(0, 0, 1);
				Plane p(a, b, c);
				min_d = MIN(Math::abs(p.d), min_d);
			}
		}
		cone_overfit = 1.0 / min_d;
	}

	{ // BOX
		static const uint32_t box_vertex_count = 8;
		static const float box_vertices[box_vertex_count * 3] = {
			-1, -1, -1, -1, -1, 1, -1, 1, -1, -1, 1, 1, 1, -1, -1, 1, -1, 1, 1, 1, -1, 1, 1, 1
		};
		static const uint32_t box_triangle_count = 12;
		static const uint32_t box_triangle_indices[box_triangle_count * 3] = {
			1, 2, 0, 3, 6, 2, 7, 4, 6, 5, 0, 4, 6, 0, 2, 3, 5, 7, 1, 3, 2, 3, 7, 6, 7, 5, 4, 5, 1, 0, 6, 4, 0, 3, 1, 5
		};

		Vector<uint8_t> vertex_data;
		vertex_data.resize(sizeof(float) * box_vertex_count * 3);
		copymem(vertex_data.ptrw(), box_vertices, vertex_data.size());

		box_vertex_buffer = RD::get_singleton()->vertex_buffer_create(vertex_data.size(), vertex_data);

		Vector<uint8_t> index_data;
		index_data.resize(sizeof(uint32_t) * box_triangle_count * 3);
		copymem(index_data.ptrw(), box_triangle_indices, index_data.size());

		box_index_buffer = RD::get_singleton()->index_buffer_create(box_triangle_count * 3, RD::INDEX_BUFFER_FORMAT_UINT32, index_data);

		Vector<RID> buffers;
		buffers.push_back(box_vertex_buffer);

		box_vertex_array = RD::get_singleton()->vertex_array_create(box_vertex_count, vertex_format, buffers);

		box_index_array = RD::get_singleton()->index_array_create(box_index_buffer, 0, box_triangle_count * 3);
	}
}
// Frees the mesh vertex/index buffers and the three shader versions that the
// constructor created.
ClusterBuilderSharedDataRD::~ClusterBuilderSharedDataRD() {
	RD *device = RD::get_singleton();

	// Same release order as the buffers are listed: sphere, cone, box.
	const RID mesh_buffers[] = {
		sphere_vertex_buffer,
		sphere_index_buffer,
		cone_vertex_buffer,
		cone_index_buffer,
		box_vertex_buffer,
		box_index_buffer,
	};
	for (const RID &mesh_buffer : mesh_buffers) {
		device->free(mesh_buffer);
	}

	cluster_render.cluster_render_shader.version_free(cluster_render.shader_version);
	cluster_store.cluster_store_shader.version_free(cluster_store.shader_version);
	cluster_debug.cluster_debug_shader.version_free(cluster_debug.shader_version);
}
/////////////////////////////
void ClusterBuilderRD::_clear() {
if (cluster_buffer.is_null()) {
return; //nothing to clear
}
RD::get_singleton()->free(cluster_buffer);
RD::get_singleton()->free(cluster_render_buffer);
RD::get_singleton()->free(element_buffer);
cluster_buffer = RID();
cluster_render_buffer = RID();
element_buffer = RID();
memfree(render_elements);
render_elements = nullptr;
render_element_max = 0;
render_element_count = 0;
RD::get_singleton()->free(framebuffer);
framebuffer = RID();
cluster_render_uniform_set = RID();
cluster_store_uniform_set = RID();
}
// (Re)creates all per-instance resources of the cluster builder.
//
// p_screen_size: size in pixels; both components must be at least 1.
// p_max_elements: maximum element count per element type; must be non-zero.
//     It is rounded up to the next multiple of 32.
// p_depth_buffer, p_depth_buffer_sampler, p_color_buffer: only used to build
//     the debug uniform set, and only when p_color_buffer is valid; otherwise
//     debug_uniform_set is reset to an empty RID.
//
// Any previously created resources are released through _clear() first. On
// invalid arguments the function returns early and leaves the current state
// untouched.
void ClusterBuilderRD::setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer) {
	ERR_FAIL_COND(p_max_elements == 0);
	ERR_FAIL_COND(p_screen_size.x < 1);
	ERR_FAIL_COND(p_screen_size.y < 1);

	_clear();

	screen_size = p_screen_size;

	// Number of clusters per axis, rounded up.
	cluster_screen_size.width = (p_screen_size.width - 1) / cluster_size + 1;
	cluster_screen_size.height = (p_screen_size.height - 1) / cluster_size + 1;

	max_elements_by_type = p_max_elements;
	if (max_elements_by_type % 32) { //need to be 32 aligned
		max_elements_by_type += 32 - (max_elements_by_type % 32);
	}

	cluster_buffer_size = cluster_screen_size.x * cluster_screen_size.y * (max_elements_by_type / 32 + 32) * ELEMENT_TYPE_MAX * 4;

	render_element_max = max_elements_by_type * ELEMENT_TYPE_MAX;

	uint32_t element_tag_bits_size = render_element_max / 32;
	uint32_t element_tag_depth_bits_size = render_element_max;
	cluster_render_buffer_size = cluster_screen_size.x * cluster_screen_size.y * (element_tag_bits_size + element_tag_depth_bits_size) * 4; // tag bits (element was used) and tag depth (depth range in which it was used)

	cluster_render_buffer = RD::get_singleton()->storage_buffer_create(cluster_render_buffer_size);
	cluster_buffer = RD::get_singleton()->storage_buffer_create(cluster_buffer_size);

	// Allocate room for whole RenderElementData structs (not pointers): this
	// array is uploaded as sizeof(RenderElementData) * count bytes and must be
	// as large as element_buffer below.
	render_elements = static_cast<RenderElementData *>(memalloc(sizeof(RenderElementData) * render_element_max));
	render_element_count = 0;

	element_buffer = RD::get_singleton()->storage_buffer_create(sizeof(RenderElementData) * render_element_max);

	// The element geometry is rendered into an attachment-less framebuffer
	// that is the screen size divided by 2^divisor.
	uint32_t div_value = 1 << divisor;
	if (use_msaa) {
		framebuffer = RD::get_singleton()->framebuffer_create_empty(p_screen_size / div_value, RD::TEXTURE_SAMPLES_4);
	} else {
		framebuffer = RD::get_singleton()->framebuffer_create_empty(p_screen_size / div_value);
	}

	{
		// Uniform set for the render shader: state, elements, render buffer.
		Vector<RD::Uniform> uniforms;
		{
			RD::Uniform u;
			u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
			u.binding = 1;
			u.ids.push_back(state_uniform);
			uniforms.push_back(u);
		}
		{
			RD::Uniform u;
			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
			u.binding = 2;
			u.ids.push_back(element_buffer);
			uniforms.push_back(u);
		}
		{
			RD::Uniform u;
			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
			u.binding = 3;
			u.ids.push_back(cluster_render_buffer);
			uniforms.push_back(u);
		}

		cluster_render_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_render.shader, 0);
	}

	{
		// Uniform set for the store shader: render buffer, cluster buffer, elements.
		Vector<RD::Uniform> uniforms;
		{
			RD::Uniform u;
			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
			u.binding = 1;
			u.ids.push_back(cluster_render_buffer);
			uniforms.push_back(u);
		}
		{
			RD::Uniform u;
			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
			u.binding = 2;
			u.ids.push_back(cluster_buffer);
			uniforms.push_back(u);
		}
		{
			RD::Uniform u;
			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
			u.binding = 3;
			u.ids.push_back(element_buffer);
			uniforms.push_back(u);
		}

		cluster_store_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_store.shader, 0);
	}

	if (p_color_buffer.is_valid()) {
		// Uniform set for the debug shader: cluster buffer, color image,
		// depth texture and its sampler.
		Vector<RD::Uniform> uniforms;
		{
			RD::Uniform u;
			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
			u.binding = 1;
			u.ids.push_back(cluster_buffer);
			uniforms.push_back(u);
		}
		{
			RD::Uniform u;
			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
			u.binding = 2;
			u.ids.push_back(p_color_buffer);
			uniforms.push_back(u);
		}
		{
			RD::Uniform u;
			u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
			u.binding = 3;
			u.ids.push_back(p_depth_buffer);
			uniforms.push_back(u);
		}
		{
			RD::Uniform u;
			u.uniform_type = RD::UNIFORM_TYPE_SAMPLER;
			u.binding = 4;
			u.ids.push_back(p_depth_buffer_sampler);
			uniforms.push_back(u);
		}

		debug_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shared->cluster_debug.shader, 0);
	} else {
		debug_uniform_set = RID();
	}
}
void ClusterBuilderRD::begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection, bool p_flip_y) {
view_xform = p_view_transform.affine_inverse();
projection = p_cam_projection;
z_near = projection.get_z_near();
z_far = projection.get_z_far();
orthogonal = p_cam_projection.is_orthogonal();
adjusted_projection = projection;
if (!orthogonal) {
adjusted_projection.adjust_perspective_znear(0.0001);
}
CameraMatrix correction;
correction.set_depth_correction(p_flip_y);
projection = correction * projection;
adjusted_projection = correction * adjusted_projection;
//reset counts
render_element_count = 0;
for (uint32_t i = 0; i < ELEMENT_TYPE_MAX; i++) {
cluster_count_by_type[i] = 0;
}
}
// Records the GPU work that turns the currently queued render elements into
// cluster data:
//  1. the cluster buffer is always cleared;
//  2. if at least one element is queued, the render buffer is cleared, the
//     state uniform and element buffer are uploaded, every element's mesh is
//     drawn with the cluster render pipeline, and finally the cluster store
//     compute shader is dispatched over the cluster grid.
// The whole function is wrapped in a ">Bake Cluster" / "<Bake Cluster"
// timestamp pair.
void ClusterBuilderRD::bake_cluster() {
RENDER_TIMESTAMP(">Bake Cluster");
//clear cluster buffer
RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size, true);
// Everything below is skipped when no elements were queued.
if (render_element_count > 0) {
//clear render buffer
RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size, true);
{ //fill state uniform
StateUniform state;
// The adjusted projection (see begin()) is the one sent to the shader.
RendererStorageRD::store_camera(adjusted_projection, state.projection);
state.inv_z_far = 1.0 / z_far;
state.screen_to_clusters_shift = get_shift_from_power_of_2(cluster_size);
state.screen_to_clusters_shift -= divisor; //screen is smaller, shift one less
state.cluster_screen_width = cluster_screen_size.x;
// Per-cluster layout: render_element_max / 32 words followed by
// render_element_max words (same split as used when sizing the render buffer in setup()).
state.cluster_depth_offset = (render_element_max / 32);
state.cluster_data_size = state.cluster_depth_offset + render_element_max;
RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state, true);
}
//update instances
// Only the queued elements (render_element_count) are uploaded, not the full capacity.
RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements, true);
RENDER_TIMESTAMP("Render Elements");
//render elements
{
RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD);
ClusterBuilderSharedDataRD::ClusterRender::PushConstant push_constant = {};
// Pipeline choice follows use_msaa, matching the framebuffer created in setup().
RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shared->cluster_render.shader_pipelines[use_msaa ? ClusterBuilderSharedDataRD::ClusterRender::PIPELINE_MSAA : ClusterBuilderSharedDataRD::ClusterRender::PIPELINE_NORMAL]);
RD::get_singleton()->draw_list_bind_uniform_set(draw_list, cluster_render_uniform_set, 0);
// The loop advances by `instances` at the bottom of each iteration.
for (uint32_t i = 0; i < render_element_count;) {
// Tells the shader which element this draw starts at.
push_constant.base_index = i;
// Pick the mesh by element type: sphere for omni lights, cone for
// spot lights, box for decals and reflection probes.
switch (render_elements[i].type) {
case ELEMENT_TYPE_OMNI_LIGHT: {
RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->sphere_vertex_array);
RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->sphere_index_array);
} break;
case ELEMENT_TYPE_SPOT_LIGHT: {
RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->cone_vertex_array);
RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->cone_index_array);
} break;
case ELEMENT_TYPE_DECAL:
case ELEMENT_TYPE_REFLECTION_PROBE: {
RD::get_singleton()->draw_list_bind_vertex_array(draw_list, shared->box_vertex_array);
RD::get_singleton()->draw_list_bind_index_array(draw_list, shared->box_index_array);
} break;
}
RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterRender::PushConstant));
// With the batching block below compiled out, every element is drawn
// on its own (instances stays 1).
uint32_t instances = 1;
// NOTE(review): the disabled block uses `element_count` and `elements`, while
// the rest of this function uses `render_element_count` and `render_elements`;
// it may need updating before it can be re-enabled.
#if 0
for (uint32_t j = i+1; j < element_count; j++) {
if (elements[i].type!=elements[j].type) {
break;
}
instances++;
}
#endif
RD::get_singleton()->draw_list_draw(draw_list, true, instances);
i += instances;
}
RD::get_singleton()->draw_list_end();
}
//store elements
RENDER_TIMESTAMP("Pack Elements");
{
RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shared->cluster_store.shader_pipeline);
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cluster_store_uniform_set, 0);
ClusterBuilderSharedDataRD::ClusterStore::PushConstant push_constant;
// Same per-cluster size as state.cluster_data_size above.
push_constant.cluster_render_data_size = render_element_max / 32 + render_element_max;
push_constant.max_render_element_count_div_32 = render_element_max / 32;
push_constant.cluster_screen_size[0] = cluster_screen_size.x;
push_constant.cluster_screen_size[1] = cluster_screen_size.y;
// Queued element count divided by 32, rounded up.
push_constant.render_element_count_div_32 = render_element_count > 0 ? (render_element_count - 1) / 32 + 1 : 0;
push_constant.max_cluster_element_count_div_32 = max_elements_by_type / 32;
push_constant.pad1 = 0;
push_constant.pad2 = 0;
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterStore::PushConstant));
// One thread per cluster in X/Y.
RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1, 8, 8, 1);
RD::get_singleton()->compute_list_end();
}
}
RENDER_TIMESTAMP("<Bake Cluster");
}
// Dispatches the cluster debug compute shader over the whole screen for one
// element type.
//
// p_element: element type forwarded to the shader as `cluster_type`.
//
// Fails (via ERR_FAIL_COND) when no debug uniform set is available.
void ClusterBuilderRD::debug(ElementType p_element) {
	ERR_FAIL_COND(debug_uniform_set.is_null());

	RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
	RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, shared->cluster_debug.shader_pipeline);
	RD::get_singleton()->compute_list_bind_uniform_set(compute_list, debug_uniform_set, 0);

	// Value-initialize so the trailing pad1/pad2 words are zero instead of
	// indeterminate stack contents; the whole struct is copied to the GPU below.
	ClusterBuilderSharedDataRD::ClusterDebug::PushConstant push_constant = {};
	push_constant.screen_size[0] = screen_size.x;
	push_constant.screen_size[1] = screen_size.y;
	push_constant.cluster_screen_size[0] = cluster_screen_size.x;
	push_constant.cluster_screen_size[1] = cluster_screen_size.y;
	push_constant.cluster_shift = get_shift_from_power_of_2(cluster_size);
	push_constant.cluster_type = p_element;
	push_constant.orthogonal = orthogonal;
	push_constant.z_far = z_far;
	push_constant.z_near = z_near;
	push_constant.max_cluster_element_count_div_32 = max_elements_by_type / 32;

	RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterDebug::PushConstant));

	// One thread per screen pixel, in 8x8 groups.
	RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1, 8, 8, 1);
	RD::get_singleton()->compute_list_end();
}
// Returns the RID held in `cluster_buffer`.
RID ClusterBuilderRD::get_cluster_buffer() const {
return cluster_buffer;
}
// Returns the current value of `cluster_size`.
uint32_t ClusterBuilderRD::get_cluster_size() const {
return cluster_size;
}
// Returns `max_elements_by_type`, the per-type element limit that add_light()
// and add_box() compare their counters against.
uint32_t ClusterBuilderRD::get_max_cluster_elements() const {
return max_elements_by_type;
}
// Stores the pointer to the shared data (meshes, shaders, pipelines) this
// builder dereferences. The pointer is only stored here, not copied or freed.
void ClusterBuilderRD::set_shared(ClusterBuilderSharedDataRD *p_shared) {
shared = p_shared;
}
// Creates the uniform buffer that backs StateUniform.
ClusterBuilderRD::ClusterBuilderRD() {
	RD *rd = RD::get_singleton();
	state_uniform = rd->uniform_buffer_create(sizeof(StateUniform));
}
// Runs _clear() first, then frees the state uniform buffer created by the
// constructor.
ClusterBuilderRD::~ClusterBuilderRD() {
	_clear();

	RD *rd = RD::get_singleton();
	rd->free(state_uniform);
}

View file

@ -0,0 +1,378 @@
/*************************************************************************/
/* cluster_builder_rd.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef CLUSTER_BUILDER_RD_H
#define CLUSTER_BUILDER_RD_H
#include "servers/rendering/renderer_rd/renderer_storage_rd.h"
#include "servers/rendering/renderer_rd/shaders/cluster_debug.glsl.gen.h"
#include "servers/rendering/renderer_rd/shaders/cluster_render.glsl.gen.h"
#include "servers/rendering/renderer_rd/shaders/cluster_store.glsl.gen.h"
// Resources shared by ClusterBuilderRD instances: proxy meshes (sphere, cone,
// box) plus the shaders/pipelines for the render, store and debug passes.
// All members are private; ClusterBuilderRD reaches them through friendship.
class ClusterBuilderSharedDataRD {
friend class ClusterBuilderRD;
// Sphere mesh, bound when drawing omni light elements.
RID sphere_vertex_buffer;
RID sphere_vertex_array;
RID sphere_index_buffer;
RID sphere_index_array;
float sphere_overfit = 0.0; // Because an icosphere is not a perfect sphere, we need to enlarge it to cover the sphere area.
// Cone mesh, bound when drawing spot light elements.
RID cone_vertex_buffer;
RID cone_vertex_array;
RID cone_index_buffer;
RID cone_index_array;
float cone_overfit = 0.0; // Because a cone mesh is not a perfect cone, we need to enlarge it to cover the actual cone area.
// Box mesh, bound when drawing decal and reflection probe elements.
RID box_vertex_buffer;
RID box_vertex_array;
RID box_index_buffer;
RID box_index_array;
enum Divisor {
DIVISOR_1,
DIVISOR_2,
DIVISOR_4,
};
// Raster pass that draws each element's proxy mesh.
struct ClusterRender {
// Push constant for the render pass; member order and padding are part of the
// layout handed to the shader, so do not reorder.
struct PushConstant {
uint32_t base_index; // Index of the first render element covered by the draw call.
uint32_t pad0;
uint32_t pad1;
uint32_t pad2;
};
ClusterRenderShaderRD cluster_render_shader;
RID shader_version;
RID shader;
enum PipelineVersion {
PIPELINE_NORMAL,
PIPELINE_MSAA,
PIPELINE_MAX
};
RID shader_pipelines[PIPELINE_MAX]; // Indexed by PipelineVersion.
} cluster_render;
// Compute pass dispatched after the render pass ("Pack Elements").
struct ClusterStore {
// Push constant for the store pass; layout-sensitive, do not reorder.
struct PushConstant {
uint32_t cluster_render_data_size; // how much data for a single cluster takes
uint32_t max_render_element_count_div_32; //divided by 32
uint32_t cluster_screen_size[2];
uint32_t render_element_count_div_32; //divided by 32
uint32_t max_cluster_element_count_div_32; //divided by 32
uint32_t pad1;
uint32_t pad2;
};
ClusterStoreShaderRD cluster_store_shader;
RID shader_version;
RID shader;
RID shader_pipeline;
} cluster_store;
// Compute pass dispatched by ClusterBuilderRD::debug().
struct ClusterDebug {
// Push constant for the debug pass; layout-sensitive, do not reorder.
struct PushConstant {
uint32_t screen_size[2];
uint32_t cluster_screen_size[2];
uint32_t cluster_shift; // Filled from get_shift_from_power_of_2(cluster_size).
uint32_t cluster_type; // ClusterBuilderRD::ElementType value passed to debug().
float z_near;
float z_far;
uint32_t orthogonal;
uint32_t max_cluster_element_count_div_32;
uint32_t pad1;
uint32_t pad2;
};
ClusterDebugShaderRD cluster_debug_shader;
RID shader_version;
RID shader;
RID shader_pipeline;
} cluster_debug;
public:
ClusterBuilderSharedDataRD();
~ClusterBuilderSharedDataRD();
};
// Collects the lights, decals and reflection probes visible from one view and
// turns them into the GPU cluster buffer.
//
// Typical sequence, as exposed by the public interface: setup() once per
// size/format change, then per frame begin(), any number of add_light() /
// add_box() calls, and finally bake_cluster().
class ClusterBuilderRD {
public:
	enum LightType {
		LIGHT_TYPE_OMNI,
		LIGHT_TYPE_SPOT
	};

	enum BoxType {
		BOX_TYPE_REFLECTION_PROBE,
		BOX_TYPE_DECAL,
	};

	enum ElementType {
		ELEMENT_TYPE_OMNI_LIGHT,
		ELEMENT_TYPE_SPOT_LIGHT,
		ELEMENT_TYPE_DECAL,
		ELEMENT_TYPE_REFLECTION_PROBE,
		ELEMENT_TYPE_MAX,
	};

private:
	ClusterBuilderSharedDataRD *shared = nullptr;

	// One entry per added element; the array is uploaded as-is to element_buffer
	// by bake_cluster(), so member order and padding must not change.
	struct RenderElementData {
		uint32_t type; // An ElementType value (0 to ELEMENT_TYPE_MAX - 1).
		uint32_t touches_near;
		uint32_t touches_far;
		uint32_t original_index; // Index of the element within its own type.
		float transform_inv[12]; // Transposed transform for less space.
		float scale[3];
		uint32_t pad;
	};

	uint32_t cluster_count_by_type[ELEMENT_TYPE_MAX] = {};
	uint32_t max_elements_by_type = 0;

	RenderElementData *render_elements = nullptr;
	uint32_t render_element_count = 0;
	uint32_t render_element_max = 0;

	Transform view_xform;
	CameraMatrix adjusted_projection;
	CameraMatrix projection;
	float z_far = 0;
	float z_near = 0;
	bool orthogonal = false;

	enum Divisor {
		DIVISOR_1,
		DIVISOR_2,
		DIVISOR_4,
	};

	uint32_t cluster_size = 32;
	bool use_msaa = true;
	Divisor divisor = DIVISOR_4;

	Size2i screen_size;
	Size2i cluster_screen_size;

	RID framebuffer;
	RID cluster_render_buffer; // Used for creating.
	RID cluster_buffer; // Used for rendering.
	RID element_buffer; // Used for storing, to hint element touches far plane or near plane.
	uint32_t cluster_render_buffer_size = 0;
	uint32_t cluster_buffer_size = 0;

	RID cluster_render_uniform_set;
	RID cluster_store_uniform_set;

	// Persistent data.

	void _clear();

	// CPU-side mirror of the state uniform buffer (see `state_uniform`).
	struct StateUniform {
		float projection[16];
		float inv_z_far;
		uint32_t screen_to_clusters_shift; // Shift to obtain coordinates in block indices.
		uint32_t cluster_screen_width;
		uint32_t cluster_data_size; // How much data for a single cluster takes.
		uint32_t cluster_depth_offset;
		uint32_t pad0;
		uint32_t pad1;
		uint32_t pad2;
	};

	RID state_uniform;

	RID debug_uniform_set;

public:
	void setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer);

	void begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection, bool p_flip_y);

	// Appends an omni or spot light to the render element list.
	//
	// p_type:          omni or spot.
	// p_transform:     light transform; it is multiplied by the view transform here.
	// p_radius:        light range, before the transform's uniform scale is applied.
	// p_spot_aperture: spot half-angle in degrees (ignored for omni lights).
	//
	// Silently does nothing once the per-type limit (max_elements_by_type) has
	// been reached for the requested light type.
	_FORCE_INLINE_ void add_light(LightType p_type, const Transform &p_transform, float p_radius, float p_spot_aperture) {
		if (p_type == LIGHT_TYPE_OMNI && cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT] == max_elements_by_type) {
			return; // Max number of elements reached.
		}
		if (p_type == LIGHT_TYPE_SPOT && cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT] == max_elements_by_type) {
			return; // Max number of elements reached.
		}

		RenderElementData &e = render_elements[render_element_count];

		Transform xform = view_xform * p_transform;

		// The scale is carried by `radius`, so strip it from the basis, but only
		// when it actually deviates from 1 by more than 2%. The previous test
		// (`> 0.98 || < 1.02`) was true for every value.
		float radius = xform.basis.get_uniform_scale();
		if (radius < 0.98 || radius > 1.02) {
			xform.basis.orthonormalize();
		}

		radius *= p_radius;

		if (p_type == LIGHT_TYPE_OMNI) {
			radius *= shared->sphere_overfit; // Overfit icosphere.

			float depth = -xform.origin.z;
			if (orthogonal) {
				e.touches_near = (depth - radius) < z_near;
			} else {
				// Camera is contained inside the light.
				float radius2 = radius * shared->sphere_overfit; // Overfit again for outer size (camera may be outside actual sphere but behind an icosphere vertex).
				e.touches_near = xform.origin.length_squared() < radius2 * radius2;
			}

			e.touches_far = (depth + radius) > z_far;
			e.scale[0] = radius;
			e.scale[1] = radius;
			e.scale[2] = radius;
			e.type = ELEMENT_TYPE_OMNI_LIGHT;
			e.original_index = cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT];

			RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv);

			cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]++;

		} else {
			radius *= shared->cone_overfit; // Overfit cone mesh.

			real_t len = Math::tan(Math::deg2rad(p_spot_aperture)) * radius;

			// Approximate depth range from the four corners of the square that
			// encloses the cone base; a cone support function would be tighter.
			float max_d = -1e20;
			float min_d = 1e20;

			const real_t corner_signs[4][2] = { { 1, 1 }, { -1, 1 }, { -1, -1 }, { 1, -1 } };
			for (int i = 0; i < 4; i++) {
				float d = -xform.xform(Vector3(len * corner_signs[i][0], len * corner_signs[i][1], -radius)).z;
				min_d = MIN(d, min_d);
				max_d = MAX(d, max_d);
			}

			if (orthogonal) {
				e.touches_near = min_d < z_near;
			} else {
				// Camera is contained inside the light.
				Plane base_plane(xform.origin, -xform.basis.get_axis(Vector3::AXIS_Z));
				float dist = base_plane.distance_to(Vector3());
				if (dist >= 0 && dist < radius) {
					// Within the cone's length; check the angle.
					float angle = Math::rad2deg(Math::acos((-xform.origin.normalized()).dot(-xform.basis.get_axis(Vector3::AXIS_Z))));
					e.touches_near = angle < p_spot_aperture * 1.05; // Overfit aperture a little due to cone overfit.
				} else {
					e.touches_near = false;
				}
			}

			e.touches_far = max_d > z_far;

			e.scale[0] = len * shared->cone_overfit;
			e.scale[1] = len * shared->cone_overfit;
			e.scale[2] = radius;

			e.type = ELEMENT_TYPE_SPOT_LIGHT;
			e.original_index = cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]; // Spot lights are indexed by their own counter.

			RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv);

			cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]++;
		}

		render_element_count++;
	}

	// Appends a decal or reflection probe (an oriented box) to the render
	// element list.
	//
	// p_box_type:     decal or reflection probe.
	// p_transform:    box transform; it is multiplied by the view transform here.
	// p_half_extents: box half size along each local axis, before scaling.
	//
	// Silently does nothing once the per-type limit (max_elements_by_type) has
	// been reached for the requested box type.
	_FORCE_INLINE_ void add_box(BoxType p_box_type, const Transform &p_transform, const Vector3 &p_half_extents) {
		if (p_box_type == BOX_TYPE_DECAL && cluster_count_by_type[ELEMENT_TYPE_DECAL] == max_elements_by_type) {
			return; // Max number of elements reached.
		}
		if (p_box_type == BOX_TYPE_REFLECTION_PROBE && cluster_count_by_type[ELEMENT_TYPE_REFLECTION_PROBE] == max_elements_by_type) {
			return; // Max number of elements reached.
		}

		RenderElementData &e = render_elements[render_element_count];
		Transform xform = view_xform * p_transform;

		// Extract scale and scale the matrix by it, makes things simpler.
		Vector3 scale = p_half_extents;
		for (uint32_t i = 0; i < 3; i++) {
			float s = xform.basis.elements[i].length();
			scale[i] *= s;
			xform.basis.elements[i] /= s;
		}

		float box_depth = Math::abs(xform.basis.xform_inv(Vector3(0, 0, -1)).dot(scale));
		float depth = -xform.origin.z;

		if (orthogonal) {
			e.touches_near = depth - box_depth < z_near;
		} else {
			// Camera is contained inside the box.
			Vector3 inside = xform.xform_inv(Vector3(0, 0, 0)).abs();
			e.touches_near = inside.x < scale.x && inside.y < scale.y && inside.z < scale.z;
		}

		e.touches_far = depth + box_depth > z_far;

		e.scale[0] = scale.x;
		e.scale[1] = scale.y;
		e.scale[2] = scale.z;

		e.type = (p_box_type == BOX_TYPE_DECAL) ? ELEMENT_TYPE_DECAL : ELEMENT_TYPE_REFLECTION_PROBE;
		e.original_index = cluster_count_by_type[e.type];

		RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv);

		cluster_count_by_type[e.type]++;
		render_element_count++;
	}

	void bake_cluster();
	void debug(ElementType p_element);

	RID get_cluster_buffer() const;
	uint32_t get_cluster_size() const;
	uint32_t get_max_cluster_elements() const;

	void set_shared(ClusterBuilderSharedDataRD *p_shared);

	ClusterBuilderRD();
	~ClusterBuilderRD();
};
#endif // CLUSTER_BUILDER_RD_H

View file

@ -1,252 +0,0 @@
/*************************************************************************/
/* light_cluster_builder.cpp */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#include "light_cluster_builder.h"
void LightClusterBuilder::begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection) {
view_xform = p_view_transform;
projection = p_cam_projection;
z_near = -projection.get_z_near();
z_far = -projection.get_z_far();
//reset counts
light_count = 0;
refprobe_count = 0;
decal_count = 0;
item_count = 0;
sort_id_count = 0;
}
// Builds the width x height x depth cluster grid on the CPU from the items
// accumulated since begin(), then uploads the grid to cluster_texture and the
// flattened per-cell index lists to items_buffer.
//
// Works in three passes: (1) record every (cell, item) pair and count items per
// cell and type, (2) turn the counts into start offsets, (3) scatter the item
// indices into their cell's range.
void LightClusterBuilder::bake_cluster() {
// z_near and z_far hold negated plane distances (see begin()), so this is the
// depth covered by one slice.
float slice_depth = (z_near - z_far) / depth;
uint8_t *cluster_dataw = cluster_data.ptrw();
Cell *cluster_data_ptr = (Cell *)cluster_dataw;
//clear the cluster
zeromem(cluster_data_ptr, (width * height * depth * sizeof(Cell)));
/* Step 1, create cell positions and count them */
for (uint32_t i = 0; i < item_count; i++) {
const Item &item = items[i];
// Range of depth slices overlapped by the item's AABB.
int from_slice = Math::floor((z_near - (item.aabb.position.z + item.aabb.size.z)) / slice_depth);
int to_slice = Math::floor((z_near - item.aabb.position.z) / slice_depth);
if (from_slice >= (int)depth || to_slice < 0) {
continue; //sorry no go
}
from_slice = MAX(0, from_slice);
to_slice = MIN((int)depth - 1, to_slice);
for (int j = from_slice; j <= to_slice; j++) {
Vector3 min = item.aabb.position;
Vector3 max = item.aabb.position + item.aabb.size;
// Clip the AABB's depth extent to this slice.
float limit_near = MIN((z_near - slice_depth * j), max.z);
float limit_far = MAX((z_near - slice_depth * (j + 1)), min.z);
// Project the AABB's XY extent at the near limit of the slice...
max.z = limit_near;
min.z = limit_near;
Vector3 proj_min = projection.xform(min);
Vector3 proj_max = projection.xform(max);
// Map from [-1, 1] to cell coordinates; Y is negated in the mapping.
int near_from_x = int(Math::floor((proj_min.x * 0.5 + 0.5) * width));
int near_from_y = int(Math::floor((-proj_max.y * 0.5 + 0.5) * height));
int near_to_x = int(Math::floor((proj_max.x * 0.5 + 0.5) * width));
int near_to_y = int(Math::floor((-proj_min.y * 0.5 + 0.5) * height));
// ...and again at the far limit.
max.z = limit_far;
min.z = limit_far;
proj_min = projection.xform(min);
proj_max = projection.xform(max);
int far_from_x = int(Math::floor((proj_min.x * 0.5 + 0.5) * width));
int far_from_y = int(Math::floor((-proj_max.y * 0.5 + 0.5) * height));
int far_to_x = int(Math::floor((proj_max.x * 0.5 + 0.5) * width));
int far_to_y = int(Math::floor((-proj_min.y * 0.5 + 0.5) * height));
//print_line(itos(j) + " near - " + Vector2i(near_from_x, near_from_y) + " -> " + Vector2i(near_to_x, near_to_y));
//print_line(itos(j) + " far - " + Vector2i(far_from_x, far_from_y) + " -> " + Vector2i(far_to_x, far_to_y));
// Union of the near and far rectangles.
int from_x = MIN(near_from_x, far_from_x);
int from_y = MIN(near_from_y, far_from_y);
int to_x = MAX(near_to_x, far_to_x);
int to_y = MAX(near_to_y, far_to_y);
// Entirely outside the grid in X or Y.
if (from_x >= (int)width || to_x < 0 || from_y >= (int)height || to_y < 0) {
continue;
}
// Clamp to the grid.
int sx = MAX(0, from_x);
int sy = MAX(0, from_y);
int dx = MIN((int)width - 1, to_x);
int dy = MIN((int)height - 1, to_y);
//print_line(itos(j) + " - " + Vector2i(sx, sy) + " -> " + Vector2i(dx, dy));
for (int x = sx; x <= dx; x++) {
for (int y = sy; y <= dy; y++) {
// Linear cell index, slice-major.
uint32_t offset = j * (width * height) + y * width + x;
// Grow the (cell, item) pair list to the next power of two when full; the ids
// array and the GPU items buffer are resized along with it.
if (unlikely(sort_id_count == sort_id_max)) {
sort_id_max = nearest_power_of_2_templated(sort_id_max + 1);
sort_ids = (SortID *)memrealloc(sort_ids, sizeof(SortID) * sort_id_max);
if (ids.size()) {
ids.resize(sort_id_max);
RD::get_singleton()->free(items_buffer);
items_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * sort_id_max);
}
}
sort_ids[sort_id_count].cell_index = offset;
sort_ids[sort_id_count].item_index = item.index;
sort_ids[sort_id_count].item_type = item.type;
sort_id_count++;
//for now, only count
cluster_data_ptr[offset].item_pointers[item.type]++;
//print_line("at offset " + itos(offset) + " value: " + itos(cluster_data_ptr[offset].item_pointers[item.type]));
}
}
}
}
/* Step 2, Assign pointers (and reset counters) */
// Running sum over all cells and types: each slot's count is replaced by the
// start offset of its range in the ids array.
uint32_t offset = 0;
for (uint32_t i = 0; i < (width * height * depth); i++) {
for (int j = 0; j < ITEM_TYPE_MAX; j++) {
uint32_t count = cluster_data_ptr[i].item_pointers[j]; //save count
cluster_data_ptr[i].item_pointers[j] = offset; //replace count by pointer
offset += count; //increase offset by count;
}
}
//print_line("offset: " + itos(offset));
/* Step 3, Place item lists */
// Each slot packs the start offset in the bits below COUNTER_SHIFT and the
// number of ids written so far in the bits above it.
uint32_t *ids_ptr = ids.ptrw();
for (uint32_t i = 0; i < sort_id_count; i++) {
const SortID &id = sort_ids[i];
Cell &cell = cluster_data_ptr[id.cell_index];
uint32_t pointer = cell.item_pointers[id.item_type] & POINTER_MASK;
uint32_t counter = cell.item_pointers[id.item_type] >> COUNTER_SHIFT;
ids_ptr[pointer + counter] = id.item_index;
cell.item_pointers[id.item_type] = pointer | ((counter + 1) << COUNTER_SHIFT);
}
// Upload the grid and the first `offset` ids.
RD::get_singleton()->texture_update(cluster_texture, 0, cluster_data, true);
RD::get_singleton()->buffer_update(items_buffer, 0, offset * sizeof(uint32_t), ids_ptr, true);
}
// Sets the cluster grid dimensions. When they differ from the current ones the
// previous cluster texture (if any) is freed, the CPU-side cell storage is
// resized, and a new 3D RGBA32-uint texture of the requested size is created.
void LightClusterBuilder::setup(uint32_t p_width, uint32_t p_height, uint32_t p_depth) {
	const bool same_dimensions = (width == p_width) && (height == p_height) && (depth == p_depth);
	if (same_dimensions) {
		return;
	}

	if (cluster_texture.is_valid()) {
		RD::get_singleton()->free(cluster_texture);
	}

	width = p_width;
	height = p_height;
	depth = p_depth;

	// One Cell per grid entry.
	cluster_data.resize(width * height * depth * sizeof(Cell));

	RD::TextureFormat format;
	format.format = RD::DATA_FORMAT_R32G32B32A32_UINT;
	format.texture_type = RD::TEXTURE_TYPE_3D;
	format.width = width;
	format.height = height;
	format.depth = depth;
	format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;

	cluster_texture = RD::get_singleton()->texture_create(format, RD::TextureView());
}
// Returns the RID of the 3D cluster texture created by setup().
RID LightClusterBuilder::get_cluster_texture() const {
return cluster_texture;
}
// Returns the RID of the storage buffer holding the per-cell item indices.
// bake_cluster() may free and recreate this buffer when it grows, so the
// returned RID can differ after a bake.
RID LightClusterBuilder::get_cluster_indices_buffer() const {
return items_buffer;
}
// Pre-allocates every accumulator with the same initial capacity so the first
// frames do not have to grow them.
LightClusterBuilder::LightClusterBuilder() {
	const uint32_t initial_capacity = 1024;

	lights = (LightData *)memalloc(sizeof(LightData) * initial_capacity);
	light_max = initial_capacity;

	refprobes = (OrientedBoxData *)memalloc(sizeof(OrientedBoxData) * initial_capacity);
	refprobe_max = initial_capacity;

	decals = (OrientedBoxData *)memalloc(sizeof(OrientedBoxData) * initial_capacity);
	decal_max = initial_capacity;

	items = (Item *)memalloc(sizeof(Item) * initial_capacity);
	item_max = initial_capacity;

	// sort_ids, ids and items_buffer grow together in bake_cluster(), keyed on
	// sort_id_max, so they must start at the same size. Previously sort_id_max
	// was left at 0 (item_max was assigned a second time instead) and ids was
	// resized to 2014, which made the first bake reallocate everything.
	sort_ids = (SortID *)memalloc(sizeof(SortID) * initial_capacity);
	sort_id_max = initial_capacity;
	ids.resize(initial_capacity);
	items_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * initial_capacity);
}
// Releases the GPU resources and the raw accumulator arrays owned by the
// builder.
LightClusterBuilder::~LightClusterBuilder() {
	// cluster_data is only sized by setup(), which is also what creates the texture.
	if (cluster_data.size() != 0) {
		RD::get_singleton()->free(cluster_texture);
	}

	if (lights != nullptr) {
		memfree(lights);
	}
	if (refprobes != nullptr) {
		memfree(refprobes);
	}
	if (decals != nullptr) {
		memfree(decals);
	}
	if (items != nullptr) {
		memfree(items);
	}

	// The items buffer is released together with the sort id array.
	if (sort_ids != nullptr) {
		memfree(sort_ids);
		RD::get_singleton()->free(items_buffer);
	}
}

View file

@ -1,290 +0,0 @@
/*************************************************************************/
/* light_cluster_builder.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/
#ifndef LIGHT_CLUSTER_BUILDER_H
#define LIGHT_CLUSTER_BUILDER_H
#include "servers/rendering/renderer_rd/renderer_storage_rd.h"
// CPU cluster builder: accumulates lights, reflection probes and decals for one
// view (begin() + add_*()), then bake_cluster() bins their view-space AABBs into
// a width x height x depth grid and uploads the result.
class LightClusterBuilder {
public:
	enum LightType {
		LIGHT_TYPE_OMNI,
		LIGHT_TYPE_SPOT
	};

	enum ItemType {
		ITEM_TYPE_OMNI_LIGHT,
		ITEM_TYPE_SPOT_LIGHT,
		ITEM_TYPE_REFLECTION_PROBE,
		ITEM_TYPE_DECAL,
		ITEM_TYPE_MAX //should always be 4
	};

	// Bit layout of a Cell::item_pointers entry.
	enum {
		COUNTER_SHIFT = 20, //one million total ids
		POINTER_MASK = (1 << COUNTER_SHIFT) - 1,
		COUNTER_MASK = 0xfff // 4096 items per cell
	};

private:
	struct LightData {
		float position[3];
		uint32_t type;
		float radius;
		float spot_aperture;
		uint32_t pad[2];
	};

	uint32_t light_count = 0;
	uint32_t light_max = 0;
	LightData *lights = nullptr;

	// Box described by its center and three half-extent axis vectors.
	struct OrientedBoxData {
		float position[3];
		uint32_t pad;
		float x_axis[3];
		uint32_t pad2;
		float y_axis[3];
		uint32_t pad3;
		float z_axis[3];
		uint32_t pad4;
	};

	uint32_t refprobe_count = 0;
	uint32_t refprobe_max = 0;
	OrientedBoxData *refprobes = nullptr;

	uint32_t decal_count = 0;
	uint32_t decal_max = 0;
	OrientedBoxData *decals = nullptr;

	// View-space bounds of one added element plus its index within its own type.
	struct Item {
		AABB aabb;
		ItemType type;
		uint32_t index;
	};

	Item *items = nullptr;
	uint32_t item_count = 0;
	uint32_t item_max = 0;

	uint32_t width = 0;
	uint32_t height = 0;
	uint32_t depth = 0;

	struct Cell {
		uint32_t item_pointers[ITEM_TYPE_MAX];
	};

	Vector<uint8_t> cluster_data;
	RID cluster_texture;

	// One (cell, item) pair recorded while baking.
	struct SortID {
		uint32_t cell_index;
		uint32_t item_index;
		ItemType item_type;
	};

	SortID *sort_ids = nullptr;
	Vector<uint32_t> ids;
	uint32_t sort_id_count = 0;
	uint32_t sort_id_max = 0;
	RID items_buffer;

	Transform view_xform;
	CameraMatrix projection;
	float z_far = 0;
	float z_near = 0;

	// Appends an item to `items`, growing the array to the next power of two
	// when it is full.
	_FORCE_INLINE_ void _add_item(const AABB &p_aabb, ItemType p_type, uint32_t p_index) {
		if (unlikely(item_count == item_max)) {
			item_max = nearest_power_of_2_templated(item_max + 1);
			items = (Item *)memrealloc(items, sizeof(Item) * item_max);
		}

		Item &item = items[item_count];
		item.aabb = p_aabb;
		item.index = p_index;
		item.type = p_type;
		item_count++;
	}

	// Shared body of add_reflection_probe() and add_decal(): writes the
	// view-space center and half-extent axes of the box into r_box and registers
	// the AABB of its eight corners as an item of type p_type with index p_index.
	_FORCE_INLINE_ void _add_oriented_box(OrientedBoxData &r_box, const Transform &p_transform, const Vector3 &p_half_extents, ItemType p_type, uint32_t p_index) {
		Transform xform = view_xform * p_transform;

		Vector3 origin = xform.origin;
		r_box.position[0] = origin.x;
		r_box.position[1] = origin.y;
		r_box.position[2] = origin.z;

		Vector3 x_axis = xform.basis.get_axis(0) * p_half_extents.x;
		r_box.x_axis[0] = x_axis.x;
		r_box.x_axis[1] = x_axis.y;
		r_box.x_axis[2] = x_axis.z;

		Vector3 y_axis = xform.basis.get_axis(1) * p_half_extents.y;
		r_box.y_axis[0] = y_axis.x;
		r_box.y_axis[1] = y_axis.y;
		r_box.y_axis[2] = y_axis.z;

		Vector3 z_axis = xform.basis.get_axis(2) * p_half_extents.z;
		r_box.z_axis[0] = z_axis.x;
		r_box.z_axis[1] = z_axis.y;
		r_box.z_axis[2] = z_axis.z;

		// Start from one corner and grow the AABB over the other seven.
		AABB aabb;
		aabb.position = origin + x_axis + y_axis + z_axis;
		aabb.expand_to(origin + x_axis + y_axis - z_axis);
		aabb.expand_to(origin + x_axis - y_axis + z_axis);
		aabb.expand_to(origin + x_axis - y_axis - z_axis);
		aabb.expand_to(origin - x_axis + y_axis + z_axis);
		aabb.expand_to(origin - x_axis + y_axis - z_axis);
		aabb.expand_to(origin - x_axis - y_axis + z_axis);
		aabb.expand_to(origin - x_axis - y_axis - z_axis);

		_add_item(aabb, p_type, p_index);
	}

public:
	void begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection);

	// Records an omni or spot light and the view-space AABB used to bin it.
	//
	// p_transform:     light transform; the stored position is its origin as
	//                  given, while the AABB uses it multiplied by the view transform.
	// p_radius:        light range; multiplied by the uniform scale of the
	//                  view-space transform.
	// p_spot_aperture: spot half-angle in degrees (only used for spot bounds).
	_FORCE_INLINE_ void add_light(LightType p_type, const Transform &p_transform, float p_radius, float p_spot_aperture) {
		if (unlikely(light_count == light_max)) {
			light_max = nearest_power_of_2_templated(light_max + 1);
			lights = (LightData *)memrealloc(lights, sizeof(LightData) * light_max);
		}

		LightData &ld = lights[light_count];
		ld.type = p_type;
		ld.position[0] = p_transform.origin.x;
		ld.position[1] = p_transform.origin.y;
		ld.position[2] = p_transform.origin.z;
		ld.radius = p_radius;
		ld.spot_aperture = p_spot_aperture;

		Transform xform = view_xform * p_transform;

		ld.radius *= xform.basis.get_uniform_scale();

		AABB aabb;

		switch (p_type) {
			case LIGHT_TYPE_OMNI: {
				// Cube of side 2 * radius centered on the light.
				aabb.position = xform.origin;
				aabb.size = Vector3(ld.radius, ld.radius, ld.radius);
				aabb.position -= aabb.size;
				aabb.size *= 2.0;

				_add_item(aabb, ITEM_TYPE_OMNI_LIGHT, light_count);
			} break;
			case LIGHT_TYPE_SPOT: {
				// Bounds of the pyramid from the apex to the square enclosing the
				// cone base.
				float r = ld.radius;
				real_t len = Math::tan(Math::deg2rad(ld.spot_aperture)) * r;

				aabb.position = xform.origin;
				aabb.expand_to(xform.xform(Vector3(len, len, -r)));
				aabb.expand_to(xform.xform(Vector3(-len, len, -r)));
				aabb.expand_to(xform.xform(Vector3(-len, -len, -r)));
				aabb.expand_to(xform.xform(Vector3(len, -len, -r)));

				_add_item(aabb, ITEM_TYPE_SPOT_LIGHT, light_count);
			} break;
		}

		light_count++;
	}

	// Records a reflection probe as an oriented box with the given half extents.
	_FORCE_INLINE_ void add_reflection_probe(const Transform &p_transform, const Vector3 &p_half_extents) {
		if (unlikely(refprobe_count == refprobe_max)) {
			refprobe_max = nearest_power_of_2_templated(refprobe_max + 1);
			refprobes = (OrientedBoxData *)memrealloc(refprobes, sizeof(OrientedBoxData) * refprobe_max);
		}

		_add_oriented_box(refprobes[refprobe_count], p_transform, p_half_extents, ITEM_TYPE_REFLECTION_PROBE, refprobe_count);
		refprobe_count++;
	}

	// Records a decal as an oriented box with the given half extents.
	_FORCE_INLINE_ void add_decal(const Transform &p_transform, const Vector3 &p_half_extents) {
		if (unlikely(decal_count == decal_max)) {
			decal_max = nearest_power_of_2_templated(decal_max + 1);
			decals = (OrientedBoxData *)memrealloc(decals, sizeof(OrientedBoxData) * decal_max);
		}

		_add_oriented_box(decals[decal_count], p_transform, p_half_extents, ITEM_TYPE_DECAL, decal_count);
		decal_count++;
	}

	void bake_cluster();

	void setup(uint32_t p_width, uint32_t p_height, uint32_t p_depth);

	RID get_cluster_texture() const;
	RID get_cluster_indices_buffer() const;

	LightClusterBuilder();
	~LightClusterBuilder();
};
#endif // LIGHT_CLUSTER_BUILDER_H

View file

@ -1071,7 +1071,7 @@ void RendererSceneRenderForward::_render_list_with_threads(RenderListParameters
}
}
void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) {
void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) {
//CameraMatrix projection = p_cam_projection;
//projection.flip_y(); // Vulkan and modern APIs use Y-Down
CameraMatrix correction;
@ -1099,8 +1099,18 @@ void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_ren
scene_state.ubo.penumbra_shadow_samples = penumbra_shadow_samples_get();
scene_state.ubo.soft_shadow_samples = soft_shadow_samples_get();
scene_state.ubo.screen_pixel_size[0] = p_screen_pixel_size.x;
scene_state.ubo.screen_pixel_size[1] = p_screen_pixel_size.y;
Size2 screen_pixel_size = Vector2(1.0, 1.0) / Size2(p_screen_size);
scene_state.ubo.screen_pixel_size[0] = screen_pixel_size.x;
scene_state.ubo.screen_pixel_size[1] = screen_pixel_size.y;
scene_state.ubo.cluster_shift = get_shift_from_power_of_2(p_cluster_size);
scene_state.ubo.max_cluster_element_count_div_32 = p_max_cluster_elements / 32;
{
uint32_t cluster_screen_width = (p_screen_size.width - 1) / p_cluster_size + 1;
uint32_t cluster_screen_height = (p_screen_size.height - 1) / p_cluster_size + 1;
scene_state.ubo.cluster_type_size = cluster_screen_width * cluster_screen_height * (scene_state.ubo.max_cluster_element_count_div_32 + 32);
scene_state.ubo.cluster_width = cluster_screen_width;
}
if (p_shadow_atlas.is_valid()) {
Vector2 sas = shadow_atlas_get_size(p_shadow_atlas);
@ -1489,7 +1499,7 @@ void RendererSceneRenderForward::_setup_lightmaps(const PagedArray<RID> &p_light
}
}
void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) {
void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_screen_lod_threshold) {
RenderBufferDataForward *render_buffer = nullptr;
if (p_render_buffer.is_valid()) {
render_buffer = (RenderBufferDataForward *)render_buffers_get_data(p_render_buffer);
@ -1522,7 +1532,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
scene_state.ubo.viewport_size[1] = vp_he.y;
scene_state.ubo.directional_light_count = p_directional_light_count;
Size2 screen_pixel_size;
Size2i screen_size;
RID opaque_framebuffer;
RID opaque_specular_framebuffer;
@ -1537,8 +1546,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
bool using_giprobe = false;
if (render_buffer) {
screen_pixel_size.width = 1.0 / render_buffer->width;
screen_pixel_size.height = 1.0 / render_buffer->height;
screen_size.x = render_buffer->width;
screen_size.y = render_buffer->height;
@ -1595,8 +1602,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
alpha_framebuffer = opaque_framebuffer;
} else if (p_reflection_probe.is_valid()) {
uint32_t resolution = reflection_probe_instance_get_resolution(p_reflection_probe);
screen_pixel_size.width = 1.0 / resolution;
screen_pixel_size.height = 1.0 / resolution;
screen_size.x = resolution;
screen_size.y = resolution;
@ -1613,7 +1618,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
_setup_lightmaps(p_lightmaps, p_cam_transform);
_setup_giprobes(p_gi_probes);
_setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false);
_setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false);
_update_render_base_uniform_set(); //may have changed due to the above (light buffer enlarged, as an example)
@ -1703,7 +1708,6 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
bool debug_giprobes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_ALBEDO || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_LIGHTING || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_GI_PROBE_EMISSION;
bool debug_sdfgi_probes = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_SDFGI_PROBES;
bool depth_pre_pass = !low_end && depth_framebuffer.is_valid();
bool using_ssao = depth_pre_pass && p_render_buffer.is_valid() && p_environment.is_valid() && environment_is_ssao_enabled(p_environment);
@ -1711,7 +1715,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
if (depth_pre_pass) { //depth pre pass
RENDER_TIMESTAMP("Render Depth Pre-Pass");
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
bool finish_depth = using_ssao || using_sdfgi || using_giprobe;
RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold);
@ -1738,11 +1742,11 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
_process_gi(p_render_buffer, render_buffer->normal_roughness_buffer, render_buffer->ambient_buffer, render_buffer->reflection_buffer, render_buffer->giprobe_buffer, p_environment, p_cam_projection, p_cam_transform, p_gi_probes);
}
_setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid());
_setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), p_render_buffer.is_valid());
RENDER_TIMESTAMP("Render Opaque Pass");
RID rp_uniform_set = _setup_render_pass_uniform_set(p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_gi_probes, p_lightmaps);
RID rp_uniform_set = _setup_render_pass_uniform_set(p_render_buffer, radiance_texture, p_shadow_atlas, p_reflection_atlas, p_cluster_buffer, p_gi_probes, p_lightmaps);
bool can_continue_color = !scene_state.used_screen_texture && !using_ssr && !using_sss;
bool can_continue_depth = !scene_state.used_depth_texture && !using_ssr && !using_sss;
@ -1844,7 +1848,7 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
RENDER_TIMESTAMP("Render Transparent Pass");
_setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_pixel_size, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false);
_setup_environment(p_environment, p_render_buffer, p_cam_projection, p_cam_transform, p_reflection_probe, p_reflection_probe.is_valid(), screen_size, p_cluster_size, p_max_cluster_elements, p_shadow_atlas, !p_reflection_probe.is_valid(), p_default_bg_color, p_cam_projection.get_z_near(), p_cam_projection.get_z_far(), false);
render_list.sort_by_reverse_depth_and_priority(true);
@ -1867,7 +1871,7 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr
scene_state.ubo.dual_paraboloid_side = p_use_dp_flip ? -1 : 1;
_setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), RID(), true, Color(), 0, p_zfar, false, p_use_pancake);
_setup_environment(RID(), RID(), p_projection, p_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), true, Color(), 0, p_zfar, false, p_use_pancake);
if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) {
p_screen_lod_threshold = 0.0;
@ -1877,7 +1881,7 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr
_fill_render_list(p_instances, pass_mode, p_projection, p_transform);
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RENDER_TIMESTAMP("Render Shadow");
@ -1899,13 +1903,13 @@ void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb,
scene_state.ubo.dual_paraboloid_side = 0;
_setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), RID(), true, Color(), 0, p_cam_projection.get_z_far(), false, false);
_setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), true, Color(), 0, p_cam_projection.get_z_far(), false, false);
PassMode pass_mode = PASS_MODE_SHADOW;
_fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform);
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RENDER_TIMESTAMP("Render Collider Heightield");
@ -1928,12 +1932,12 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo
scene_state.ubo.dual_paraboloid_side = 0;
scene_state.ubo.material_uv2_mode = true;
_setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0);
_setup_environment(RID(), RID(), p_cam_projection, p_cam_transform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0);
PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL;
_fill_render_list(p_instances, pass_mode, p_cam_projection, p_cam_transform);
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RENDER_TIMESTAMP("Render Material");
@ -1964,12 +1968,12 @@ void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance *
scene_state.ubo.dual_paraboloid_side = 0;
scene_state.ubo.material_uv2_mode = true;
_setup_environment(RID(), RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0);
_setup_environment(RID(), RID(), CameraMatrix(), Transform(), RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0);
PassMode pass_mode = PASS_MODE_DEPTH_MATERIAL;
_fill_render_list(p_instances, pass_mode, CameraMatrix(), Transform());
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
RENDER_TIMESTAMP("Render Material");
@ -2079,7 +2083,7 @@ void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vecto
RendererStorageRD::store_transform(to_bounds.affine_inverse() * cam_xform, scene_state.ubo.sdf_to_bounds);
_setup_environment(RID(), RID(), camera_proj, cam_xform, RID(), true, Vector2(1, 1), RID(), false, Color(), 0, 0);
_setup_environment(RID(), RID(), camera_proj, cam_xform, RID(), true, Vector2(1, 1), 1, 32, RID(), false, Color(), 0, 0);
Map<Size2i, RID>::Element *E = sdfgi_framebuffer_size_cache.find(fb_size);
if (!E) {
@ -2150,20 +2154,27 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() {
RD::Uniform u;
u.binding = 5;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
u.ids.push_back(get_positional_light_buffer());
u.ids.push_back(get_omni_light_buffer());
uniforms.push_back(u);
}
{
RD::Uniform u;
u.binding = 6;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
u.ids.push_back(get_spot_light_buffer());
uniforms.push_back(u);
}
{
RD::Uniform u;
u.binding = 6;
u.binding = 7;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
u.ids.push_back(get_reflection_probe_buffer());
uniforms.push_back(u);
}
{
RD::Uniform u;
u.binding = 7;
u.binding = 8;
u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
u.ids.push_back(get_directional_light_buffer());
uniforms.push_back(u);
@ -2210,21 +2221,6 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() {
RD::Uniform u;
u.binding = 15;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
u.ids.push_back(get_cluster_builder_texture());
uniforms.push_back(u);
}
{
RD::Uniform u;
u.binding = 16;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
u.ids.push_back(get_cluster_builder_indices_buffer());
uniforms.push_back(u);
}
{
RD::Uniform u;
u.binding = 17;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
if (directional_shadow_get_texture().is_valid()) {
u.ids.push_back(directional_shadow_get_texture());
} else {
@ -2236,7 +2232,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() {
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
u.binding = 18;
u.binding = 16;
u.ids.push_back(storage->global_variables_get_storage_buffer());
uniforms.push_back(u);
}
@ -2244,7 +2240,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() {
if (!low_end) {
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
u.binding = 19;
u.binding = 17;
u.ids.push_back(sdfgi_get_ubo());
uniforms.push_back(u);
}
@ -2253,7 +2249,7 @@ void RendererSceneRenderForward::_update_render_base_uniform_set() {
}
}
RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps) {
RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps) {
if (render_pass_uniform_set.is_valid() && RD::get_singleton()->uniform_set_is_valid(render_pass_uniform_set)) {
RD::get_singleton()->free(render_pass_uniform_set);
}
@ -2351,6 +2347,15 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
{
RD::Uniform u;
u.binding = 5;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
RID cb = p_cluster_buffer.is_valid() ? p_cluster_buffer : default_vec4_xform_buffer;
u.ids.push_back(cb);
uniforms.push_back(u);
}
{
RD::Uniform u;
u.binding = 6;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID texture = (false && rb && rb->depth.is_valid()) ? rb->depth : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_WHITE);
u.ids.push_back(texture);
@ -2358,17 +2363,18 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
}
{
RD::Uniform u;
u.binding = 6;
u.binding = 7;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID bbt = rb ? render_buffers_get_back_buffer_texture(p_render_buffers) : RID();
RID texture = bbt.is_valid() ? bbt : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK);
u.ids.push_back(texture);
uniforms.push_back(u);
}
if (!low_end) {
{
RD::Uniform u;
u.binding = 7;
u.binding = 8;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID texture = rb && rb->normal_roughness_buffer.is_valid() ? rb->normal_roughness_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_NORMAL);
u.ids.push_back(texture);
@ -2377,7 +2383,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
{
RD::Uniform u;
u.binding = 8;
u.binding = 9;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID aot = rb ? render_buffers_get_ao_texture(p_render_buffers) : RID();
RID texture = aot.is_valid() ? aot : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK);
@ -2387,7 +2393,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
{
RD::Uniform u;
u.binding = 9;
u.binding = 10;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID texture = rb && rb->ambient_buffer.is_valid() ? rb->ambient_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK);
u.ids.push_back(texture);
@ -2396,7 +2402,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
{
RD::Uniform u;
u.binding = 10;
u.binding = 11;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID texture = rb && rb->reflection_buffer.is_valid() ? rb->reflection_buffer : storage->texture_rd_get_default(RendererStorageRD::DEFAULT_RD_TEXTURE_BLACK);
u.ids.push_back(texture);
@ -2404,7 +2410,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
}
{
RD::Uniform u;
u.binding = 11;
u.binding = 12;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID t;
if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) {
@ -2417,7 +2423,7 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
}
{
RD::Uniform u;
u.binding = 12;
u.binding = 13;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
if (rb && render_buffers_is_sdfgi_enabled(p_render_buffers)) {
u.ids.push_back(render_buffers_get_sdfgi_occlusion_texture(p_render_buffers));
@ -2428,14 +2434,14 @@ RID RendererSceneRenderForward::_setup_render_pass_uniform_set(RID p_render_buff
}
{
RD::Uniform u;
u.binding = 13;
u.binding = 14;
u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
u.ids.push_back(rb ? render_buffers_get_gi_probe_buffer(p_render_buffers) : render_buffers_get_default_gi_probe_buffer());
uniforms.push_back(u);
}
{
RD::Uniform u;
u.binding = 14;
u.binding = 15;
u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
RID vfog = RID();
if (rb && render_buffers_has_volumetric_fog(p_render_buffers)) {
@ -2519,33 +2525,43 @@ RID RendererSceneRenderForward::_setup_sdfgi_render_pass_uniform_set(RID p_albed
uniforms.push_back(u);
}
{
RD::Uniform u;
u.binding = 5;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
RID cb = default_vec4_xform_buffer;
u.ids.push_back(cb);
uniforms.push_back(u);
}
// actual sdfgi stuff
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
u.binding = 5;
u.binding = 6;
u.ids.push_back(p_albedo_texture);
uniforms.push_back(u);
}
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
u.binding = 6;
u.binding = 7;
u.ids.push_back(p_emission_texture);
uniforms.push_back(u);
}
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
u.binding = 7;
u.binding = 8;
u.ids.push_back(p_emission_aniso_texture);
uniforms.push_back(u);
}
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
u.binding = 8;
u.binding = 9;
u.ids.push_back(p_geom_facing_texture);
uniforms.push_back(u);
}

View file

@ -263,7 +263,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
void _update_render_base_uniform_set();
RID _setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture);
RID _setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps);
RID _setup_render_pass_uniform_set(RID p_render_buffers, RID p_radiance_texture, RID p_shadow_atlas, RID p_reflection_atlas, RID p_cluster_buffer, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps);
struct LightmapData {
float normal_xform[12];
@ -300,6 +300,11 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
float viewport_size[2];
float screen_pixel_size[2];
uint32_t cluster_shift;
uint32_t cluster_width;
uint32_t cluster_type_size;
uint32_t max_cluster_element_count_div_32;
float directional_penumbra_shadow_kernel[128]; //32 vec4s
float directional_soft_shadow_kernel[128];
float penumbra_shadow_kernel[128];
@ -421,7 +426,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
PASS_MODE_SDF,
};
void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false);
void _setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2i &p_screen_size, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false);
void _setup_giprobes(const PagedArray<RID> &p_giprobes);
void _setup_lightmaps(const PagedArray<RID> &p_lightmaps, const Transform &p_cam_transform);
@ -701,7 +706,7 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
RenderList render_list;
protected:
virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold);
virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_max_cluster_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_bg_color, float p_lod_threshold);
virtual void _render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0);
virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region);
virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region);

File diff suppressed because it is too large Load diff

View file

@ -34,7 +34,7 @@
#include "core/templates/local_vector.h"
#include "core/templates/rid_owner.h"
#include "servers/rendering/renderer_compositor.h"
#include "servers/rendering/renderer_rd/light_cluster_builder.h"
#include "servers/rendering/renderer_rd/cluster_builder_rd.h"
#include "servers/rendering/renderer_rd/renderer_storage_rd.h"
#include "servers/rendering/renderer_rd/shaders/gi.glsl.gen.h"
#include "servers/rendering/renderer_rd/shaders/giprobe.glsl.gen.h"
@ -104,12 +104,12 @@ protected:
};
virtual RenderBufferData *_create_render_buffer_data() = 0;
void _setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_inverse_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count);
void _setup_lights(const PagedArray<RID> &p_lights, const Transform &p_camera_transform, RID p_shadow_atlas, bool p_using_shadows, uint32_t &r_directional_light_count, uint32_t &r_positional_light_count);
void _setup_decals(const PagedArray<RID> &p_decals, const Transform &p_camera_inverse_xform);
void _setup_reflections(const PagedArray<RID> &p_reflections, const Transform &p_camera_inverse_transform, RID p_environment);
void _setup_giprobes(RID p_render_buffers, const Transform &p_transform, const PagedArray<RID> &p_gi_probes, uint32_t &r_gi_probes_used);
virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0;
virtual void _render_scene(RID p_render_buffer, const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, int p_directional_light_count, const PagedArray<RID> &p_gi_probes, const PagedArray<RID> &p_lightmaps, RID p_environment, RID p_cluster_buffer, uint32_t p_cluster_size, uint32_t p_cluster_max_elements, RID p_camera_effects, RID p_shadow_atlas, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, const Color &p_default_color, float p_screen_lod_threshold) = 0;
virtual void _render_shadow(RID p_framebuffer, const PagedArray<GeometryInstance *> &p_instances, const CameraMatrix &p_projection, const Transform &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool use_dp_flip, bool p_use_pancake, const Plane &p_camera_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0) = 0;
virtual void _render_material(const Transform &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_ortogonal, const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0;
virtual void _render_uv2(const PagedArray<GeometryInstance *> &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0;
@ -341,6 +341,8 @@ private:
};
Vector<Reflection> reflections;
ClusterBuilderRD *cluster_builder = nullptr;
};
mutable RID_Owner<ReflectionAtlas> reflection_atlas_owner;
@ -833,6 +835,9 @@ private:
/* RENDER BUFFERS */
ClusterBuilderSharedDataRD cluster_builder_shared;
ClusterBuilderRD *current_cluster_builder = nullptr;
struct SDFGI;
struct VolumetricFog;
@ -858,6 +863,8 @@ private:
SDFGI *sdfgi = nullptr;
VolumetricFog *volumetric_fog = nullptr;
ClusterBuilderRD *cluster_builder = nullptr;
//built-in textures used for ping pong image processing and blurring
struct Blur {
RID texture;
@ -1259,7 +1266,7 @@ private:
uint32_t max_giprobes;
uint32_t high_quality_vct;
uint32_t use_sdfgi;
uint32_t pad2;
uint32_t orthogonal;
float ao_color[3];
@ -1269,8 +1276,11 @@ private:
};
RID sdfgi_ubo;
enum {
MODE_MAX = 1
enum Mode {
MODE_GIPROBE,
MODE_SDFGI,
MODE_COMBINED,
MODE_MAX
};
GiShaderRD shader;
@ -1394,18 +1404,39 @@ private:
float normal_fade;
};
// Sort record that pairs an instance pointer with a depth value.
// Comparing two records only looks at the depth, so sorting a range of
// them yields ascending depth order.
template <class T>
struct InstanceSort {
	float depth; // Sort key.
	T *instance; // Instance associated with this depth.

	// True when this record's depth is strictly smaller than p_sort's.
	bool operator<(const InstanceSort &p_sort) const {
		return p_sort.depth > depth;
	}
};
ReflectionData *reflections;
InstanceSort<ReflectionProbeInstance> *reflection_sort;
uint32_t max_reflections;
RID reflection_buffer;
uint32_t max_reflection_probes_per_instance;
uint32_t reflection_count = 0;
DecalData *decals;
InstanceSort<DecalInstance> *decal_sort;
uint32_t max_decals;
RID decal_buffer;
uint32_t decal_count;
LightData *lights;
LightData *omni_lights;
LightData *spot_lights;
InstanceSort<LightInstance> *omni_light_sort;
InstanceSort<LightInstance> *spot_light_sort;
uint32_t max_lights;
RID light_buffer;
RID omni_light_buffer;
RID spot_light_buffer;
uint32_t omni_light_count = 0;
uint32_t spot_light_count = 0;
RID *lights_instances;
Rect2i *lights_shadow_rect_cache;
uint32_t lights_shadow_rect_cache_count = 0;
@ -1414,8 +1445,6 @@ private:
uint32_t max_directional_lights;
RID directional_light_buffer;
LightClusterBuilder builder;
} cluster;
struct VolumetricFog {
@ -1445,7 +1474,7 @@ private:
};
struct VolumetricFogShader {
struct PushConstant {
struct ParamsUBO {
float fog_frustum_size_begin[2];
float fog_frustum_size_end[2];
@ -1463,13 +1492,21 @@ private:
float detail_spread;
float gi_inject;
uint32_t max_gi_probes;
uint32_t pad;
uint32_t cluster_type_size;
float screen_size[2];
uint32_t cluster_shift;
uint32_t cluster_width;
uint32_t cluster_pad[3];
uint32_t max_cluster_element_count_div_32;
float cam_rotation[12];
};
VolumetricFogShaderRD shader;
RID params_ubo;
RID shader_version;
RID pipelines[VOLUMETRIC_FOG_SHADER_MAX];
@ -1494,6 +1531,7 @@ private:
float weight;
};
uint32_t max_cluster_elements = 512;
bool low_end = false;
public:
@ -2002,10 +2040,9 @@ public:
virtual void set_time(double p_time, double p_step);
RID get_cluster_builder_texture();
RID get_cluster_builder_indices_buffer();
RID get_reflection_probe_buffer();
RID get_positional_light_buffer();
RID get_omni_light_buffer();
RID get_spot_light_buffer();
RID get_directional_light_buffer();
RID get_decal_buffer();
int get_max_directional_lights() const;

View file

@ -7340,6 +7340,7 @@ void RendererStorageRD::_update_decal_atlas() {
tformat.shareable_formats.push_back(RD::DATA_FORMAT_R8G8B8A8_SRGB);
decal_atlas.texture = RD::get_singleton()->texture_create(tformat, RD::TextureView());
RD::get_singleton()->texture_clear(decal_atlas.texture, Color(0, 0, 0, 0), 0, decal_atlas.mipmaps, 0, 1, true);
{
//create the framebuffer

View file

@ -95,6 +95,21 @@ public:
p_array[11] = 0;
}
// Writes p_mtx into p_array as 12 consecutive floats: three groups of four,
// where group N holds basis row N (three values) followed by the matching
// origin component (x, y, z respectively).
// p_array must have room for at least 12 floats.
static _FORCE_INLINE_ void store_transform_transposed_3x4(const Transform &p_mtx, float *p_array) {
	const auto &rows = p_mtx.basis.elements;
	const auto &translation = p_mtx.origin;
	float *cursor = p_array;

	// Floats 0..3: basis row 0, then origin.x.
	*cursor++ = rows[0][0];
	*cursor++ = rows[0][1];
	*cursor++ = rows[0][2];
	*cursor++ = translation.x;

	// Floats 4..7: basis row 1, then origin.y.
	*cursor++ = rows[1][0];
	*cursor++ = rows[1][1];
	*cursor++ = rows[1][2];
	*cursor++ = translation.y;

	// Floats 8..11: basis row 2, then origin.z.
	*cursor++ = rows[2][0];
	*cursor++ = rows[2][1];
	*cursor++ = rows[2][2];
	*cursor++ = translation.z;
}
static _FORCE_INLINE_ void store_camera(const CameraMatrix &p_mtx, float *p_array) {
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {

View file

@ -44,3 +44,6 @@ if "RD_GLSL" in env["BUILDERS"]:
env.RD_GLSL("particles_copy.glsl")
env.RD_GLSL("sort.glsl")
env.RD_GLSL("skeleton.glsl")
env.RD_GLSL("cluster_render.glsl")
env.RD_GLSL("cluster_store.glsl")
env.RD_GLSL("cluster_debug.glsl")

View file

@ -0,0 +1,115 @@
#[compute]
#version 450
VERSION_DEFINES
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
// Heat-map palette for the cluster debug view. main() indexes it with the
// number of items counted for a pixel, clamped to 32: entry 0 means "no
// items", entries 1..32 ramp from blue through green and yellow to red, and
// the final entry is the colour used for 32 or more items.
const vec3 usage_gradient[33] = vec3[]( // 1 (none) + 32
		vec3(0.14, 0.17, 0.23),
		vec3(0.24, 0.44, 0.83),
		vec3(0.23, 0.57, 0.84),
		vec3(0.22, 0.71, 0.84),
		vec3(0.22, 0.85, 0.83),
		vec3(0.21, 0.85, 0.72),
		vec3(0.21, 0.85, 0.57),
		vec3(0.20, 0.85, 0.42),
		vec3(0.20, 0.85, 0.27),
		vec3(0.27, 0.86, 0.19),
		vec3(0.51, 0.85, 0.19),
		vec3(0.57, 0.86, 0.19),
		vec3(0.62, 0.85, 0.19),
		vec3(0.67, 0.86, 0.20),
		vec3(0.73, 0.85, 0.20),
		vec3(0.78, 0.85, 0.20),
		vec3(0.83, 0.85, 0.20),
		vec3(0.85, 0.82, 0.20),
		vec3(0.85, 0.76, 0.20),
		// Was 0.81, which made this step brighter than the previous one; 0.71
		// keeps the green channel falling evenly (0.82, 0.76, 0.71, 0.65, 0.60).
		vec3(0.85, 0.71, 0.20),
		vec3(0.85, 0.65, 0.20),
		vec3(0.84, 0.60, 0.21),
		vec3(0.84, 0.56, 0.21),
		vec3(0.84, 0.51, 0.21),
		vec3(0.84, 0.46, 0.21),
		vec3(0.84, 0.41, 0.21),
		vec3(0.84, 0.36, 0.21),
		vec3(0.84, 0.31, 0.21),
		vec3(0.84, 0.27, 0.21),
		vec3(0.83, 0.22, 0.22),
		vec3(0.83, 0.22, 0.27),
		vec3(0.83, 0.22, 0.32),
		vec3(1.00, 0.63, 0.70));
// Per-dispatch parameters of the cluster debug pass (push constant).
layout(push_constant, binding = 0, std430) uniform Params {
uvec2 screen_size; // Output size in pixels; invocations at or beyond it return immediately.
uvec2 cluster_screen_size; // Number of clusters along x and y; used to address a cluster's data.
uint cluster_shift; // Pixel coordinate >> cluster_shift gives the cluster coordinate.
uint cluster_type; // Selects which per-type section of the cluster data is visualised.
float z_near; // Near plane distance used to linearise the sampled depth.
float z_far; // Far plane distance used to linearise and normalise the sampled depth.
bool orthogonal; // Chooses the orthogonal depth linearisation instead of the perspective one.
uint max_cluster_element_count_div_32; // Number of 32-bit item bitmask words stored per cluster.
uint pad1; // Not read by this shader; presumably padding to a 16-byte multiple (confirm against the CPU-side struct).
uint pad2; // Not read by this shader; see pad1.
}
params;
// Read-only cluster data. As read by main(), each cluster occupies
// (max_cluster_element_count_div_32 + 32) words: first the item bitmask
// words, then 32 words holding a packed item index range, one per depth slice.
// The data for one cluster type spans all clusters of the screen; successive
// types follow each other.
layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterData {
uint data[];
}
cluster_data;
// Image that receives the debug colour for every pixel.
layout(rgba16f, set = 0, binding = 2) uniform restrict writeonly image2D screen_buffer;
// Depth texture and the sampler it is combined with when fetching a texel in main().
layout(set = 0, binding = 3) uniform texture2D depth_buffer;
layout(set = 0, binding = 4) uniform sampler depth_buffer_sampler;
// Cluster debug visualisation. For every pixel this counts the items of type
// params.cluster_type that are flagged in the pixel's cluster and whose index
// falls inside the range stored for the pixel's depth slice, then writes a
// colour from usage_gradient (shaded by slice) to screen_buffer.
void main() {
uvec2 screen_pos = gl_GlobalInvocationID.xy;
// The dispatch may cover more than the screen; ignore the excess invocations.
if (any(greaterThanEqual(screen_pos, params.screen_size))) {
return;
}
// Pixel -> cluster coordinate.
uvec2 cluster_pos = screen_pos >> params.cluster_shift;
// Linear cluster index, moved into the section of the requested type and
// scaled by the number of words each cluster occupies (bitmask words + 32 slice words).
uint offset = cluster_pos.y * params.cluster_screen_size.x + cluster_pos.x;
offset += params.cluster_screen_size.x * params.cluster_screen_size.y * params.cluster_type;
offset *= (params.max_cluster_element_count_div_32 + 32);
//depth buffers generally can't be accessed via image API
// Sampled depth is remapped from [0, 1] to [-1, 1] before being linearised.
float depth = texelFetch(sampler2D(depth_buffer, depth_buffer_sampler), ivec2(screen_pos), 0).r * 2.0 - 1.0;
// Both branches map -1 to z_near and +1 to z_far; only the curve in between differs.
if (params.orthogonal) {
depth = ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0;
} else {
depth = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - depth * (params.z_far - params.z_near));
}
// Normalise by the far distance and quantise into one of 32 depth slices.
depth /= params.z_far;
uint slice = uint(clamp(floor(depth * 32.0), 0.0, 31.0));
// The per-slice words follow the bitmask words; each packs an item index
// range as (max << 16) | min, with max treated as exclusive below.
uint slice_minmax = cluster_data.data[offset + params.max_cluster_element_count_div_32 + slice];
uint item_min = slice_minmax & 0xFFFF;
uint item_max = slice_minmax >> 16;
uint item_count = 0;
// Walk every set bit of the cluster's item bitmask; bit b of word i is item i * 32 + b.
for (uint i = 0; i < params.max_cluster_element_count_div_32; i++) {
uint slice_bits = cluster_data.data[offset + i];
while (slice_bits != 0) {
uint bit = findLSB(slice_bits);
uint item = i * 32 + bit;
// Only count items inside the index range recorded for this depth slice.
if ((item >= item_min && item < item_max)) {
item_count++;
}
// Clear the bit just handled so the next findLSB returns the following one.
slice_bits &= ~(1 << bit);
}
}
// usage_gradient has 33 entries; everything from 32 items up shares the last one.
item_count = min(item_count, 32);
vec3 color = usage_gradient[item_count];
// Shade by depth slice: 1.2x brightness at slice 0 down to 0.3x at slice 31.
color = mix(color * 1.2, color * 0.3, float(slice) / 31.0);
imageStore(screen_buffer, ivec2(screen_pos), vec4(color, 1.0));
}

View file

@ -0,0 +1,168 @@
#[vertex]
#version 450
VERSION_DEFINES
// Vertex position input; main() scales and transforms it per element.
layout(location = 0) in vec3 vertex_attrib;
// Negated z of the transformed vertex, interpolated for the fragment stage.
layout(location = 0) out float depth_interp;
// Index into render_elements for this instance, passed unmodified (flat) to the fragment stage.
layout(location = 1) out flat uint element_index;
layout(push_constant, binding = 0, std430) uniform Params {
uint base_index; // added to gl_InstanceIndex to obtain the element index
uint pad0; // not read by this shader; presumably padding
uint pad1; // not read by this shader; presumably padding
uint pad2; // not read by this shader; presumably padding
}
params;
// Shared state block, declared identically in the vertex and fragment stages.
// The vertex stage only reads `projection`.
layout(set = 0, binding = 1, std140) uniform State {
mat4 projection; // applied to the transformed vertex to produce gl_Position
float inv_z_far; // the fragment stage multiplies the interpolated depth by this before slicing
uint screen_to_clusters_shift; // shift to obtain coordinates in block indices
uint cluster_screen_width; // row stride used by the fragment stage when linearizing cluster coordinates
uint cluster_data_size; // how much data for a single cluster takes
uint cluster_depth_offset; // offset inside a cluster's data at which the per-element depth masks start
uint pad0; // not read by this shader; presumably padding
uint pad1; // not read by this shader; presumably padding
uint pad2; // not read by this shader; presumably padding
}
state;
// Per-element record. In this shader only `scale` and `transform_inv` are read (vertex stage).
struct RenderElement {
uint type; //0-4
bool touches_near; // not read by this shader
bool touches_far; // not read by this shader
uint original_index; // not read by this shader
mat3x4 transform_inv; // the scaled vertex (as a vec4 with w = 1) is multiplied by this matrix
vec3 scale; // per-axis factor applied to the input vertex first
uint pad; // not read by this shader; presumably padding
};
// Element records, indexed by params.base_index + gl_InstanceIndex.
layout(set = 0, binding = 2, std430) buffer restrict readonly RenderElements {
RenderElement data[];
}
render_elements;
// Vertex stage: scales and transforms the input vertex with the data of the element
// selected by the instance index, and forwards its depth and the element index.
void main() {
	uint idx = params.base_index + gl_InstanceIndex;
	element_index = idx;

	vec3 scaled = vertex_attrib * render_elements.data[idx].scale;
	vec3 transformed = vec4(scaled, 1.0) * render_elements.data[idx].transform_inv;

	// Negated z is what the fragment stage turns into a depth slice.
	depth_interp = -transformed.z;
	gl_Position = state.projection * vec4(transformed, 1.0);
}
#[fragment]
#version 450
VERSION_DEFINES
// The subgroup path is only compiled when all three subgroup extensions it relies on are available.
#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic) && defined(GL_KHR_shader_subgroup_vote)
#extension GL_KHR_shader_subgroup_ballot : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable
#extension GL_KHR_shader_subgroup_vote : enable
#define USE_SUBGROUPS
#endif
// Interpolated depth written by the vertex stage (negated z of the transformed vertex).
layout(location = 0) in float depth_interp;
// Element index written by the vertex stage.
layout(location = 1) in flat uint element_index;
// Shared state block, declared identically in the vertex and fragment stages.
layout(set = 0, binding = 1, std140) uniform State {
mat4 projection; // not read by the fragment stage
float inv_z_far; // multiplied with depth_interp to obtain the unit depth used for slicing
uint screen_to_clusters_shift; // shift to obtain coordinates in block indices
uint cluster_screen_width; // row stride used when linearizing cluster coordinates
uint cluster_data_size; // how much data for a single cluster takes
uint cluster_depth_offset; // offset inside a cluster's data at which the per-element depth masks start
uint pad0; // not read by this shader; presumably padding
uint pad1; // not read by this shader; presumably padding
uint pad2; // not read by this shader; presumably padding
}
state;
// cluster data is laid out linearly, each cell contains the following information:
// - list of bits for every element to mark as used, so (max_elem_count/32)*4 uints
// - a uint for each element to mark the depth bits used when rendering (0-31)
layout(set = 0, binding = 3, std430) buffer restrict ClusterRender {
uint data[];
}
cluster_render;
// Fragment stage: for the cluster under this fragment, marks the current element as used
// and records which of the 32 depth slices the fragment falls into.
void main() {
	//convert from screen to cluster
	uvec2 cluster = uvec2(gl_FragCoord.xy) >> state.screen_to_clusters_shift;

	//get linear cluster offset from screen pos
	uint cluster_offset = cluster.x + state.cluster_screen_width * cluster.y;
	//multiply by data size to position at the beginning of the element list for this cluster
	cluster_offset *= state.cluster_data_size;

	//find the current element in the list and plot the bit to mark it as used
	uint usage_write_offset = cluster_offset + (element_index >> 5);
	uint usage_write_bit = 1 << (element_index & 0x1F);

#ifdef USE_SUBGROUPS

	uint cluster_thread_group_index;

	if (!gl_HelperInvocation) {
		//http://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf

		// NOTE(review): the grouping below is keyed on cluster_offset only. If a subgroup can
		// hold fragments with different element_index values that land in the same cluster,
		// only the first thread's element gets its bits written - confirm whether that can happen.
		uvec4 mask;

		while (true) {
			// find the cluster offset of the first active thread
			// threads that did break; go inactive and no longer count
			uint first = subgroupBroadcastFirst(cluster_offset);
			// update the mask for thread that match this cluster
			mask = subgroupBallot(first == cluster_offset);
			if (first == cluster_offset) {
				// This thread belongs to the group of threads that match this offset,
				// so exit the loop.
				break;
			}
		}

		// Only the first thread of every group performs the atomic write.
		cluster_thread_group_index = subgroupBallotExclusiveBitCount(mask);

		if (cluster_thread_group_index == 0) {
			atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
		}
	}
#else
	if (!gl_HelperInvocation) {
		atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
	}
#endif

	//find the current element in the depth usage list and mark the current depth as used
	float unit_depth = depth_interp * state.inv_z_far;

	// Clamp while the value is still a float: converting a negative (or too large) float
	// to uint is undefined in GLSL, so clamping after the conversion does not protect the
	// slice index.
	uint z_bit = uint(clamp(floor(unit_depth * 32.0), 0.0, 31.0));

	uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index;
	uint z_write_bit = 1 << z_bit;

#ifdef USE_SUBGROUPS
	if (!gl_HelperInvocation) {
		z_write_bit = subgroupOr(z_write_bit); //merge all Zs
		if (cluster_thread_group_index == 0) {
			atomicOr(cluster_render.data[z_write_offset], z_write_bit);
		}
	}
#else
	if (!gl_HelperInvocation) {
		atomicOr(cluster_render.data[z_write_offset], z_write_bit);
	}
#endif
}

View file

@ -0,0 +1,119 @@
#[compute]
#version 450
VERSION_DEFINES
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
// Push constants for the pass that converts rendered cluster data into the buffer read by the renderer.
layout(push_constant, binding = 0, std430) uniform Params {
uint cluster_render_data_size; // how much data for a single cluster takes
uint max_render_element_count_div_32; //divided by 32; also the offset of the per-element depth masks inside a source cluster
uvec2 cluster_screen_size; // cluster grid dimensions; invocations outside of it return immediately
uint render_element_count_div_32; //divided by 32; number of usage mask words scanned per cluster
uint max_cluster_element_count_div_32; //divided by 32; number of mask words per destination cluster, followed by 32 slice words
uint pad1; // not read by this shader; presumably padding
uint pad2; // not read by this shader; presumably padding
}
params;
// Source: per-cluster usage bits and per-element depth masks (read only).
layout(set = 0, binding = 1, std430) buffer restrict readonly ClusterRender {
uint data[];
}
cluster_render;
// Destination: per type and per cluster, element mask words followed by 32 packed min/max slice words.
// main() treats a slice word of 0 as "not written yet" - presumably the buffer is cleared beforehand; verify against caller.
layout(set = 0, binding = 2, std430) buffer restrict ClusterStore {
uint data[];
}
cluster_store;
// Per-element record. This shader reads type, touches_near, touches_far and original_index.
struct RenderElement {
uint type; //0-4; selects the per-type section of the destination buffer
bool touches_near; // when set, the element's slice range is extended down to slice 0
bool touches_far; // when set, the element's slice range is extended up to slice 32 (exclusive)
uint original_index; // index that is stored in the destination mask and min/max words
mat3x4 transform_inv; // not read by this shader
vec3 scale; // not read by this shader
uint pad; // not read by this shader; presumably padding
};
// Element records, indexed by the bit position found in the usage mask.
layout(set = 0, binding = 3, std430) buffer restrict readonly RenderElements {
RenderElement data[];
}
render_elements;
// One invocation per cluster: walks the usage bits produced by the render pass and, for
// every element found, records it in the destination buffer (element mask plus the
// per-slice min/max words of the element's type).
void main() {
	uvec2 cell = gl_GlobalInvocationID.xy;
	if (any(greaterThanEqual(cell, params.cluster_screen_size))) {
		return;
	}

	// Linear index of this cluster and the start of its data in the source buffer.
	uint cell_index = (cell.x + params.cluster_screen_size.x * cell.y);
	uint src_base = cell_index * params.cluster_render_data_size;

	// Check every usage word and see which elements were written.
	for (uint word = 0; word < params.render_element_count_div_32; word++) {
		uint pending = cluster_render.data[src_base + word];

		while (pending != 0) {
			uint bit = findLSB(pending);
			pending &= ~(1u << bit); // clear the bit to continue iterating

			uint element = word * 32 + bit;

			// If the element was written its depth mask was written too, but check just in case.
			uint depth_mask = cluster_render.data[src_base + params.max_render_element_count_div_32 + element];
			if (depth_mask == 0) {
				continue;
			}

			// Slice range [first_slice, end_slice) covered by the element.
			uint first_slice = findLSB(depth_mask);
			uint end_slice = findMSB(depth_mask) + 1;
			if (render_elements.data[element].touches_near) {
				first_slice = 0;
			}
			if (render_elements.data[element].touches_far) {
				end_slice = 32;
			}

			// Find the cluster offset in the buffer used for indexing in the renderer.
			uint type = render_elements.data[element].type;
			uint dst_base = (cell_index + type * (params.cluster_screen_size.x * params.cluster_screen_size.y)) * (params.max_cluster_element_count_div_32 + 32);
			uint orig_index = render_elements.data[element].original_index;

			// Widen the packed min/max of every slice the element covers.
			for (uint slice = first_slice; slice < end_slice; slice++) {
				uint slot = dst_base + params.max_cluster_element_count_div_32 + slice;

				uint stored = cluster_store.data[slot];
				if (stored == 0) {
					stored = 0xFFFF; // never written: min = 0xFFFF (low half), max = 0 (high half)
				}

				uint lowest = min(orig_index, stored & 0xFFFF);
				// Always store plus one, so zero means the range is empty when not written to.
				uint highest = max(orig_index + 1, stored >> 16);

				cluster_store.data[slot] = lowest | (highest << 16);
			}

			// Set the element's bit in the mask words, so the rendering code can reference it.
			cluster_store.data[dst_base + (orig_index >> 5)] |= 1u << (orig_index & 0x1F);
		}
	}
}

View file

@ -99,7 +99,7 @@ layout(push_constant, binding = 0, std430) uniform Params {
uint max_giprobes;
bool high_quality_vct;
bool use_sdfgi;
uint pad2;
bool orthogonal;
vec3 ao_color;
@ -331,7 +331,7 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o
}
ambient_light.rgb = diffuse;
#if 1
if (roughness < 0.2) {
vec3 pos_to_uvw = 1.0 / sdfgi.grid_size;
vec4 light_accum = vec4(0.0);
@ -363,7 +363,6 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o
//ray_pos += ray_dir * (bias / sdfgi.cascades[cascade].to_cell); //bias to avoid self occlusion
ray_pos += (ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) + cam_normal * 1.4) * bias / sdfgi.cascades[cascade].to_cell;
}
float softness = 0.2 + min(1.0, roughness * 5.0) * 4.0; //approximation to roughness so it does not seem like a hard fade
while (length(ray_pos) < max_distance) {
for (uint i = 0; i < sdfgi.max_cascades; i++) {
@ -434,8 +433,6 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o
}
}
#endif
reflection_light.rgb = specular;
ambient_light.rgb *= sdfgi.energy;
@ -621,11 +618,12 @@ void main() {
vec3 reflection = normalize(reflect(normalize(vertex), normal));
if (params.use_sdfgi) {
sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light);
}
#ifdef USE_SDFGI
sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light);
#endif
if (params.max_giprobes > 0) {
#ifdef USE_GIPROBES
{
uvec2 giprobe_tex = texelFetch(usampler2D(giprobe_buffer, linear_sampler), pos, 0).rg;
roughness *= roughness;
//find arbitrary tangent and bitangent, then build a matrix
@ -656,6 +654,7 @@ void main() {
ambient_light = amb_accum;
}
}
#endif
}
imageStore(ambient_buffer, pos, ambient_light);

View file

@ -541,7 +541,7 @@ vec3 F0(float metallic, float specular, vec3 albedo) {
return mix(vec3(dielectric), albedo, vec3(metallic));
}
void light_compute(vec3 N, vec3 L, vec3 V, vec3 light_color, float attenuation, vec3 f0, uint orms,
void light_compute(vec3 N, vec3 L, vec3 V, vec3 light_color, float attenuation, vec3 f0, uint orms, float specular_amount,
#ifdef LIGHT_BACKLIGHT_USED
vec3 backlight,
#endif
@ -710,7 +710,7 @@ LIGHT_SHADER_CODE
blinn *= (shininess + 8.0) * (1.0 / (8.0 * M_PI));
float intensity = blinn;
specular_light += light_color * intensity * attenuation;
specular_light += light_color * intensity * attenuation * specular_amount;
#elif defined(SPECULAR_PHONG)
@ -721,7 +721,7 @@ LIGHT_SHADER_CODE
phong *= (shininess + 8.0) * (1.0 / (8.0 * M_PI));
float intensity = (phong) / max(4.0 * cNdotV * cNdotL, 0.75);
specular_light += light_color * intensity * attenuation;
specular_light += light_color * intensity * attenuation * specular_amount;
#elif defined(SPECULAR_TOON)
@ -730,7 +730,7 @@ LIGHT_SHADER_CODE
float mid = 1.0 - roughness;
mid *= mid;
float intensity = smoothstep(mid - roughness * 0.5, mid + roughness * 0.5, RdotV) * mid;
diffuse_light += light_color * intensity * attenuation; // write to diffuse_light, as in toon shading you generally want no reflection
diffuse_light += light_color * intensity * attenuation * specular_amount; // write to diffuse_light, as in toon shading you generally want no reflection
#elif defined(SPECULAR_DISABLED)
// none..
@ -760,7 +760,7 @@ LIGHT_SHADER_CODE
vec3 specular_brdf_NL = cNdotL * D * F * G;
specular_light += specular_brdf_NL * light_color * attenuation;
specular_light += specular_brdf_NL * light_color * attenuation * specular_amount;
#endif
#if defined(LIGHT_CLEARCOAT_USED)
@ -774,7 +774,7 @@ LIGHT_SHADER_CODE
float clearcoat_specular_brdf_NL = 0.25 * clearcoat * Gr * Fr * Dr * cNdotL;
specular_light += clearcoat_specular_brdf_NL * light_color * attenuation;
specular_light += clearcoat_specular_brdf_NL * light_color * attenuation * specular_amount;
#endif
}
@ -903,28 +903,28 @@ float get_omni_attenuation(float distance, float inv_range, float decay) {
float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
#ifndef USE_NO_SHADOWS
if (lights.data[idx].shadow_enabled) {
if (omni_lights.data[idx].shadow_enabled) {
// there is a shadowmap
vec3 light_rel_vec = lights.data[idx].position - vertex;
vec3 light_rel_vec = omni_lights.data[idx].position - vertex;
float light_length = length(light_rel_vec);
vec4 v = vec4(vertex, 1.0);
vec4 splane = (lights.data[idx].shadow_matrix * v);
vec4 splane = (omni_lights.data[idx].shadow_matrix * v);
float shadow_len = length(splane.xyz); //need to remember shadow len from here
{
vec3 nofs = normal_interp * lights.data[idx].shadow_normal_bias / lights.data[idx].inv_radius;
vec3 nofs = normal_interp * omni_lights.data[idx].shadow_normal_bias / omni_lights.data[idx].inv_radius;
nofs *= (1.0 - max(0.0, dot(normalize(light_rel_vec), normalize(normal_interp))));
v.xyz += nofs;
splane = (lights.data[idx].shadow_matrix * v);
splane = (omni_lights.data[idx].shadow_matrix * v);
}
float shadow;
#ifdef USE_SOFT_SHADOWS
if (lights.data[idx].soft_shadow_size > 0.0) {
if (omni_lights.data[idx].soft_shadow_size > 0.0) {
//soft shadow
//find blocker
@ -944,10 +944,10 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);
vec3 tangent = normalize(cross(v0, normal));
vec3 bitangent = normalize(cross(tangent, normal));
float z_norm = shadow_len * lights.data[idx].inv_radius;
float z_norm = shadow_len * omni_lights.data[idx].inv_radius;
tangent *= lights.data[idx].soft_shadow_size * lights.data[idx].soft_shadow_scale;
bitangent *= lights.data[idx].soft_shadow_size * lights.data[idx].soft_shadow_scale;
tangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale;
bitangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale;
for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) {
vec2 disk = disk_rotation * scene_data.penumbra_shadow_kernel[i].xy;
@ -955,7 +955,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y;
pos = normalize(pos);
vec4 uv_rect = lights.data[idx].atlas_rect;
vec4 uv_rect = omni_lights.data[idx].atlas_rect;
if (pos.z >= 0.0) {
pos.z += 1.0;
@ -983,7 +983,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
tangent *= penumbra;
bitangent *= penumbra;
z_norm -= lights.data[idx].inv_radius * lights.data[idx].shadow_bias;
z_norm -= omni_lights.data[idx].inv_radius * omni_lights.data[idx].shadow_bias;
shadow = 0.0;
for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) {
@ -991,7 +991,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
vec3 pos = splane.xyz + tangent * disk.x + bitangent * disk.y;
pos = normalize(pos);
vec4 uv_rect = lights.data[idx].atlas_rect;
vec4 uv_rect = omni_lights.data[idx].atlas_rect;
if (pos.z >= 0.0) {
pos.z += 1.0;
@ -1016,7 +1016,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
} else {
#endif
splane.xyz = normalize(splane.xyz);
vec4 clamp_rect = lights.data[idx].atlas_rect;
vec4 clamp_rect = omni_lights.data[idx].atlas_rect;
if (splane.z >= 0.0) {
splane.z += 1.0;
@ -1030,10 +1030,10 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) {
splane.xy /= splane.z;
splane.xy = splane.xy * 0.5 + 0.5;
splane.z = (shadow_len - lights.data[idx].shadow_bias) * lights.data[idx].inv_radius;
splane.z = (shadow_len - omni_lights.data[idx].shadow_bias) * omni_lights.data[idx].inv_radius;
splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
splane.w = 1.0; //needed? i think it should be 1 already
shadow = sample_pcf_shadow(shadow_atlas, lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane);
shadow = sample_pcf_shadow(shadow_atlas, omni_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, splane);
#ifdef USE_SOFT_SHADOWS
}
#endif
@ -1068,17 +1068,17 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
inout float alpha,
#endif
inout vec3 diffuse_light, inout vec3 specular_light) {
vec3 light_rel_vec = lights.data[idx].position - vertex;
vec3 light_rel_vec = omni_lights.data[idx].position - vertex;
float light_length = length(light_rel_vec);
float omni_attenuation = get_omni_attenuation(light_length, lights.data[idx].inv_radius, lights.data[idx].attenuation);
float omni_attenuation = get_omni_attenuation(light_length, omni_lights.data[idx].inv_radius, omni_lights.data[idx].attenuation);
float light_attenuation = omni_attenuation;
vec3 color = lights.data[idx].color;
vec3 color = omni_lights.data[idx].color;
#ifdef USE_SOFT_SHADOWS
float size_A = 0.0;
if (lights.data[idx].size > 0.0) {
float t = lights.data[idx].size / max(0.001, light_length);
if (omni_lights.data[idx].size > 0.0) {
float t = omni_lights.data[idx].size / max(0.001, light_length);
size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t));
}
#endif
@ -1087,10 +1087,10 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
float transmittance_z = transmittance_depth; //no transmittance by default
transmittance_color.a *= light_attenuation;
{
vec4 clamp_rect = lights.data[idx].atlas_rect;
vec4 clamp_rect = omni_lights.data[idx].atlas_rect;
//redo shadowmapping, but shrink the model a bit to avoid artifacts
vec4 splane = (lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * lights.data[idx].transmittance_bias, 1.0));
vec4 splane = (omni_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * omni_lights.data[idx].transmittance_bias, 1.0));
shadow_len = length(splane.xyz);
splane = normalize(splane.xyz);
@ -1104,22 +1104,22 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
splane.xy /= splane.z;
splane.xy = splane.xy * 0.5 + 0.5;
splane.z = shadow_len * lights.data[idx].inv_radius;
splane.z = shadow_len * omni_lights.data[idx].inv_radius;
splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
splane.w = 1.0; //needed? i think it should be 1 already
float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r;
transmittance_z = (splane.z - shadow_z) / lights.data[idx].inv_radius;
transmittance_z = (splane.z - shadow_z) / omni_lights.data[idx].inv_radius;
}
#endif
#if 0
if (lights.data[idx].projector_rect != vec4(0.0)) {
vec3 local_v = (lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz;
if (omni_lights.data[idx].projector_rect != vec4(0.0)) {
vec3 local_v = (omni_lights.data[idx].shadow_matrix * vec4(vertex, 1.0)).xyz;
local_v = normalize(local_v);
vec4 atlas_rect = lights.data[idx].projector_rect;
vec4 atlas_rect = omni_lights.data[idx].projector_rect;
if (local_v.z >= 0.0) {
local_v.z += 1.0;
@ -1136,7 +1136,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
vec2 proj_uv_ddx;
vec2 proj_uv_ddy;
{
vec3 local_v_ddx = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz;
vec3 local_v_ddx = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)).xyz;
local_v_ddx = normalize(local_v_ddx);
if (local_v_ddx.z >= 0.0) {
@ -1150,7 +1150,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
proj_uv_ddx = local_v_ddx.xy * atlas_rect.zw - proj_uv;
vec3 local_v_ddy = (lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz;
vec3 local_v_ddy = (omni_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)).xyz;
local_v_ddy = normalize(local_v_ddy);
if (local_v_ddy.z >= 0.0) {
@ -1172,7 +1172,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
light_attenuation *= shadow;
light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms,
light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, omni_lights.data[idx].specular_amount,
#ifdef LIGHT_BACKLIGHT_USED
backlight,
#endif
@ -1204,37 +1204,37 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) {
#ifndef USE_NO_SHADOWS
if (lights.data[idx].shadow_enabled) {
vec3 light_rel_vec = lights.data[idx].position - vertex;
if (spot_lights.data[idx].shadow_enabled) {
vec3 light_rel_vec = spot_lights.data[idx].position - vertex;
float light_length = length(light_rel_vec);
vec3 spot_dir = lights.data[idx].direction;
vec3 spot_dir = spot_lights.data[idx].direction;
//there is a shadowmap
vec4 v = vec4(vertex, 1.0);
v.xyz -= spot_dir * lights.data[idx].shadow_bias;
v.xyz -= spot_dir * spot_lights.data[idx].shadow_bias;
float z_norm = dot(spot_dir, -light_rel_vec) * lights.data[idx].inv_radius;
float z_norm = dot(spot_dir, -light_rel_vec) * spot_lights.data[idx].inv_radius;
float depth_bias_scale = 1.0 / (max(0.0001, z_norm)); //the closer to the light origin, the more you have to offset to reach 1px in the map
vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * lights.data[idx].shadow_normal_bias * depth_bias_scale;
vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(spot_dir, -normalize(normal_interp)))) * spot_lights.data[idx].shadow_normal_bias * depth_bias_scale;
normal_bias -= spot_dir * dot(spot_dir, normal_bias); //only XY, no Z
v.xyz += normal_bias;
//adjust with bias
z_norm = dot(spot_dir, v.xyz - lights.data[idx].position) * lights.data[idx].inv_radius;
z_norm = dot(spot_dir, v.xyz - spot_lights.data[idx].position) * spot_lights.data[idx].inv_radius;
float shadow;
vec4 splane = (lights.data[idx].shadow_matrix * v);
vec4 splane = (spot_lights.data[idx].shadow_matrix * v);
splane /= splane.w;
#ifdef USE_SOFT_SHADOWS
if (lights.data[idx].soft_shadow_size > 0.0) {
if (spot_lights.data[idx].soft_shadow_size > 0.0) {
//soft shadow
//find blocker
vec2 shadow_uv = splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy;
vec2 shadow_uv = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy;
float blocker_count = 0.0;
float blocker_average = 0.0;
@ -1247,11 +1247,11 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) {
disk_rotation = mat2(vec2(cr, -sr), vec2(sr, cr));
}
float uv_size = lights.data[idx].soft_shadow_size * z_norm * lights.data[idx].soft_shadow_scale;
vec2 clamp_max = lights.data[idx].atlas_rect.xy + lights.data[idx].atlas_rect.zw;
float uv_size = spot_lights.data[idx].soft_shadow_size * z_norm * spot_lights.data[idx].soft_shadow_scale;
vec2 clamp_max = spot_lights.data[idx].atlas_rect.xy + spot_lights.data[idx].atlas_rect.zw;
for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) {
vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size;
suv = clamp(suv, lights.data[idx].atlas_rect.xy, clamp_max);
suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max);
float d = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), suv, 0.0).r;
if (d < z_norm) {
blocker_average += d;
@ -1268,7 +1268,7 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) {
shadow = 0.0;
for (uint i = 0; i < scene_data.penumbra_shadow_samples; i++) {
vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size;
suv = clamp(suv, lights.data[idx].atlas_rect.xy, clamp_max);
suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max);
shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(suv, z_norm, 1.0));
}
@ -1282,9 +1282,9 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) {
} else {
#endif
//hard shadow
vec4 shadow_uv = vec4(splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy, z_norm, 1.0);
vec4 shadow_uv = vec4(splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy, z_norm, 1.0);
shadow = sample_pcf_shadow(shadow_atlas, lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv);
shadow = sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv);
#ifdef USE_SOFT_SHADOWS
}
#endif
@ -1321,28 +1321,28 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
#endif
inout vec3 diffuse_light,
inout vec3 specular_light) {
vec3 light_rel_vec = lights.data[idx].position - vertex;
vec3 light_rel_vec = spot_lights.data[idx].position - vertex;
float light_length = length(light_rel_vec);
float spot_attenuation = get_omni_attenuation(light_length, lights.data[idx].inv_radius, lights.data[idx].attenuation);
vec3 spot_dir = lights.data[idx].direction;
float scos = max(dot(-normalize(light_rel_vec), spot_dir), lights.data[idx].cone_angle);
float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - lights.data[idx].cone_angle));
spot_attenuation *= 1.0 - pow(spot_rim, lights.data[idx].cone_attenuation);
float spot_attenuation = get_omni_attenuation(light_length, spot_lights.data[idx].inv_radius, spot_lights.data[idx].attenuation);
vec3 spot_dir = spot_lights.data[idx].direction;
float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[idx].cone_angle);
float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[idx].cone_angle));
spot_attenuation *= 1.0 - pow(spot_rim, spot_lights.data[idx].cone_attenuation);
float light_attenuation = spot_attenuation;
vec3 color = lights.data[idx].color;
float specular_amount = lights.data[idx].specular_amount;
vec3 color = spot_lights.data[idx].color;
float specular_amount = spot_lights.data[idx].specular_amount;
#ifdef USE_SOFT_SHADOWS
float size_A = 0.0;
if (lights.data[idx].size > 0.0) {
float t = lights.data[idx].size / max(0.001, light_length);
if (spot_lights.data[idx].size > 0.0) {
float t = spot_lights.data[idx].size / max(0.001, light_length);
size_A = max(0.0, 1.0 - 1 / sqrt(1 + t * t));
}
#endif
/*
if (lights.data[idx].atlas_rect!=vec4(0.0)) {
if (spot_lights.data[idx].atlas_rect!=vec4(0.0)) {
//use projector texture
}
*/
@ -1351,13 +1351,13 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
float transmittance_z = transmittance_depth;
transmittance_color.a *= light_attenuation;
{
splane = (lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * lights.data[idx].transmittance_bias, 1.0));
splane = (spot_lights.data[idx].shadow_matrix * vec4(vertex - normalize(normal_interp) * spot_lights.data[idx].transmittance_bias, 1.0));
splane /= splane.w;
splane.xy = splane.xy * lights.data[idx].atlas_rect.zw + lights.data[idx].atlas_rect.xy;
splane.xy = splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy;
float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r;
//reconstruct depth
shadow_z /= lights.data[idx].inv_radius;
shadow_z /= spot_lights.data[idx].inv_radius;
//distance to light plane
float z = dot(spot_dir, -light_rel_vec);
transmittance_z = z - shadow_z;
@ -1366,7 +1366,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v
light_attenuation *= shadow;
light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms,
light_compute(normal, normalize(light_rel_vec), eye_vec, color, light_attenuation, f0, orms, spot_lights.data[idx].specular_amount,
#ifdef LIGHT_BACKLIGHT_USED
backlight,
#endif
@ -1785,7 +1785,43 @@ vec4 fog_process(vec3 vertex) {
return vec4(fog_color, fog_amount);
}
// Unpacks the min/max word stored at p_offset in the cluster buffer.
// item_min / item_max: low and high 16 bits of the word (item_max is stored plus one, 0 means empty).
// item_from / item_to: range of 32-bit mask words [item_from, item_to) that covers those items.
void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) {
	uint packed_range = cluster_buffer.data[p_offset];

	item_min = packed_range & 0xFFFF;
	item_max = packed_range >> 16;

	item_from = item_min >> 5;

	// Side effect of how it is stored, as item_max 0 means no elements.
	if (item_max == 0) {
		item_to = 0;
	} else {
		item_to = ((item_max - 1) >> 5) + 1;
	}
}
// Returns the bits of mask word `i` (covering items i*32 .. i*32+31) that lie inside the
// item range [z_min, z_max). Returns 0 when the word does not intersect the range.
//
// Both ends are computed relative to the word. Deriving the width from (z_max - z_min)
// alone keeps bits at or beyond z_max whenever the range spans more than one word, or
// when `i` lies outside of the range (callers merge the word range across a subgroup).
uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) {
	int word_start = int(i) * 32;
	int local_min = clamp(int(z_min) - word_start, 0, 32);
	int local_max = clamp(int(z_max) - word_start, 0, 32);
	if (local_max <= local_min) {
		return uint(0); // the range does not touch this word
	}
	return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, local_max - local_min);
}
// Currently a pass-through: returns `shadow` unchanged.
// The derivative based smoothing below is compiled out.
float blur_shadow(float shadow) {
#if 0
	//disabling for now, will investigate later
	float interp_shadow = shadow;
	if (gl_HelperInvocation) {
		interp_shadow = -4.0; // technically anything below -4 will do but just to make sure
	}

	uvec2 fc2 = uvec2(gl_FragCoord.xy);
	interp_shadow -= dFdx(interp_shadow) * (float(fc2.x & 1) - 0.5);
	interp_shadow -= dFdy(interp_shadow) * (float(fc2.y & 1) - 0.5);

	if (interp_shadow >= 0.0) {
		shadow = interp_shadow;
	}
	return shadow;
#else
	return shadow;
#endif
}
#endif //!MODE_RENDER DEPTH
void main() {
#ifdef MODE_DUAL_PARABOLOID
@ -2003,67 +2039,98 @@ FRAGMENT_SHADER_CODE
#ifndef MODE_RENDER_DEPTH
uvec4 cluster_cell = texture(usampler3D(cluster_texture, material_samplers[SAMPLER_NEAREST_CLAMP]), vec3(screen_uv, (abs(vertex.z) - scene_data.z_near) / (scene_data.z_far - scene_data.z_near)));
uvec2 cluster_pos = uvec2(gl_FragCoord.xy) >> scene_data.cluster_shift;
uint cluster_offset = (scene_data.cluster_width * cluster_pos.y + cluster_pos.x) * (scene_data.max_cluster_element_count_div_32 + 32);
uint cluster_z = uint(clamp((-vertex.z / scene_data.z_far) * 32.0, 0.0, 31.0));
//used for interpolating anything cluster related
vec3 vertex_ddx = dFdx(vertex);
vec3 vertex_ddy = dFdy(vertex);
{ // process decals
uint decal_count = cluster_cell.w >> CLUSTER_COUNTER_SHIFT;
uint decal_pointer = cluster_cell.w & CLUSTER_POINTER_MASK;
uint cluster_decal_offset = cluster_offset + scene_data.cluster_type_size * 2;
//do outside for performance and avoiding artifacts
uint item_min;
uint item_max;
uint item_from;
uint item_to;
for (uint i = 0; i < decal_count; i++) {
uint decal_index = cluster_data.indices[decal_pointer + i];
if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) {
continue; //not masked
}
cluster_get_item_range(cluster_decal_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz;
if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) {
continue; //out of decal
}
#ifdef USE_SUBGROUPS
item_from = subgroupBroadcastFirst(subgroupMin(item_from));
item_to = subgroupBroadcastFirst(subgroupMax(item_to));
#endif
//we need ddx/ddy for mipmaps, so simulate them
vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz;
vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz;
for (uint i = item_from; i < item_to; i++) {
uint mask = cluster_buffer.data[cluster_decal_offset + i];
mask &= cluster_get_range_clip_mask(i, item_min, item_max);
#ifdef USE_SUBGROUPS
uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
#else
uint merged_mask = mask;
#endif
float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade);
while (merged_mask != 0) {
uint bit = findMSB(merged_mask);
merged_mask &= ~(1 << bit);
#ifdef USE_SUBGROUPS
if (((1 << bit) & mask) == 0) { //do not process if not originally here
continue;
}
#endif
uint decal_index = 32 * i + bit;
if (decals.data[decal_index].normal_fade > 0.0) {
fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5);
}
if (decals.data[decal_index].albedo_rect != vec4(0.0)) {
//has albedo
vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw);
decal_albedo *= decals.data[decal_index].modulate;
decal_albedo.a *= fade;
albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix);
if (decals.data[decal_index].normal_rect != vec4(0.0)) {
vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz;
decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software
decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy)));
//convert to view space, use xzy because y is up
decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz;
normal = normalize(mix(normal, decal_normal, decal_albedo.a));
if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) {
continue; //not masked
}
if (decals.data[decal_index].orm_rect != vec4(0.0)) {
vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz;
ao = mix(ao, decal_orm.r, decal_albedo.a);
roughness = mix(roughness, decal_orm.g, decal_albedo.a);
metallic = mix(metallic, decal_orm.b, decal_albedo.a);
vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz;
if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) {
continue; //out of decal
}
}
if (decals.data[decal_index].emission_rect != vec4(0.0)) {
//emission is additive, so its independent from albedo
emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade;
//we need ddx/ddy for mipmaps, so simulate them
vec2 ddx = (decals.data[decal_index].xform * vec4(vertex_ddx, 0.0)).xz;
vec2 ddy = (decals.data[decal_index].xform * vec4(vertex_ddy, 0.0)).xz;
float fade = pow(1.0 - (uv_local.y > 0.0 ? uv_local.y : -uv_local.y), uv_local.y > 0.0 ? decals.data[decal_index].upper_fade : decals.data[decal_index].lower_fade);
if (decals.data[decal_index].normal_fade > 0.0) {
fade *= smoothstep(decals.data[decal_index].normal_fade, 1.0, dot(normal_interp, decals.data[decal_index].normal) * 0.5 + 0.5);
}
if (decals.data[decal_index].albedo_rect != vec4(0.0)) {
//has albedo
vec4 decal_albedo = textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].albedo_rect.zw + decals.data[decal_index].albedo_rect.xy, ddx * decals.data[decal_index].albedo_rect.zw, ddy * decals.data[decal_index].albedo_rect.zw);
decal_albedo *= decals.data[decal_index].modulate;
decal_albedo.a *= fade;
albedo = mix(albedo, decal_albedo.rgb, decal_albedo.a * decals.data[decal_index].albedo_mix);
if (decals.data[decal_index].normal_rect != vec4(0.0)) {
vec3 decal_normal = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].normal_rect.zw + decals.data[decal_index].normal_rect.xy, ddx * decals.data[decal_index].normal_rect.zw, ddy * decals.data[decal_index].normal_rect.zw).xyz;
decal_normal.xy = decal_normal.xy * vec2(2.0, -2.0) - vec2(1.0, -1.0); //users prefer flipped y normal maps in most authoring software
decal_normal.z = sqrt(max(0.0, 1.0 - dot(decal_normal.xy, decal_normal.xy)));
//convert to view space, use xzy because y is up
decal_normal = (decals.data[decal_index].normal_xform * decal_normal.xzy).xyz;
normal = normalize(mix(normal, decal_normal, decal_albedo.a));
}
if (decals.data[decal_index].orm_rect != vec4(0.0)) {
vec3 decal_orm = textureGrad(sampler2D(decal_atlas, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].orm_rect.zw + decals.data[decal_index].orm_rect.xy, ddx * decals.data[decal_index].orm_rect.zw, ddy * decals.data[decal_index].orm_rect.zw).xyz;
ao = mix(ao, decal_orm.r, decal_albedo.a);
roughness = mix(roughness, decal_orm.g, decal_albedo.a);
metallic = mix(metallic, decal_orm.b, decal_albedo.a);
}
}
if (decals.data[decal_index].emission_rect != vec4(0.0)) {
//emission is additive, so its independent from albedo
emission += textureGrad(sampler2D(decal_atlas_srgb, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade;
}
}
}
}
@ -2348,12 +2415,45 @@ FRAGMENT_SHADER_CODE
vec4 reflection_accum = vec4(0.0, 0.0, 0.0, 0.0);
vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0);
uint reflection_probe_count = cluster_cell.z >> CLUSTER_COUNTER_SHIFT;
uint reflection_probe_pointer = cluster_cell.z & CLUSTER_POINTER_MASK;
uint cluster_reflection_offset = cluster_offset + scene_data.cluster_type_size * 3;
for (uint i = 0; i < reflection_probe_count; i++) {
uint ref_index = cluster_data.indices[reflection_probe_pointer + i];
reflection_process(ref_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum);
uint item_min;
uint item_max;
uint item_from;
uint item_to;
cluster_get_item_range(cluster_reflection_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
#ifdef USE_SUBGROUPS
item_from = subgroupBroadcastFirst(subgroupMin(item_from));
item_to = subgroupBroadcastFirst(subgroupMax(item_to));
#endif
for (uint i = item_from; i < item_to; i++) {
uint mask = cluster_buffer.data[cluster_reflection_offset + i];
mask &= cluster_get_range_clip_mask(i, item_min, item_max);
#ifdef USE_SUBGROUPS
uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
#else
uint merged_mask = mask;
#endif
while (merged_mask != 0) {
uint bit = findMSB(merged_mask);
merged_mask &= ~(1 << bit);
#ifdef USE_SUBGROUPS
if (((1 << bit) & mask) == 0) { //do not process if not originally here
continue;
}
#endif
uint reflection_index = 32 * i + bit;
if (!bool(reflections.data[reflection_index].mask & draw_call.layer_mask)) {
continue; //not masked
}
reflection_process(reflection_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum);
}
}
if (reflection_accum.a > 0.0) {
@ -2800,7 +2900,9 @@ FRAGMENT_SHADER_CODE
shadow = float(shadow1 >> ((i - 4) * 8) & 0xFF) / 255.0;
}
light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms,
blur_shadow(shadow);
light_compute(normal, directional_lights.data[i].direction, normalize(view), directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms, 1.0,
#ifdef LIGHT_BACKLIGHT_USED
backlight,
#endif
@ -2833,154 +2935,146 @@ FRAGMENT_SHADER_CODE
{ //omni lights
uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT;
uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK;
uint cluster_omni_offset = cluster_offset;
// Do shadow and lighting in two passes to reduce register pressure
uint shadow0 = 0;
uint shadow1 = 0;
uint shadow2 = 0;
uint item_min;
uint item_max;
uint item_from;
uint item_to;
for (uint i = 0; i < 18; i++) {
if (i >= omni_light_count) {
break;
}
uint light_index = cluster_data.indices[omni_light_pointer + i];
cluster_get_item_range(cluster_omni_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
if (!bool(lights.data[light_index].mask & draw_call.layer_mask)) {
continue; //not masked
}
#ifdef USE_SUBGROUPS
item_from = subgroupBroadcastFirst(subgroupMin(item_from));
item_to = subgroupBroadcastFirst(subgroupMax(item_to));
#endif
float s = light_process_omni_shadow(light_index, vertex, view);
if (i < 6) {
shadow0 |= uint(clamp(s * 31.0, 0.0, 31.0)) << (i * 5);
} else if (i < 12) {
shadow1 |= uint(clamp(s * 31.0, 0.0, 31.0)) << ((i - 6) * 5);
} else {
shadow2 |= uint(clamp(s * 31.0, 0.0, 31.0)) << ((i - 12) * 5);
}
}
for (uint i = item_from; i < item_to; i++) {
uint mask = cluster_buffer.data[cluster_omni_offset + i];
mask &= cluster_get_range_clip_mask(i, item_min, item_max);
#ifdef USE_SUBGROUPS
uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
#else
uint merged_mask = mask;
#endif
for (uint i = 0; i < 18; i++) {
if (i == omni_light_count) {
break;
}
uint light_index = cluster_data.indices[omni_light_pointer + i];
while (merged_mask != 0) {
uint bit = findMSB(merged_mask);
merged_mask &= ~(1 << bit);
#ifdef USE_SUBGROUPS
if (((1 << bit) & mask) == 0) { //do not process if not originally here
continue;
}
#endif
uint light_index = 32 * i + bit;
if (!bool(lights.data[light_index].mask & draw_call.layer_mask)) {
continue; //not masked
}
if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) {
continue; //not masked
}
float shadow;
if (i < 6) {
shadow = float(shadow0 >> (i * 5) & 0x1F) / 31.0;
} else if (i < 12) {
shadow = float(shadow1 >> ((i - 6) * 5) & 0x1F) / 31.0;
} else {
shadow = float(shadow1 >> ((i - 12) * 5) & 0x1F) / 31.0;
}
float shadow = light_process_omni_shadow(light_index, vertex, view);
light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow,
shadow = blur_shadow(shadow);
light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow,
#ifdef LIGHT_BACKLIGHT_USED
backlight,
backlight,
#endif
#ifdef LIGHT_TRANSMITTANCE_USED
transmittance_color,
transmittance_depth,
transmittance_curve,
transmittance_boost,
transmittance_color,
transmittance_depth,
transmittance_curve,
transmittance_boost,
#endif
#ifdef LIGHT_RIM_USED
rim,
rim_tint,
albedo,
rim,
rim_tint,
albedo,
#endif
#ifdef LIGHT_CLEARCOAT_USED
clearcoat, clearcoat_gloss,
clearcoat, clearcoat_gloss,
#endif
#ifdef LIGHT_ANISOTROPY_USED
tangent, binormal, anisotropy,
tangent, binormal, anisotropy,
#endif
#ifdef USE_SHADOW_TO_OPACITY
alpha,
alpha,
#endif
diffuse_light, specular_light);
diffuse_light, specular_light);
}
}
}
{ //spot lights
uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT;
uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK;
// Do shadow and lighting in two passes to reduce register pressure
uint shadow0 = 0;
uint shadow1 = 0;
uint shadow2 = 0;
uint cluster_spot_offset = cluster_offset + scene_data.cluster_type_size;
for (uint i = 0; i < 18; i++) {
if (i >= spot_light_count) {
break;
}
uint light_index = cluster_data.indices[spot_light_pointer + i];
uint item_min;
uint item_max;
uint item_from;
uint item_to;
if (!bool(lights.data[light_index].mask & draw_call.layer_mask)) {
continue; //not masked
}
cluster_get_item_range(cluster_spot_offset + scene_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
float s = light_process_spot_shadow(light_index, vertex, view);
if (i < 6) {
shadow0 |= uint(clamp(s * 31.0, 0.0, 31.0)) << (i * 5);
} else if (i < 12) {
shadow1 |= uint(clamp(s * 31.0, 0.0, 31.0)) << ((i - 6) * 5);
} else {
shadow2 |= uint(clamp(s * 31.0, 0.0, 31.0)) << ((i - 12) * 5);
}
}
#ifdef USE_SUBGROUPS
item_from = subgroupBroadcastFirst(subgroupMin(item_from));
item_to = subgroupBroadcastFirst(subgroupMax(item_to));
#endif
for (uint i = 0; i < 18; i++) {
if (i == spot_light_count) {
break;
}
uint light_index = cluster_data.indices[spot_light_pointer + i];
for (uint i = item_from; i < item_to; i++) {
uint mask = cluster_buffer.data[cluster_spot_offset + i];
mask &= cluster_get_range_clip_mask(i, item_min, item_max);
#ifdef USE_SUBGROUPS
uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
#else
uint merged_mask = mask;
#endif
if (!bool(lights.data[light_index].mask & draw_call.layer_mask)) {
continue; //not masked
}
while (merged_mask != 0) {
uint bit = findMSB(merged_mask);
merged_mask &= ~(1 << bit);
#ifdef USE_SUBGROUPS
if (((1 << bit) & mask) == 0) { //do not process if not originally here
continue;
}
#endif
float shadow;
if (i < 6) {
shadow = float(shadow0 >> (i * 5) & 0x1F) / 31.0;
} else if (i < 12) {
shadow = float(shadow1 >> ((i - 6) * 5) & 0x1F) / 31.0;
} else {
shadow = float(shadow1 >> ((i - 12) * 5) & 0x1F) / 31.0;
}
uint light_index = 32 * i + bit;
light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow,
if (!bool(spot_lights.data[light_index].mask & draw_call.layer_mask)) {
continue; //not masked
}
float shadow = light_process_spot_shadow(light_index, vertex, view);
shadow = blur_shadow(shadow);
light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow,
#ifdef LIGHT_BACKLIGHT_USED
backlight,
backlight,
#endif
#ifdef LIGHT_TRANSMITTANCE_USED
transmittance_color,
transmittance_depth,
transmittance_curve,
transmittance_boost,
transmittance_color,
transmittance_depth,
transmittance_curve,
transmittance_boost,
#endif
#ifdef LIGHT_RIM_USED
rim,
rim_tint,
albedo,
rim,
rim_tint,
albedo,
#endif
#ifdef LIGHT_CLEARCOAT_USED
clearcoat, clearcoat_gloss,
clearcoat, clearcoat_gloss,
#endif
#ifdef LIGHT_ANISOTROPY_USED
tangent, binormal, anisotropy,
tangent, binormal, anisotropy,
#endif
#ifdef USE_SHADOW_TO_OPACITY
alpha,
alpha,
#endif
diffuse_light, specular_light);
diffuse_light, specular_light);
}
}
}

View file

@ -3,6 +3,15 @@
#define MAX_GI_PROBES 8
#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic)
#extension GL_KHR_shader_subgroup_ballot : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable
#define USE_SUBGROUPS
#endif
#include "cluster_data_inc.glsl"
#if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(MODE_RENDER_SDF) || defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_GIPROBE) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED)
@ -52,6 +61,11 @@ layout(set = 0, binding = 3, std140) uniform SceneData {
vec2 viewport_size;
vec2 screen_pixel_size;
uint cluster_shift;
uint cluster_width;
uint cluster_type_size;
uint max_cluster_element_count_div_32;
//use vec4s because std140 doesn't play nice with vec2s, z and w are wasted
vec4 directional_penumbra_shadow_kernel[32];
vec4 directional_soft_shadow_kernel[32];
@ -139,17 +153,22 @@ scene_data;
#define INSTANCE_FLAGS_SKELETON (1 << 19)
#define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20)
layout(set = 0, binding = 5, std430) restrict readonly buffer Lights {
layout(set = 0, binding = 5, std430) restrict readonly buffer OmniLights {
LightData data[];
}
lights;
omni_lights;
layout(set = 0, binding = 6) buffer restrict readonly ReflectionProbeData {
layout(set = 0, binding = 6, std430) restrict readonly buffer SpotLights {
LightData data[];
}
spot_lights;
layout(set = 0, binding = 7) buffer restrict readonly ReflectionProbeData {
ReflectionData data[];
}
reflections;
layout(set = 0, binding = 7, std140) uniform DirectionalLights {
layout(set = 0, binding = 8, std140) uniform DirectionalLights {
DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS];
}
directional_lights;
@ -183,16 +202,9 @@ layout(set = 0, binding = 14, std430) restrict readonly buffer Decals {
}
decals;
layout(set = 0, binding = 15) uniform utexture3D cluster_texture;
layout(set = 0, binding = 15) uniform texture2D directional_shadow_atlas;
layout(set = 0, binding = 16, std430) restrict readonly buffer ClusterData {
uint indices[];
}
cluster_data;
layout(set = 0, binding = 17) uniform texture2D directional_shadow_atlas;
layout(set = 0, binding = 18, std430) restrict readonly buffer GlobalVariableData {
layout(set = 0, binding = 16, std430) restrict readonly buffer GlobalVariableData {
vec4 data[];
}
global_variables;
@ -206,7 +218,7 @@ struct SDFGIProbeCascadeData {
float to_cell; // 1/bounds * grid_size
};
layout(set = 0, binding = 19, std140) uniform SDFGI {
layout(set = 0, binding = 17, std140) uniform SDFGI {
vec3 grid_size;
uint max_cascades;
@ -262,14 +274,19 @@ layout(set = 1, binding = 3) uniform texture2DArray lightmap_textures[MAX_LIGHTM
layout(set = 1, binding = 4) uniform texture3D gi_probe_textures[MAX_GI_PROBES];
#endif
layout(set = 1, binding = 5, std430) buffer restrict readonly ClusterBuffer {
uint data[];
}
cluster_buffer;
/* Set 3, Render Buffers */
#ifdef MODE_RENDER_SDF
layout(r16ui, set = 1, binding = 5) uniform restrict writeonly uimage3D albedo_volume_grid;
layout(r32ui, set = 1, binding = 6) uniform restrict writeonly uimage3D emission_grid;
layout(r32ui, set = 1, binding = 7) uniform restrict writeonly uimage3D emission_aniso_grid;
layout(r32ui, set = 1, binding = 8) uniform restrict uimage3D geom_facing_grid;
layout(r16ui, set = 1, binding = 6) uniform restrict writeonly uimage3D albedo_volume_grid;
layout(r32ui, set = 1, binding = 7) uniform restrict writeonly uimage3D emission_grid;
layout(r32ui, set = 1, binding = 8) uniform restrict writeonly uimage3D emission_aniso_grid;
layout(r32ui, set = 1, binding = 9) uniform restrict uimage3D geom_facing_grid;
//still need to be present for shaders that use it, so remap them to something
#define depth_buffer shadow_atlas
@ -278,17 +295,17 @@ layout(r32ui, set = 1, binding = 8) uniform restrict uimage3D geom_facing_grid;
#else
layout(set = 1, binding = 5) uniform texture2D depth_buffer;
layout(set = 1, binding = 6) uniform texture2D color_buffer;
layout(set = 1, binding = 6) uniform texture2D depth_buffer;
layout(set = 1, binding = 7) uniform texture2D color_buffer;
#ifndef LOW_END_MODE
layout(set = 1, binding = 7) uniform texture2D normal_roughness_buffer;
layout(set = 1, binding = 8) uniform texture2D ao_buffer;
layout(set = 1, binding = 9) uniform texture2D ambient_buffer;
layout(set = 1, binding = 10) uniform texture2D reflection_buffer;
layout(set = 1, binding = 11) uniform texture2DArray sdfgi_lightprobe_texture;
layout(set = 1, binding = 12) uniform texture3D sdfgi_occlusion_cascades;
layout(set = 1, binding = 8) uniform texture2D normal_roughness_buffer;
layout(set = 1, binding = 9) uniform texture2D ao_buffer;
layout(set = 1, binding = 10) uniform texture2D ambient_buffer;
layout(set = 1, binding = 11) uniform texture2D reflection_buffer;
layout(set = 1, binding = 12) uniform texture2DArray sdfgi_lightprobe_texture;
layout(set = 1, binding = 13) uniform texture3D sdfgi_occlusion_cascades;
struct GIProbeData {
mat4 xform;
@ -306,12 +323,12 @@ struct GIProbeData {
uint mipmaps;
};
layout(set = 1, binding = 13, std140) uniform GIProbes {
layout(set = 1, binding = 14, std140) uniform GIProbes {
GIProbeData data[MAX_GI_PROBES];
}
gi_probes;
layout(set = 1, binding = 14) uniform texture3D volumetric_fog_texture;
layout(set = 1, binding = 15) uniform texture3D volumetric_fog_texture;
#endif // LOW_END_MODE

View file

@ -143,10 +143,78 @@ void main() {
uint voxel_albedo = process_voxels.data[voxel_index].albedo;
vec3 albedo = vec3(uvec3(voxel_albedo >> 10, voxel_albedo >> 5, voxel_albedo) & uvec3(0x1F)) / float(0x1F);
vec3 light_accum[6];
vec3 light_accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0));
uint valid_aniso = (voxel_albedo >> 15) & 0x3F;
const vec3 aniso_dir[6] = vec3[](
vec3(1, 0, 0),
vec3(0, 1, 0),
vec3(0, 0, 1),
vec3(-1, 0, 0),
vec3(0, -1, 0),
vec3(0, 0, -1));
// Add indirect light first, in order to save computation resources
#ifdef MODE_PROCESS_DYNAMIC
if (params.multibounce) {
vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size;
ivec3 probe_base_pos = ivec3(pos);
float weight_accum[6] = float[](0, 0, 0, 0, 0, 0);
ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade));
tex_pos.x += probe_base_pos.z * int(params.probe_axis_size);
tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1);
vec3 base_tex_posf = vec3(tex_pos);
vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size));
vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx;
for (uint j = 0; j < 8; j++) {
ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1);
ivec3 probe_posi = probe_base_pos;
probe_posi += offset;
// Compute weight
vec3 probe_pos = vec3(probe_posi);
vec3 probe_to_pos = pos - probe_pos;
vec3 probe_dir = normalize(-probe_to_pos);
// Compute lightprobe texture position
vec3 trilinear = vec3(1.0) - abs(probe_to_pos);
for (uint k = 0; k < 6; k++) {
if (bool(valid_aniso & (1 << k))) {
vec3 n = aniso_dir[k];
float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir));
vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0);
tex_posf.xy *= tex_pixel_size;
vec3 pos_uvw = tex_posf;
pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy;
pos_uvw.x += float(offset.z) * probe_uv_offset.z;
vec3 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb;
light_accum[k] += indirect_light * weight;
weight_accum[k] += weight;
}
}
}
for (uint k = 0; k < 6; k++) {
if (weight_accum[k] > 0.0) {
light_accum[k] /= weight_accum[k];
light_accum[k] *= albedo;
}
}
}
#endif
{
uint rgbe = process_voxels.data[voxel_index].light;
@ -162,18 +230,10 @@ void main() {
uint aniso = process_voxels.data[voxel_index].light_aniso;
for (uint i = 0; i < 6; i++) {
float strength = ((aniso >> (i * 5)) & 0x1F) / float(0x1F);
light_accum[i] = l * strength;
light_accum[i] += l * strength;
}
}
const vec3 aniso_dir[6] = vec3[](
vec3(1, 0, 0),
vec3(0, 1, 0),
vec3(0, 0, 1),
vec3(-1, 0, 0),
vec3(0, -1, 0),
vec3(0, 0, -1));
// Raytrace light
vec3 pos_to_uvw = 1.0 / params.grid_size;
@ -292,65 +352,6 @@ void main() {
}
}
// Add indirect light
if (params.multibounce) {
vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size;
ivec3 probe_base_pos = ivec3(pos);
vec4 probe_accum[6] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
float weight_accum[6] = float[](0, 0, 0, 0, 0, 0);
ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade));
tex_pos.x += probe_base_pos.z * int(params.probe_axis_size);
tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1);
vec3 base_tex_posf = vec3(tex_pos);
vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size));
vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx;
for (uint j = 0; j < 8; j++) {
ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1);
ivec3 probe_posi = probe_base_pos;
probe_posi += offset;
// Compute weight
vec3 probe_pos = vec3(probe_posi);
vec3 probe_to_pos = pos - probe_pos;
vec3 probe_dir = normalize(-probe_to_pos);
// Compute lightprobe texture position
vec3 trilinear = vec3(1.0) - abs(probe_to_pos);
for (uint k = 0; k < 6; k++) {
if (bool(valid_aniso & (1 << k))) {
vec3 n = aniso_dir[k];
float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir));
vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0);
tex_posf.xy *= tex_pixel_size;
vec3 pos_uvw = tex_posf;
pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy;
pos_uvw.x += float(offset.z) * probe_uv_offset.z;
vec4 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0);
probe_accum[k] += indirect_light * weight;
weight_accum[k] += weight;
}
}
}
for (uint k = 0; k < 6; k++) {
if (weight_accum[k] > 0.0) {
light_accum[k] += probe_accum[k].rgb * albedo / weight_accum[k];
}
}
}
// Store the light in the light texture
float lumas[6];

View file

@ -136,12 +136,24 @@ uint rgbe_encode(vec3 color) {
return (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27);
}
// Per-invocation spherical harmonics accumulator, stored as a flat float array.
// Coefficient m_idx occupies c[m_idx * 3 + 0..2] (r, g, b), as written by SH_ACCUM.
struct SH {
#if (SH_SIZE != 16)
	// NOTE(review): 28 is one more than SH_SIZE * 3 when SH_SIZE is 9 - presumably padding; confirm.
	float c[28];
#else
	// 16 coefficients * 3 color channels.
	float c[48];
#endif
};
shared SH sh_accum[64]; //8x8
void main() {
ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
if (any(greaterThanEqual(pos, params.image_size))) { //too large, do nothing
return;
}
uint probe_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 8;
#ifdef MODE_PROCESS
float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell;
@ -154,27 +166,9 @@ void main() {
vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size;
vec3 pos_to_uvw = 1.0 / params.grid_size;
vec4 probe_sh_accum[SH_SIZE] = vec4[](
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0)
#if (SH_SIZE == 16)
,
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0)
#endif
);
for (uint i = 0; i < SH_SIZE * 3; i++) {
sh_accum[probe_index].c[i] = 0.0;
}
// quickly ensure each probe has a different "offset" for the vogel function, based on integer world position
uvec3 h3 = hash3(uvec3(params.world_offset + probe_cell));
@ -278,33 +272,33 @@ void main() {
}
vec3 ray_dir2 = ray_dir * ray_dir;
float c[SH_SIZE] = float[](
0.282095, //l0
0.488603 * ray_dir.y, //l1n1
0.488603 * ray_dir.z, //l1n0
0.488603 * ray_dir.x, //l1p1
1.092548 * ray_dir.x * ray_dir.y, //l2n2
1.092548 * ray_dir.y * ray_dir.z, //l2n1
0.315392 * (3.0 * ray_dir2.z - 1.0), //l20
1.092548 * ray_dir.x * ray_dir.z, //l2p1
0.546274 * (ray_dir2.x - ray_dir2.y) //l2p2
#define SH_ACCUM(m_idx, m_value) \
{ \
vec3 l = light.rgb * (m_value); \
sh_accum[probe_index].c[m_idx * 3 + 0] += l.r; \
sh_accum[probe_index].c[m_idx * 3 + 1] += l.g; \
sh_accum[probe_index].c[m_idx * 3 + 2] += l.b; \
}
SH_ACCUM(0, 0.282095); //l0
SH_ACCUM(1, 0.488603 * ray_dir.y); //l1n1
SH_ACCUM(2, 0.488603 * ray_dir.z); //l1n0
SH_ACCUM(3, 0.488603 * ray_dir.x); //l1p1
SH_ACCUM(4, 1.092548 * ray_dir.x * ray_dir.y); //l2n2
SH_ACCUM(5, 1.092548 * ray_dir.y * ray_dir.z); //l2n1
SH_ACCUM(6, 0.315392 * (3.0 * ray_dir2.z - 1.0)); //l20
SH_ACCUM(7, 1.092548 * ray_dir.x * ray_dir.z); //l2p1
SH_ACCUM(8, 0.546274 * (ray_dir2.x - ray_dir2.y)); //l2p2
#if (SH_SIZE == 16)
,
0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y),
2.890611 * ray_dir.y * ray_dir.x * ray_dir.z,
0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z),
0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z),
0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z),
1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z,
0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y)
SH_ACCUM(9, 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y));
SH_ACCUM(10, 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z);
SH_ACCUM(11, 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z));
SH_ACCUM(12, 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z));
SH_ACCUM(13, 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z));
SH_ACCUM(14, 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z);
SH_ACCUM(15, 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y));
#endif
);
for (uint j = 0; j < SH_SIZE; j++) {
probe_sh_accum[j] += light * c[j];
}
}
for (uint i = 0; i < SH_SIZE; i++) {
@ -312,7 +306,7 @@ void main() {
ivec3 prev_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(params.history_index));
ivec2 average_pos = prev_pos.xy;
vec4 value = probe_sh_accum[i] * 4.0 / float(params.ray_count);
vec4 value = vec4(sh_accum[probe_index].c[i * 3 + 0], sh_accum[probe_index].c[i * 3 + 1], sh_accum[probe_index].c[i * 3 + 2], 1.0) * 4.0 / float(params.ray_count);
ivec4 ivalue = clamp(ivec4(value * float(1 << HISTORY_BITS)), -32768, 32767); //clamp to 16 bits, so higher values don't break average
@ -344,37 +338,11 @@ void main() {
ivec2 oct_pos = (pos / OCT_SIZE) * (OCT_SIZE + 2) + ivec2(1);
ivec2 local_pos = pos % OCT_SIZE;
//fill the spherical harmonic
vec4 sh[SH_SIZE];
for (uint i = 0; i < SH_SIZE; i++) {
// store in history texture
ivec2 average_pos = sh_pos + ivec2(0, i);
ivec4 average = imageLoad(lightprobe_average_texture, average_pos);
sh[i] = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS);
}
//compute the octahedral normal for this texel
vec3 normal = octahedron_encode(vec2(local_pos) / float(OCT_SIZE));
/*
// read the spherical harmonic
const float c1 = 0.429043;
const float c2 = 0.511664;
const float c3 = 0.743125;
const float c4 = 0.886227;
const float c5 = 0.247708;
vec4 light = (c1 * sh[8] * (normal.x * normal.x - normal.y * normal.y) +
c3 * sh[6] * normal.z * normal.z +
c4 * sh[0] -
c5 * sh[6] +
2.0 * c1 * sh[4] * normal.x * normal.y +
2.0 * c1 * sh[7] * normal.x * normal.z +
2.0 * c1 * sh[5] * normal.y * normal.z +
2.0 * c2 * sh[3] * normal.x +
2.0 * c2 * sh[1] * normal.y +
2.0 * c2 * sh[2] * normal.z);
*/
vec3 normal2 = normal * normal;
float c[SH_SIZE] = float[](
@ -426,7 +394,14 @@ void main() {
vec3 radiance = vec3(0.0);
for (uint i = 0; i < SH_SIZE; i++) {
vec3 m = sh[i].rgb * c[i] * 4.0;
// store in history texture
ivec2 average_pos = sh_pos + ivec2(0, i);
ivec4 average = imageLoad(lightprobe_average_texture, average_pos);
vec4 sh = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS);
vec3 m = sh.rgb * c[i] * 4.0;
irradiance += m * l_mult[i];
radiance += m;
}

View file

@ -4,6 +4,15 @@
VERSION_DEFINES
/* Do not use subgroups here, seems there is not much advantage and causes glitches
#extension GL_KHR_shader_subgroup_ballot: enable
#extension GL_KHR_shader_subgroup_arithmetic: enable
#if defined(GL_KHR_shader_subgroup_ballot) && defined(GL_KHR_shader_subgroup_arithmetic)
#define USE_SUBGROUPS
#endif
*/
#if defined(MODE_FOG) || defined(MODE_FILTER)
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
@ -23,22 +32,25 @@ layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;
layout(set = 0, binding = 1) uniform texture2D shadow_atlas;
layout(set = 0, binding = 2) uniform texture2D directional_shadow_atlas;
layout(set = 0, binding = 3, std430) restrict readonly buffer Lights {
layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights {
LightData data[];
}
lights;
omni_lights;
layout(set = 0, binding = 4, std140) uniform DirectionalLights {
layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights {
LightData data[];
}
spot_lights;
layout(set = 0, binding = 5, std140) uniform DirectionalLights {
DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS];
}
directional_lights;
layout(set = 0, binding = 5) uniform utexture3D cluster_texture;
layout(set = 0, binding = 6, std430) restrict readonly buffer ClusterData {
uint indices[];
layout(set = 0, binding = 6, std430) buffer restrict readonly ClusterBuffer {
uint data[];
}
cluster_data;
cluster_buffer;
layout(set = 0, binding = 7) uniform sampler linear_sampler;
@ -132,7 +144,7 @@ layout(set = 1, binding = 2) uniform texture3D sdfgi_occlusion_texture;
#endif //SDFGI
layout(push_constant, binding = 0, std430) uniform Params {
layout(set = 0, binding = 14, std140) uniform Params {
vec2 fog_frustum_size_begin;
vec2 fog_frustum_size_end;
@ -150,7 +162,14 @@ layout(push_constant, binding = 0, std430) uniform Params {
float detail_spread;
float gi_inject;
uint max_gi_probes;
uint pad;
uint cluster_type_size;
vec2 screen_size;
uint cluster_shift;
uint cluster_width;
uvec3 cluster_pad;
uint max_cluster_element_count_div_32;
mat3x4 cam_rotation;
}
@ -178,6 +197,22 @@ float get_omni_attenuation(float distance, float inv_range, float decay) {
return nd * pow(max(distance, 0.0001), -decay);
}
// Unpacks the item range stored at cluster_buffer.data[p_offset].
// The low 16 bits of that word are written to item_min and the high 16 bits to item_max;
// item_from / item_to are the same bounds expressed in 32-bit words (item index >> 5).
void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) {
	uint packed_range = cluster_buffer.data[p_offset];
	item_min = packed_range & 0xFFFF;
	item_max = packed_range >> 16;

	item_from = item_min >> 5;
	if (item_max == 0) {
		// Side effect of how the range is stored: an item_max of 0 means no elements.
		item_to = 0;
	} else {
		// Round up so that the word holding item (item_max - 1) is still visited.
		item_to = ((item_max - 1) >> 5) + 1;
	}
}
// Builds a bit mask for 32-bit word number `i` that keeps only bits inside the [z_min, z_max) item range.
// The mask starts at the bit where the range enters this word (clamped to the word) and spans
// the range length, limited to the bits remaining in the word.
uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) {
	// Bit position of z_min relative to the start of word `i`, clamped to 0..31.
	int first_bit = clamp(int(z_min) - int(i) * 32, 0, 31);

	int range_length = int(z_max) - int(z_min);
	int bits_remaining = 32 - first_bit;
	int bit_count = min(range_length, bits_remaining);

	// Insert `bit_count` set bits at `first_bit` into an otherwise empty mask.
	return bitfieldInsert(uint(0), uint(0xFFFFFFFF), first_bit, bit_count);
}
void main() {
vec3 fog_cell_size = 1.0 / vec3(params.fog_volume_size);
@ -193,6 +228,12 @@ void main() {
//posf += mix(vec3(0.0),vec3(1.0),0.3) * hash3f(uvec3(pos)) * 2.0 - 1.0;
vec3 fog_unit_pos = posf * fog_cell_size + fog_cell_size * 0.5; //center of voxels
uvec2 screen_pos = uvec2(fog_unit_pos.xy * params.screen_size);
uvec2 cluster_pos = screen_pos >> params.cluster_shift;
uint cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32);
//positions in screen are too spread apart, no hopes for optimizing with subgroups
fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread);
vec3 view_pos;
@ -200,6 +241,8 @@ void main() {
view_pos.z = -params.fog_frustum_end * fog_unit_pos.z;
view_pos.y = -view_pos.y;
uint cluster_z = uint(clamp((abs(view_pos.z) / params.z_far) * 32.0, 0.0, 31.0));
vec3 total_light = params.light_color;
float total_density = params.base_density;
@ -266,95 +309,160 @@ void main() {
//compute lights from cluster
vec3 cluster_pos;
cluster_pos.xy = fog_unit_pos.xy;
cluster_pos.z = clamp((abs(view_pos.z) - params.z_near) / (params.z_far - params.z_near), 0.0, 1.0);
{ //omni lights
uvec4 cluster_cell = texture(usampler3D(cluster_texture, linear_sampler), cluster_pos);
uint cluster_omni_offset = cluster_offset;
uint omni_light_count = cluster_cell.x >> CLUSTER_COUNTER_SHIFT;
uint omni_light_pointer = cluster_cell.x & CLUSTER_POINTER_MASK;
uint item_min;
uint item_max;
uint item_from;
uint item_to;
for (uint i = 0; i < omni_light_count; i++) {
uint light_index = cluster_data.indices[omni_light_pointer + i];
cluster_get_item_range(cluster_omni_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
vec3 light_pos = lights.data[i].position;
float d = distance(lights.data[i].position, view_pos);
float shadow_attenuation = 1.0;
#ifdef USE_SUBGROUPS
item_from = subgroupBroadcastFirst(subgroupMin(item_from));
item_to = subgroupBroadcastFirst(subgroupMax(item_to));
#endif
if (d * lights.data[i].inv_radius < 1.0) {
float attenuation = get_omni_attenuation(d, lights.data[i].inv_radius, lights.data[i].attenuation);
for (uint i = item_from; i < item_to; i++) {
uint mask = cluster_buffer.data[cluster_omni_offset + i];
mask &= cluster_get_range_clip_mask(i, item_min, item_max);
#ifdef USE_SUBGROUPS
uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
#else
uint merged_mask = mask;
#endif
vec3 light = lights.data[i].color / M_PI;
if (lights.data[i].shadow_enabled) {
//has shadow
vec4 v = vec4(view_pos, 1.0);
vec4 splane = (lights.data[i].shadow_matrix * v);
float shadow_len = length(splane.xyz); //need to remember shadow len from here
splane.xyz = normalize(splane.xyz);
vec4 clamp_rect = lights.data[i].atlas_rect;
if (splane.z >= 0.0) {
splane.z += 1.0;
clamp_rect.y += clamp_rect.w;
} else {
splane.z = 1.0 - splane.z;
while (merged_mask != 0) {
uint bit = findMSB(merged_mask);
merged_mask &= ~(1 << bit);
#ifdef USE_SUBGROUPS
if (((1 << bit) & mask) == 0) { //do not process if not originally here
continue;
}
#endif
uint light_index = 32 * i + bit;
splane.xy /= splane.z;
//if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) {
// continue; //not masked
//}
splane.xy = splane.xy * 0.5 + 0.5;
splane.z = shadow_len * lights.data[i].inv_radius;
splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
splane.w = 1.0; //needed? i think it should be 1 already
vec3 light_pos = omni_lights.data[light_index].position;
float d = distance(omni_lights.data[light_index].position, view_pos);
float shadow_attenuation = 1.0;
float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
if (d * omni_lights.data[light_index].inv_radius < 1.0) {
float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation);
shadow_attenuation = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade);
vec3 light = omni_lights.data[light_index].color / M_PI;
if (omni_lights.data[light_index].shadow_enabled) {
//has shadow
vec4 v = vec4(view_pos, 1.0);
vec4 splane = (omni_lights.data[light_index].shadow_matrix * v);
float shadow_len = length(splane.xyz); //need to remember shadow len from here
splane.xyz = normalize(splane.xyz);
vec4 clamp_rect = omni_lights.data[light_index].atlas_rect;
if (splane.z >= 0.0) {
splane.z += 1.0;
clamp_rect.y += clamp_rect.w;
} else {
splane.z = 1.0 - splane.z;
}
splane.xy /= splane.z;
splane.xy = splane.xy * 0.5 + 0.5;
splane.z = shadow_len * omni_lights.data[light_index].inv_radius;
splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw;
splane.w = 1.0; //needed? i think it should be 1 already
float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
shadow_attenuation = exp(min(0.0, (depth - splane.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade);
}
total_light += light * attenuation * shadow_attenuation;
}
}
total_light += light * attenuation * shadow_attenuation;
}
}
uint spot_light_count = cluster_cell.y >> CLUSTER_COUNTER_SHIFT;
uint spot_light_pointer = cluster_cell.y & CLUSTER_POINTER_MASK;
{ //spot lights
for (uint i = 0; i < spot_light_count; i++) {
uint light_index = cluster_data.indices[spot_light_pointer + i];
uint cluster_spot_offset = cluster_offset + params.cluster_type_size;
vec3 light_pos = lights.data[i].position;
vec3 light_rel_vec = lights.data[i].position - view_pos;
float d = length(light_rel_vec);
float shadow_attenuation = 1.0;
uint item_min;
uint item_max;
uint item_from;
uint item_to;
if (d * lights.data[i].inv_radius < 1.0) {
float attenuation = get_omni_attenuation(d, lights.data[i].inv_radius, lights.data[i].attenuation);
cluster_get_item_range(cluster_spot_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to);
vec3 spot_dir = lights.data[i].direction;
float scos = max(dot(-normalize(light_rel_vec), spot_dir), lights.data[i].cone_angle);
float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - lights.data[i].cone_angle));
attenuation *= 1.0 - pow(spot_rim, lights.data[i].cone_attenuation);
#ifdef USE_SUBGROUPS
item_from = subgroupBroadcastFirst(subgroupMin(item_from));
item_to = subgroupBroadcastFirst(subgroupMax(item_to));
#endif
vec3 light = lights.data[i].color / M_PI;
for (uint i = item_from; i < item_to; i++) {
uint mask = cluster_buffer.data[cluster_spot_offset + i];
mask &= cluster_get_range_clip_mask(i, item_min, item_max);
#ifdef USE_SUBGROUPS
uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask));
#else
uint merged_mask = mask;
#endif
if (lights.data[i].shadow_enabled) {
//has shadow
vec4 v = vec4(view_pos, 1.0);
while (merged_mask != 0) {
uint bit = findMSB(merged_mask);
merged_mask &= ~(1 << bit);
#ifdef USE_SUBGROUPS
if (((1 << bit) & mask) == 0) { //do not process if not originally here
continue;
}
#endif
vec4 splane = (lights.data[i].shadow_matrix * v);
splane /= splane.w;
//if (!bool(spot_lights.data[light_index].mask & draw_call.layer_mask)) {
// continue; //not masked
//}
float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
uint light_index = 32 * i + bit;
shadow_attenuation = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade);
// light_index was decoded from the spot section of the cluster buffer, so the light
// must be fetched from the spot_lights buffer (not omni_lights).
vec3 light_pos = spot_lights.data[light_index].position;
vec3 light_rel_vec = light_pos - view_pos;
float d = length(light_rel_vec);
float shadow_attenuation = 1.0;

// Only shade fog cells that lie within the light's range.
if (d * spot_lights.data[light_index].inv_radius < 1.0) {
	float attenuation = get_omni_attenuation(d, spot_lights.data[light_index].inv_radius, spot_lights.data[light_index].attenuation);

	// Cone falloff: fade towards the rim of the spot cone.
	vec3 spot_dir = spot_lights.data[light_index].direction;
	float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[light_index].cone_angle);
	float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[light_index].cone_angle));
	attenuation *= 1.0 - pow(spot_rim, spot_lights.data[light_index].cone_attenuation);

	vec3 light = spot_lights.data[light_index].color / M_PI;

	if (spot_lights.data[light_index].shadow_enabled) {
		//has shadow
		vec4 v = vec4(view_pos, 1.0);

		// Project the fog cell into the light's shadow map space.
		vec4 splane = (spot_lights.data[light_index].shadow_matrix * v);
		splane /= splane.w;

		float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;

		shadow_attenuation = exp(min(0.0, (depth - splane.z)) / spot_lights.data[light_index].inv_radius * spot_lights.data[light_index].shadow_volumetric_fog_fade);
	}

	total_light += light * attenuation * shadow_attenuation;
}
}
total_light += light * attenuation * shadow_attenuation;
}
}

View file

@ -436,7 +436,7 @@ void RendererSceneCull::instance_set_base(RID p_instance, RID p_base) {
case RS::INSTANCE_LIGHT: {
InstanceLightData *light = static_cast<InstanceLightData *>(instance->base_data);
if (scenario && RSG::storage->light_get_type(instance->base) != RS::LIGHT_DIRECTIONAL && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
if (scenario && instance->visible && RSG::storage->light_get_type(instance->base) != RS::LIGHT_DIRECTIONAL && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
scenario->dynamic_lights.erase(light->instance);
}
@ -783,6 +783,17 @@ void RendererSceneCull::instance_set_visible(RID p_instance, bool p_visible) {
_unpair_instance(instance);
}
if (instance->base_type == RS::INSTANCE_LIGHT) {
InstanceLightData *light = static_cast<InstanceLightData *>(instance->base_data);
if (instance->scenario && RSG::storage->light_get_type(instance->base) != RS::LIGHT_DIRECTIONAL && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
if (p_visible) {
instance->scenario->dynamic_lights.push_back(light->instance);
} else {
instance->scenario->dynamic_lights.erase(light->instance);
}
}
}
if (instance->base_type == RS::INSTANCE_PARTICLES_COLLISION) {
InstanceParticlesCollisionData *collision = static_cast<InstanceParticlesCollisionData *>(instance->base_data);
RSG::storage->particles_collision_instance_set_active(collision->instance, p_visible);
@ -1150,13 +1161,13 @@ void RendererSceneCull::_update_instance(Instance *p_instance) {
RS::LightBakeMode bake_mode = RSG::storage->light_get_bake_mode(p_instance->base);
if (RSG::storage->light_get_type(p_instance->base) != RS::LIGHT_DIRECTIONAL && bake_mode != light->bake_mode) {
if (p_instance->scenario && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
if (p_instance->visible && p_instance->scenario && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
p_instance->scenario->dynamic_lights.erase(light->instance);
}
light->bake_mode = bake_mode;
if (p_instance->scenario && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
if (p_instance->visible && p_instance->scenario && light->bake_mode == RS::LIGHT_BAKE_DYNAMIC) {
p_instance->scenario->dynamic_lights.push_back(light->instance);
}
}

View file

@ -262,10 +262,10 @@ void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("texture_resolve_multisample", "from_texture", "to_texture", "sync_with_draw"), &RenderingDevice::texture_resolve_multisample, DEFVAL(false));
ClassDB::bind_method(D_METHOD("framebuffer_format_create", "attachments"), &RenderingDevice::_framebuffer_format_create);
ClassDB::bind_method(D_METHOD("framebuffer_format_create_empty", "size"), &RenderingDevice::framebuffer_format_create_empty);
ClassDB::bind_method(D_METHOD("framebuffer_format_create_empty", "samples"), &RenderingDevice::framebuffer_format_create_empty, DEFVAL(TEXTURE_SAMPLES_1));
ClassDB::bind_method(D_METHOD("framebuffer_format_get_texture_samples", "format"), &RenderingDevice::framebuffer_format_get_texture_samples);
ClassDB::bind_method(D_METHOD("framebuffer_create", "textures", "validate_with_format"), &RenderingDevice::_framebuffer_create, DEFVAL(INVALID_FORMAT_ID));
ClassDB::bind_method(D_METHOD("framebuffer_create_empty", "size", "validate_with_format"), &RenderingDevice::framebuffer_create_empty, DEFVAL(INVALID_FORMAT_ID));
ClassDB::bind_method(D_METHOD("framebuffer_create_empty", "size", "samples", "validate_with_format"), &RenderingDevice::framebuffer_create_empty, DEFVAL(TEXTURE_SAMPLES_1), DEFVAL(INVALID_FORMAT_ID));
ClassDB::bind_method(D_METHOD("framebuffer_get_format", "framebuffer"), &RenderingDevice::framebuffer_get_format);
ClassDB::bind_method(D_METHOD("sampler_create", "state"), &RenderingDevice::_sampler_create);
@ -288,6 +288,7 @@ void RenderingDevice::_bind_methods() {
ClassDB::bind_method(D_METHOD("uniform_set_is_valid", "uniform_set"), &RenderingDevice::uniform_set_is_valid);
ClassDB::bind_method(D_METHOD("buffer_update", "buffer", "offset", "size_bytes", "data", "sync_with_draw"), &RenderingDevice::_buffer_update, DEFVAL(true));
// buffer_clear has no "data" argument, so it is bound to buffer_clear itself rather than to the _buffer_update wrapper.
ClassDB::bind_method(D_METHOD("buffer_clear", "buffer", "offset", "size_bytes", "sync_with_draw"), &RenderingDevice::buffer_clear, DEFVAL(true));
ClassDB::bind_method(D_METHOD("buffer_get_data", "buffer"), &RenderingDevice::buffer_get_data);
ClassDB::bind_method(D_METHOD("render_pipeline_create", "shader", "framebuffer_format", "vertex_format", "primitive", "rasterization_state", "multisample_state", "stencil_state", "color_blend_state", "dynamic_state_flags"), &RenderingDevice::_render_pipeline_create, DEFVAL(0));

View file

@ -468,11 +468,11 @@ public:
// This ID is guaranteed to be unique for the same formats and does not need to be freed.
virtual FramebufferFormatID framebuffer_format_create(const Vector<AttachmentFormat> &p_format) = 0;
virtual FramebufferFormatID framebuffer_format_create_empty(const Size2i &p_size) = 0;
virtual FramebufferFormatID framebuffer_format_create_empty(TextureSamples p_samples = TEXTURE_SAMPLES_1) = 0;
virtual TextureSamples framebuffer_format_get_texture_samples(FramebufferFormatID p_format) = 0;
virtual RID framebuffer_create(const Vector<RID> &p_texture_attachments, FramebufferFormatID p_format_check = INVALID_ID) = 0;
virtual RID framebuffer_create_empty(const Size2i &p_size, FramebufferFormatID p_format_check = INVALID_ID) = 0;
virtual RID framebuffer_create_empty(const Size2i &p_size, TextureSamples p_samples = TEXTURE_SAMPLES_1, FramebufferFormatID p_format_check = INVALID_ID) = 0;
virtual FramebufferFormatID framebuffer_get_format(RID p_framebuffer) = 0;
@ -650,6 +650,7 @@ public:
virtual bool uniform_set_is_valid(RID p_uniform_set) = 0;
virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, bool p_sync_with_draw = false) = 0; //this function can be used from any thread and it takes effect at the beginning of the frame, unless sync with draw is used, which is used to mix updates with draw calls
virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, bool p_sync_with_draw = false) = 0;
virtual Vector<uint8_t> buffer_get_data(RID p_buffer) = 0; //this causes stall, only use to retrieve large buffers for saving
/*************************/

View file

@ -2389,6 +2389,9 @@ RenderingServer::RenderingServer() {
ProjectSettings::get_singleton()->set_custom_property_info("rendering/spatial_indexer/threaded_cull_minimum_instances", PropertyInfo(Variant::INT, "rendering/spatial_indexer/threaded_cull_minimum_instances", PROPERTY_HINT_RANGE, "32,65536,1"));
GLOBAL_DEF("rendering/forward_renderer/threaded_render_minimum_instances", 500);
ProjectSettings::get_singleton()->set_custom_property_info("rendering/forward_renderer/threaded_render_minimum_instances", PropertyInfo(Variant::INT, "rendering/forward_renderer/threaded_render_minimum_instances", PROPERTY_HINT_RANGE, "32,65536,1"));
// Maximum number of elements handled by the cluster builder. This is an integer count
// (default 512, range step 1), so it is registered as INT like the other count settings above.
GLOBAL_DEF("rendering/cluster_builder/max_clustered_elements", 512);
ProjectSettings::get_singleton()->set_custom_property_info("rendering/cluster_builder/max_clustered_elements", PropertyInfo(Variant::INT, "rendering/cluster_builder/max_clustered_elements", PROPERTY_HINT_RANGE, "32,8192,1"));
}
RenderingServer::~RenderingServer() {

View file

@ -856,7 +856,10 @@ public:
VIEWPORT_DEBUG_DRAW_SDFGI_PROBES,
VIEWPORT_DEBUG_DRAW_GI_BUFFER,
VIEWPORT_DEBUG_DRAW_DISABLE_LOD,
VIEWPORT_DEBUG_DRAW_CLUSTER_OMNI_LIGHTS,
VIEWPORT_DEBUG_DRAW_CLUSTER_SPOT_LIGHTS,
VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS,
VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES,
};
virtual void viewport_set_debug_draw(RID p_viewport, ViewportDebugDraw p_draw) = 0;