From ecd39094eda8fd6bfba79659e83dee296fa8e931 Mon Sep 17 00:00:00 2001 From: lawnjelly Date: Sat, 15 Aug 2020 14:31:16 +0100 Subject: [PATCH] GLES2 2D fix normal mapping - batching and nvidia workaround Normal mapping previously took no account of rotation or flips in any path except the TEXTURE_RECT (uniform draw) method. This passed flips to the shader in uniforms. In order to pass flips and rotations to the shader in batching and nvidia workaround, a per vertex attribute is required rather than a uniform. This introduces LIGHT_ANGLE which encodes both the rotation of a quad (vertex) and the horizontal and vertical flip. In order to optionally store light angles in batching, we switch to using a 'unit' sized array which can be reused for different FVF types, as there is no need for a separate array for each FVF, as it is a waste of memory. --- drivers/gles2/rasterizer_array_gles2.h | 69 +++ .../gles2/rasterizer_canvas_base_gles2.cpp | 45 +- drivers/gles2/rasterizer_canvas_base_gles2.h | 4 +- drivers/gles2/rasterizer_canvas_gles2.cpp | 432 +++++------------ drivers/gles2/rasterizer_canvas_gles2.h | 438 +++++++++++++++++- drivers/gles2/rasterizer_gles2.cpp | 2 +- drivers/gles2/shaders/canvas.glsl | 28 ++ 7 files changed, 691 insertions(+), 327 deletions(-) diff --git a/drivers/gles2/rasterizer_array_gles2.h b/drivers/gles2/rasterizer_array_gles2.h index fd821a41c9..64588934d8 100644 --- a/drivers/gles2/rasterizer_array_gles2.h +++ b/drivers/gles2/rasterizer_array_gles2.h @@ -71,6 +71,75 @@ #include +// very simple non-growable array, that keeps track of the size of a 'unit' +// which can be cast to whatever vertex format FVF required, and is initially +// created with enough memory to hold the biggest FVF. +// This allows multiple FVFs to use the same array. 
+class RasterizerUnitArrayGLES2 { +public: + RasterizerUnitArrayGLES2() { + _list = nullptr; + free(); + } + ~RasterizerUnitArrayGLES2() { free(); } + + uint8_t *get_unit(unsigned int ui) { return &_list[ui * _unit_size_bytes]; } + const uint8_t *get_unit(unsigned int ui) const { return &_list[ui * _unit_size_bytes]; } + + int size() const { return _size; } + int max_size() const { return _max_size; } + + void free() { + if (_list) { + memdelete_arr(_list); + _list = 0; + } + _size = 0; + _max_size = 0; + _max_size_bytes = 0; + _unit_size_bytes = 0; + } + + void create(int p_max_size_units, int p_max_unit_size_bytes) { + free(); + + _max_unit_size_bytes = p_max_unit_size_bytes; + _max_size = p_max_size_units; + _max_size_bytes = p_max_size_units * p_max_unit_size_bytes; + + if (_max_size_bytes) { + _list = memnew_arr(uint8_t, _max_size_bytes); + } + } + + void prepare(int p_unit_size_bytes) { + _unit_size_bytes = p_unit_size_bytes; + _size = 0; + } + + // several items at a time + uint8_t *request(int p_num_items = 1) { + int old_size = _size; + _size += p_num_items; + + if (_size <= _max_size) { + return get_unit(old_size); + } + + // revert + _size = old_size; + return nullptr; + } + +private: + uint8_t *_list; + int _size; // in units + int _max_size; // in units + int _max_size_bytes; + int _unit_size_bytes; + int _max_unit_size_bytes; +}; + template class RasterizerArrayGLES2 { public: diff --git a/drivers/gles2/rasterizer_canvas_base_gles2.cpp b/drivers/gles2/rasterizer_canvas_base_gles2.cpp index 37963eaf28..8b5046d527 100644 --- a/drivers/gles2/rasterizer_canvas_base_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_base_gles2.cpp @@ -52,8 +52,13 @@ void RasterizerCanvasBaseGLES2::light_internal_free(RID p_rid) { void RasterizerCanvasBaseGLES2::canvas_begin() { - state.canvas_shader.bind(); state.using_transparent_rt = false; + + // always start with light_angle unset + state.using_light_angle = false; + 
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_LIGHT_ANGLE, false); + state.canvas_shader.bind(); + int viewport_x, viewport_y, viewport_width, viewport_height; if (storage->frame.current_rt) { @@ -155,6 +160,16 @@ void RasterizerCanvasBaseGLES2::draw_generic_textured_rect(const Rect2 &p_rect, glDrawArrays(GL_TRIANGLE_FAN, 0, 4); } +void RasterizerCanvasBaseGLES2::_set_texture_rect_mode(bool p_texture_rect, bool p_light_angle) { + // always set this directly (this could be state checked) + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, p_texture_rect); + + if (state.using_light_angle != p_light_angle) { + state.using_light_angle = p_light_angle; + state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_LIGHT_ANGLE, p_light_angle); + } +} + RasterizerStorageGLES2::Texture *RasterizerCanvasBaseGLES2::_bind_canvas_texture(const RID &p_texture, const RID &p_normal_map) { RasterizerStorageGLES2::Texture *tex_return = NULL; @@ -595,12 +610,13 @@ void RasterizerCanvasBaseGLES2::_draw_generic_indices(GLuint p_primitive, const glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } -void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs) { +void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs, const float *p_light_angles) { static const GLenum prim[5] = { GL_POINTS, GL_POINTS, GL_LINES, GL_TRIANGLES, GL_TRIANGLE_FAN }; int color_offset = 0; int uv_offset = 0; + int light_angle_offset = 0; int stride = 2; if (p_colors) { @@ -613,7 +629,12 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 stride += 2; } - float buffer_data[(2 + 2 + 4) * 4]; + if (p_light_angles) { //light_angles + light_angle_offset = stride; + stride += 1; + } + + float buffer_data[(2 + 2 + 4 + 1) * 4]; for (int i = 0; i < p_points; i++) { buffer_data[stride * i + 0] = 
p_vertices[i].x; @@ -636,6 +657,12 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 } } + if (p_light_angles) { + for (int i = 0; i < p_points; i++) { + buffer_data[stride * i + light_angle_offset + 0] = p_light_angles[i]; + } + } + glBindBuffer(GL_ARRAY_BUFFER, data.polygon_buffer); #ifndef GLES_OVER_GL // Orphan the buffer to avoid CPU/GPU sync points caused by glBufferSubData @@ -655,9 +682,19 @@ void RasterizerCanvasBaseGLES2::_draw_gui_primitive(int p_points, const Vector2 glEnableVertexAttribArray(VS::ARRAY_TEX_UV); } + if (p_light_angles) { + glVertexAttribPointer(VS::ARRAY_TANGENT, 1, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(light_angle_offset * sizeof(float))); + glEnableVertexAttribArray(VS::ARRAY_TANGENT); + } + glDrawArrays(prim[p_points], 0, p_points); storage->info.render._2d_draw_call_count++; + if (p_light_angles) { + // may not be needed + glDisableVertexAttribArray(VS::ARRAY_TANGENT); + } + glBindBuffer(GL_ARRAY_BUFFER, 0); } @@ -993,7 +1030,7 @@ void RasterizerCanvasBaseGLES2::initialize() { state.canvas_shader.init(); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true); + _set_texture_rect_mode(true); state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_RGBA_SHADOWS, storage->config.use_rgba_2d_shadows); state.canvas_shader.bind(); diff --git a/drivers/gles2/rasterizer_canvas_base_gles2.h b/drivers/gles2/rasterizer_canvas_base_gles2.h index 32d55bc3ee..abe0dbc65c 100644 --- a/drivers/gles2/rasterizer_canvas_base_gles2.h +++ b/drivers/gles2/rasterizer_canvas_base_gles2.h @@ -77,6 +77,7 @@ public: LensDistortedShaderGLES2 lens_shader; bool using_texture_rect; + bool using_light_angle; bool using_ninepatch; bool using_skeleton; @@ -112,7 +113,7 @@ public: virtual void canvas_begin(); virtual void canvas_end(); - void _draw_gui_primitive(int p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs); + void _draw_gui_primitive(int 
p_points, const Vector2 *p_vertices, const Color *p_colors, const Vector2 *p_uvs, const float *p_light_angles = nullptr); void _draw_polygon(const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor, const float *p_weights = NULL, const int *p_bones = NULL); void _draw_generic(GLuint p_primitive, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor); void _draw_generic_indices(GLuint p_primitive, const int *p_indices, int p_index_count, int p_vertex_count, const Vector2 *p_vertices, const Vector2 *p_uvs, const Color *p_colors, bool p_singlecolor); @@ -130,6 +131,7 @@ public: virtual void canvas_debug_viewport_shadows(Light *p_lights_with_shadow); RasterizerStorageGLES2::Texture *_bind_canvas_texture(const RID &p_texture, const RID &p_normal_map); + void _set_texture_rect_mode(bool p_texture_rect, bool p_light_angle = false); void initialize(); void finalize(); diff --git a/drivers/gles2/rasterizer_canvas_gles2.cpp b/drivers/gles2/rasterizer_canvas_gles2.cpp index bbac9e3ed5..711648bd37 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.cpp +++ b/drivers/gles2/rasterizer_canvas_gles2.cpp @@ -55,6 +55,7 @@ RasterizerCanvasGLES2::BatchData::BatchData() { index_buffer_size_units = 0; index_buffer_size_bytes = 0; use_colored_vertices = false; + use_light_angles = false; settings_use_batching = false; settings_max_join_item_commands = 0; settings_colored_vertex_format_threshold = 0.0f; @@ -212,10 +213,14 @@ void RasterizerCanvasGLES2::_batch_upload_buffers() { // orphan the old (for now) glBufferData(GL_ARRAY_BUFFER, 0, 0, GL_DYNAMIC_DRAW); - if (!bdata.use_colored_vertices) { - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * bdata.vertices.size(), bdata.vertices.get_data(), GL_DYNAMIC_DRAW); + if (!bdata.use_light_angles) { + if (!bdata.use_colored_vertices) { + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertex) * 
bdata.vertices.size(), bdata.vertices.get_data(), GL_DYNAMIC_DRAW); + } else { + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); + } } else { - glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexColored) * bdata.vertices_colored.size(), bdata.vertices_colored.get_data(), GL_DYNAMIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(BatchVertexLightAngled) * bdata.unit_vertices.size(), bdata.unit_vertices.get_unit(0), GL_DYNAMIC_DRAW); } // might not be necessary @@ -251,10 +256,6 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ int command_count = p_item->commands.size(); Item::Command *const *commands = p_item->commands.ptr(); - // locals, might be more efficient in a register (check) - Vector2 texpixel_size = r_fill_state.texpixel_size; - const float uv_epsilon = bdata.settings_uv_contract_amount; - // checking the color for not being white makes it 92/90 times faster in the case where it is white bool multiply_final_modulate = false; if (!r_fill_state.use_hardware_transform && (r_fill_state.final_modulate != Color(1, 1, 1, 1))) { @@ -316,196 +317,21 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ Item::CommandRect *rect = static_cast(command); - bool change_batch = false; + // unoptimized - could this be done once per batch / batch texture? 
+ bool send_light_angles = rect->normal_map != RID(); - // conditions for creating a new batch - if (r_fill_state.curr_batch->type != Batch::BT_RECT) { - change_batch = true; + bool buffer_full = false; - // check for special case if there is only a single or small number of rects, - // in which case we will use the legacy default rect renderer - // because it is faster for single rects - - // we only want to do this if not a joined item with more than 1 item, - // because joined items with more than 1, the command * will be incorrect - // NOTE - this is assuming that use_hardware_transform means that it is a non-joined item!! - // If that assumption is incorrect this will go horribly wrong. - if (bdata.settings_use_single_rect_fallback && r_fill_state.use_hardware_transform) { - bool is_single_rect = false; - int command_num_next = command_num + 1; - if (command_num_next < command_count) { - Item::Command *command_next = commands[command_num_next]; - if ((command_next->type != Item::Command::TYPE_RECT) && (command_next->type != Item::Command::TYPE_TRANSFORM)) { - is_single_rect = true; - } - } else { - is_single_rect = true; - } - // if it is a rect on its own, do exactly the same as the default routine - if (is_single_rect) { - _prefill_default_batch(r_fill_state, command_num, *p_item); - break; - } - } // if use hardware transform + // the template params must be explicit for compilation, + // this forces building the multiple versions of the function. 
+ if (send_light_angles) { + buffer_full = prefill_rect(rect, r_fill_state, r_command_start, command_num, command_count, commands, p_item, multiply_final_modulate); + } else { + buffer_full = prefill_rect(rect, r_fill_state, r_command_start, command_num, command_count, commands, p_item, multiply_final_modulate); } - Color col = rect->modulate; - if (multiply_final_modulate) { - col *= r_fill_state.final_modulate; - } - - // instead of doing all the texture preparation for EVERY rect, - // we build a list of texture combinations and do this once off. - // This means we have a potentially rather slow step to identify which texture combo - // using the RIDs. - int old_batch_tex_id = r_fill_state.batch_tex_id; - r_fill_state.batch_tex_id = _batch_find_or_create_tex(rect->texture, rect->normal_map, rect->flags & CANVAS_RECT_TILE, old_batch_tex_id); - - // try to create vertices BEFORE creating a batch, - // because if the vertex buffer is full, we need to finish this - // function, draw what we have so far, and then start a new set of batches - - // request FOUR vertices at a time, this is more efficient - BatchVertex *bvs = bdata.vertices.request(4); - if (!bvs) { - // run out of space in the vertex buffer .. finish this function and draw what we have so far - // return where we got to - r_command_start = command_num; + if (buffer_full) return true; - } - - // conditions for creating a new batch - if (old_batch_tex_id != r_fill_state.batch_tex_id) { - change_batch = true; - } - - // we need to treat color change separately because we need to count these - // to decide whether to switch on the fly to colored vertices. 
- if (!r_fill_state.curr_batch->color.equals(col)) { - change_batch = true; - bdata.total_color_changes++; - } - - if (change_batch) { - // put the tex pixel size in a local (less verbose and can be a register) - const BatchTex &batchtex = bdata.batch_textures[r_fill_state.batch_tex_id]; - batchtex.tex_pixel_size.to(texpixel_size); - - if (bdata.settings_uv_contract) { - r_fill_state.contract_uvs = (batchtex.flags & VS::TEXTURE_FLAG_FILTER) == 0; - } - - // need to preserve texpixel_size between items - r_fill_state.texpixel_size = texpixel_size; - - // open new batch (this should never fail, it dynamically grows) - r_fill_state.curr_batch = _batch_request_new(false); - - r_fill_state.curr_batch->type = Batch::BT_RECT; - r_fill_state.curr_batch->color.set(col); - r_fill_state.curr_batch->batch_texture_id = r_fill_state.batch_tex_id; - r_fill_state.curr_batch->first_command = command_num; - r_fill_state.curr_batch->num_commands = 1; - r_fill_state.curr_batch->first_quad = bdata.total_quads; - } else { - // we could alternatively do the count when closing a batch .. 
perhaps more efficient - r_fill_state.curr_batch->num_commands++; - } - - // fill the quad geometry - Vector2 mins = rect->rect.position; - - if (r_fill_state.transform_mode == TM_TRANSLATE) { - _software_transform_vertex(mins, r_fill_state.transform_combined); - } - - Vector2 maxs = mins + rect->rect.size; - - // just aliases - BatchVertex *bA = &bvs[0]; - BatchVertex *bB = &bvs[1]; - BatchVertex *bC = &bvs[2]; - BatchVertex *bD = &bvs[3]; - - bA->pos.x = mins.x; - bA->pos.y = mins.y; - - bB->pos.x = maxs.x; - bB->pos.y = mins.y; - - bC->pos.x = maxs.x; - bC->pos.y = maxs.y; - - bD->pos.x = mins.x; - bD->pos.y = maxs.y; - - if (rect->rect.size.x < 0) { - SWAP(bA->pos, bB->pos); - SWAP(bC->pos, bD->pos); - } - if (rect->rect.size.y < 0) { - SWAP(bA->pos, bD->pos); - SWAP(bB->pos, bC->pos); - } - - if (r_fill_state.transform_mode == TM_ALL) { - _software_transform_vertex(bA->pos, r_fill_state.transform_combined); - _software_transform_vertex(bB->pos, r_fill_state.transform_combined); - _software_transform_vertex(bC->pos, r_fill_state.transform_combined); - _software_transform_vertex(bD->pos, r_fill_state.transform_combined); - } - - // uvs - Vector2 src_min; - Vector2 src_max; - if (rect->flags & CANVAS_RECT_REGION) { - src_min = rect->source.position; - src_max = src_min + rect->source.size; - - src_min *= texpixel_size; - src_max *= texpixel_size; - - // nudge offset for the maximum to prevent precision error on GPU reading into line outside the source rect - // this is very difficult to get right. 
- if (r_fill_state.contract_uvs) { - src_min.x += uv_epsilon; - src_min.y += uv_epsilon; - src_max.x -= uv_epsilon; - src_max.y -= uv_epsilon; - } - } else { - src_min = Vector2(0, 0); - src_max = Vector2(1, 1); - } - - // 10% faster calculating the max first - Vector2 uvs[4] = { - src_min, - Vector2(src_max.x, src_min.y), - src_max, - Vector2(src_min.x, src_max.y), - }; - - if (rect->flags & CANVAS_RECT_TRANSPOSE) { - SWAP(uvs[1], uvs[3]); - } - - if (rect->flags & CANVAS_RECT_FLIP_H) { - SWAP(uvs[0], uvs[1]); - SWAP(uvs[2], uvs[3]); - } - if (rect->flags & CANVAS_RECT_FLIP_V) { - SWAP(uvs[0], uvs[3]); - SWAP(uvs[1], uvs[2]); - } - - bA->uv.set(uvs[0]); - bB->uv.set(uvs[1]); - bC->uv.set(uvs[2]); - bD->uv.set(uvs[3]); - - // increment quad count - bdata.total_quads++; } break; } @@ -519,119 +345,29 @@ bool RasterizerCanvasGLES2::prefill_joined_item(FillState &r_fill_state, int &r_ return false; } -// convert the stupidly high amount of batches (each with its own color) -// to larger batches where the color is stored in the verts instead... -// There is a trade off. Non colored verts are smaller so work faster, but -// there comes a point where it is better to just use colored verts to avoid lots of -// batches. -void RasterizerCanvasGLES2::_batch_translate_to_colored() { - bdata.vertices_colored.reset(); - bdata.batches_temp.reset(); - - // As the vertices_colored and batches_temp are 'mirrors' of the non-colored version, - // the sizes should be equal, and allocations should never fail. Hence the use of debug - // asserts to check program flow, these should not occur at runtime unless the allocation - // code has been altered. 
-#ifdef DEBUG_ENABLED - CRASH_COND(bdata.vertices_colored.max_size() != bdata.vertices.max_size()); - CRASH_COND(bdata.batches_temp.max_size() != bdata.batches.max_size()); -#endif - - Color curr_col(-1.0, -1.0, -1.0, -1.0); - - Batch *dest_batch = 0; - - // translate the batches into vertex colored batches - for (int n = 0; n < bdata.batches.size(); n++) { - const Batch &source_batch = bdata.batches[n]; - - bool needs_new_batch = true; - - if (dest_batch) { - if (dest_batch->type == source_batch.type) { - if (source_batch.type == Batch::BT_RECT) { - if (dest_batch->batch_texture_id == source_batch.batch_texture_id) { - // add to previous batch - dest_batch->num_commands += source_batch.num_commands; - needs_new_batch = false; - - // create the colored verts (only if not default) - int first_vert = source_batch.first_quad * 4; - int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands); - - for (int v = first_vert; v < end_vert; v++) { - const BatchVertex &bv = bdata.vertices[v]; - BatchVertexColored *cv = bdata.vertices_colored.request(); -#ifdef DEBUG_ENABLED - CRASH_COND(!cv); -#endif - cv->pos = bv.pos; - cv->uv = bv.uv; - cv->col = source_batch.color; - } - } // textures match - } else { - // default - // we can still join, but only under special circumstances - // does this ever happen? 
not sure at this stage, but left for future expansion - uint32_t source_last_command = source_batch.first_command + source_batch.num_commands; - if (source_last_command == dest_batch->first_command) { - dest_batch->num_commands += source_batch.num_commands; - needs_new_batch = false; - } // if the commands line up exactly - } - } // if both batches are the same type - - } // if dest batch is valid - - if (needs_new_batch) { - dest_batch = bdata.batches_temp.request(); -#ifdef DEBUG_ENABLED - CRASH_COND(!dest_batch); -#endif - - *dest_batch = source_batch; - - // create the colored verts (only if not default) - if (source_batch.type != Batch::BT_DEFAULT) { - int first_vert = source_batch.first_quad * 4; - int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands); - - for (int v = first_vert; v < end_vert; v++) { - const BatchVertex &bv = bdata.vertices[v]; - BatchVertexColored *cv = bdata.vertices_colored.request(); -#ifdef DEBUG_ENABLED - CRASH_COND(!cv); -#endif - cv->pos = bv.pos; - cv->uv = bv.uv; - cv->col = source_batch.color; - } - } - } - } - - // copy the temporary batches to the master batch list (this could be avoided but it makes the code cleaner) - bdata.batches.copy_from(bdata.batches_temp); -} - void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, RasterizerStorageGLES2::Material *p_material) { ERR_FAIL_COND(p_batch.num_commands <= 0); const bool &colored_verts = bdata.use_colored_vertices; + const bool &use_light_angles = bdata.use_light_angles; + int sizeof_vert; - if (!colored_verts) { - sizeof_vert = sizeof(BatchVertex); + if (!use_light_angles) { + if (!colored_verts) { + sizeof_vert = sizeof(BatchVertex); + } else { + sizeof_vert = sizeof(BatchVertexColored); + } } else { - sizeof_vert = sizeof(BatchVertexColored); + sizeof_vert = sizeof(BatchVertexLightAngled); } // batch tex const BatchTex &tex = bdata.batch_textures[p_batch.batch_texture_id]; // make sure to set all conditionals BEFORE binding the shader - 
state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + _set_texture_rect_mode(false, use_light_angles); // force repeat is set if non power of 2 texture, and repeat is needed if hardware doesn't support npot if (tex.tile_mode == BatchTex::TILE_FORCE_REPEAT) { @@ -665,6 +401,11 @@ void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, Rasterizer glEnableVertexAttribArray(VS::ARRAY_COLOR); } + if (use_light_angles) { + glVertexAttribPointer(VS::ARRAY_TANGENT, 1, GL_FLOAT, GL_FALSE, sizeof_vert, CAST_INT_TO_UCHAR_PTR(pointer + (8 * 4))); + glEnableVertexAttribArray(VS::ARRAY_TANGENT); + } + // We only want to set the GL wrapping mode if the texture is not already tiled (i.e. set in Import). // This is an optimization left over from the legacy renderer. // If we DID set tiling in the API, and reverted to clamped, then the next draw using this texture @@ -707,8 +448,10 @@ void RasterizerCanvasGLES2::_batch_render_rects(const Batch &p_batch, Rasterizer } break; } + // could these have ifs? glDisableVertexAttribArray(VS::ARRAY_TEX_UV); glDisableVertexAttribArray(VS::ARRAY_COLOR); + glDisableVertexAttribArray(VS::ARRAY_TANGENT); // may not be necessary .. state change optimization still TODO glBindBuffer(GL_ARRAY_BUFFER, 0); @@ -848,7 +591,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite Item::CommandLine *line = static_cast(command); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + _set_texture_rect_mode(false); if (state.canvas_shader.bind()) { _set_uniforms(); state.canvas_shader.use_material((void *)p_material); @@ -934,7 +677,17 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite // To work it around, we use a simpler draw method which does not flicker, but gives // a non negligible performance hit, so it's opt-in (GH-24466). 
if (use_nvidia_rect_workaround) { - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + + // are we using normal maps, if so we want to use light angle + bool send_light_angles = false; + + // only need to use light angles when normal mapping + // otherwise we can use the default shader + if (state.current_normal != RID()) { + send_light_angles = true; + } + + _set_texture_rect_mode(false, send_light_angles); if (state.canvas_shader.bind()) { _set_uniforms(); @@ -971,6 +724,10 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite src_rect.position + Vector2(0.0, src_rect.size.y), }; + // for encoding in light angle + bool flip_h = false; + bool flip_v = false; + if (r->flags & CANVAS_RECT_TRANSPOSE) { SWAP(uvs[1], uvs[3]); } @@ -978,10 +735,13 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite if (r->flags & CANVAS_RECT_FLIP_H) { SWAP(uvs[0], uvs[1]); SWAP(uvs[2], uvs[3]); + flip_h = true; + flip_v = !flip_v; } if (r->flags & CANVAS_RECT_FLIP_V) { SWAP(uvs[0], uvs[3]); SWAP(uvs[1], uvs[2]); + flip_v = !flip_v; } state.canvas_shader.set_uniform(CanvasShaderGLES2::COLOR_TEXPIXEL_SIZE, texpixel_size); @@ -994,7 +754,33 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite untile = true; } - _draw_gui_primitive(4, points, NULL, uvs); + if (send_light_angles) { + // for single rects, there is no need to fully utilize the light angle, + // we only need it to encode flips (horz and vert). But the shader can be reused with + // batching in which case the angle encodes the transform as well as + // the flips. + // Note transpose is NYI. I don't think it worked either with the non-nvidia method. 
+ + // if horizontal flip, angle is 180 + float angle = 0.0f; + if (flip_h) + angle = Math_PI; + + // add 1 (to take care of zero floating point error with sign) + angle += 1.0f; + + // flip if necessary + if (flip_v) + angle *= -1.0f; + + // light angle must be sent for each vert, instead as a single uniform in the uniform draw method + // this has the benefit of enabling batching with light angles. + float light_angles[4] = { angle, angle, angle, angle }; + + _draw_gui_primitive(4, points, NULL, uvs, light_angles); + } else { + _draw_gui_primitive(4, points, NULL, uvs); + } if (untile) { glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); @@ -1016,7 +802,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite // This branch is better for performance, but can produce flicker on Nvidia, see above comment. _bind_quad_buffer(); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true); + _set_texture_rect_mode(true); if (state.canvas_shader.bind()) { _set_uniforms(); @@ -1104,7 +890,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite Item::CommandNinePatch *np = static_cast(command); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + _set_texture_rect_mode(false); if (state.canvas_shader.bind()) { _set_uniforms(); state.canvas_shader.use_material((void *)p_material); @@ -1280,7 +1066,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite Item::CommandCircle *circle = static_cast(command); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + _set_texture_rect_mode(false); if (state.canvas_shader.bind()) { _set_uniforms(); @@ -1310,7 +1096,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite Item::CommandPolygon *polygon = static_cast(command); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + 
_set_texture_rect_mode(false); if (state.canvas_shader.bind()) { _set_uniforms(); @@ -1340,7 +1126,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite case Item::Command::TYPE_MESH: { Item::CommandMesh *mesh = static_cast(command); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + _set_texture_rect_mode(false); if (state.canvas_shader.bind()) { _set_uniforms(); @@ -1416,7 +1202,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCE_CUSTOM, multi_mesh->custom_data_format != VS::MULTIMESH_CUSTOM_DATA_NONE); state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, true); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + _set_texture_rect_mode(false); if (state.canvas_shader.bind()) { _set_uniforms(); @@ -1520,7 +1306,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite } } - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCE_CUSTOM, false); + _set_texture_rect_mode(false); state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_INSTANCING, false); storage->info.render._2d_draw_call_count++; @@ -1580,7 +1366,7 @@ void RasterizerCanvasGLES2::render_batches(Item::Command *const *p_commands, Ite case Item::Command::TYPE_PRIMITIVE: { Item::CommandPrimitive *primitive = static_cast(command); - state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, false); + _set_texture_rect_mode(false); if (state.canvas_shader.bind()) { _set_uniforms(); @@ -1732,23 +1518,28 @@ void RasterizerCanvasGLES2::flush_render_batches(Item *p_first_item, Item *p_cur // .. however probably not necessary bdata.use_colored_vertices = false; - // only check whether to convert if there are quads (prevent divide by zero) - // and we haven't decided to prevent color baking (due to e.g. 
MODULATE - // being used in a shader) - if (bdata.total_quads && !(bdata.joined_item_batch_flags & RasterizerStorageGLES2::Shader::CanvasItem::PREVENT_COLOR_BAKING)) { - // minus 1 to prevent single primitives (ratio 1.0) always being converted to colored.. - // in that case it is slightly cheaper to just have the color as part of the batch - float ratio = (float)(bdata.total_color_changes - 1) / (float)bdata.total_quads; + if (bdata.use_light_angles) { + _translate_batches_to_larger_FVF(); + } else { + // only check whether to convert if there are quads (prevent divide by zero) + // and we haven't decided to prevent color baking (due to e.g. MODULATE + // being used in a shader) + if (bdata.total_quads && !(bdata.joined_item_batch_flags & RasterizerStorageGLES2::Shader::CanvasItem::PREVENT_COLOR_BAKING)) { + // minus 1 to prevent single primitives (ratio 1.0) always being converted to colored.. + // in that case it is slightly cheaper to just have the color as part of the batch + float ratio = (float)(bdata.total_color_changes - 1) / (float)bdata.total_quads; - // use bigger than or equal so that 0.0 threshold can force always using colored verts - if (ratio >= bdata.settings_colored_vertex_format_threshold) { - bdata.use_colored_vertices = true; + // use bigger than or equal so that 0.0 threshold can force always using colored verts + if (ratio >= bdata.settings_colored_vertex_format_threshold) { + bdata.use_colored_vertices = true; - // small perf cost versus going straight to colored verts (maybe around 10%) - // however more straightforward - _batch_translate_to_colored(); + // small perf cost versus going straight to colored verts (maybe around 10%) + // however more straightforward + _translate_batches_to_larger_FVF(); + //_batch_translate_to_colored(); + } } - } + } // if not using light angles // send buffers to opengl _batch_upload_buffers(); @@ -3517,9 +3308,12 @@ void RasterizerCanvasGLES2::initialize() { bdata.vertex_buffer_size_bytes = 
bdata.vertex_buffer_size_units * sizeof_batch_vert; bdata.index_buffer_size_bytes = bdata.index_buffer_size_units * 2; // 16 bit inds - // create equal number of norma and colored verts (as the normal may need to be translated to colored) + // create equal number of normal and (max) unit sized verts (as the normal may need to be translated to a larger FVF) bdata.vertices.create(bdata.vertex_buffer_size_units); // 512k - bdata.vertices_colored.create(bdata.vertices.max_size()); // 1024k + bdata.unit_vertices.create(bdata.vertices.max_size(), sizeof(BatchVertexLightAngled)); + + // extra data per vert needed for larger FVFs + bdata.light_angles.create(bdata.vertices.max_size()); // num batches will be auto increased dynamically if required bdata.batches.create(1024); diff --git a/drivers/gles2/rasterizer_canvas_gles2.h b/drivers/gles2/rasterizer_canvas_gles2.h index b07998333e..e9a10486e9 100644 --- a/drivers/gles2/rasterizer_canvas_gles2.h +++ b/drivers/gles2/rasterizer_canvas_gles2.h @@ -89,6 +89,11 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { BatchColor col; }; + struct BatchVertexLightAngled : public BatchVertexColored { + // must be pod + float light_angle; + }; + struct Batch { enum CommandType : uint32_t { BT_DEFAULT, @@ -167,10 +172,13 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { void reset_flush() { batches.reset(); batch_textures.reset(); + vertices.reset(); + light_angles.reset(); total_quads = 0; total_color_changes = 0; + use_light_angles = false; } GLuint gl_vertex_buffer; @@ -182,13 +190,28 @@ class RasterizerCanvasGLES2 : public RasterizerCanvasBaseGLES2 { uint32_t index_buffer_size_units; uint32_t index_buffer_size_bytes; + // small vertex FVF type - pos and UV. + // This will always be written to initially, but can be translated + // to larger FVFs if necessary. 
RasterizerArrayGLES2 vertices; - RasterizerArrayGLES2 vertices_colored; + + // extra data which can be stored during prefilling, for later translation to larger FVFs + RasterizerArrayGLES2 light_angles; + + // instead of having a different buffer for each vertex FVF type + // we have a special array big enough for the biggest FVF + // which can have a changeable unit size, and reuse it. + RasterizerUnitArrayGLES2 unit_vertices; + RasterizerArrayGLES2 batches; RasterizerArrayGLES2 batches_temp; // used for translating to colored vertex batches RasterizerArray_non_pod_GLES2 batch_textures; // the only reason this is non-POD is because of RIDs + // flexible vertex format. + // all verts have pos and UV. + // some have color, some light angles etc. bool use_colored_vertices; + bool use_light_angles; RasterizerArrayGLES2 items_joined; RasterizerArrayGLES2 item_refs; @@ -321,11 +344,12 @@ private: bool try_join_item(Item *p_ci, RenderItemState &r_ris, bool &r_batch_break); void render_joined_item_commands(const BItemJoined &p_bij, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material, bool p_lit); void render_batches(Item::Command *const *p_commands, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); + bool prefill_joined_item(FillState &r_fill_state, int &r_command_start, Item *p_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); + void flush_render_batches(Item *p_first_item, Item *p_current_clip, bool &r_reclip, RasterizerStorageGLES2::Material *p_material); // low level batch funcs - void _batch_translate_to_colored(); int _batch_find_or_create_tex(const RID &p_texture, const RID &p_normal, bool p_tile, int p_previous_match); RasterizerStorageGLES2::Texture *_get_canvas_texture(const RID &p_texture) const; void _batch_upload_buffers(); @@ -358,6 +382,13 @@ private: public: void initialize(); RasterizerCanvasGLES2(); + +private: + template + bool 
prefill_rect(Item::CommandRect *rect, FillState &r_fill_state, int &r_command_start, int command_num, int command_count, Item::Command *const *commands, Item *p_item, bool multiply_final_modulate); + + template <class BATCH_VERTEX_TYPE, bool INCLUDE_LIGHT_ANGLES> + void _translate_batches_to_larger_FVF(); }; ////////////////////////////////////////////////////////////// @@ -485,4 +516,407 @@ inline bool RasterizerCanvasGLES2::_sort_items_match(const BSortItem &p_a, const return true; } +////////////////////////////////////////////////////////////// +// TEMPLATE FUNCS + +// Translation always involved adding color to the FVF, which enables +// joining of batches that have different colors. +// There is a trade off. Non colored verts are smaller so work faster, but +// there comes a point where it is better to just use colored verts to avoid lots of +// batches. +// In addition this can optionally add light angles to the FVF, necessary for normal mapping. +template <class BATCH_VERTEX_TYPE, bool INCLUDE_LIGHT_ANGLES> +void RasterizerCanvasGLES2::_translate_batches_to_larger_FVF() { + + // zeros the size and sets up how big each unit is + bdata.unit_vertices.prepare(sizeof(BATCH_VERTEX_TYPE)); + bdata.batches_temp.reset(); + + // As unit_vertices and batches_temp are 'mirrors' of the non-colored version, + // the sizes should be equal, and allocations should never fail. Hence the use of debug + // asserts to check program flow, these should not occur at runtime unless the allocation + // code has been altered. +#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED) + CRASH_COND(bdata.unit_vertices.max_size() != bdata.vertices.max_size()); + CRASH_COND(bdata.batches_temp.max_size() != bdata.batches.max_size()); +#endif + + Color curr_col(-1.0, -1.0, -1.0, -1.0); + + Batch *dest_batch = nullptr; + + const float *source_light_angles = &bdata.light_angles[0]; + + // translate the batches into vertex colored batches + for (int n = 0; n < bdata.batches.size(); n++) { + const Batch &source_batch = bdata.batches[n]; + + // does source batch use light angles?
+ const BatchTex &btex = bdata.batch_textures[source_batch.batch_texture_id]; + bool source_batch_uses_light_angles = btex.RID_normal != RID(); + + bool needs_new_batch = true; + + if (dest_batch) { + if (dest_batch->type == source_batch.type) { + if (source_batch.type == Batch::BT_RECT) { + if (dest_batch->batch_texture_id == source_batch.batch_texture_id) { + // add to previous batch + dest_batch->num_commands += source_batch.num_commands; + needs_new_batch = false; + + // create the colored verts (only if not default) + int first_vert = source_batch.first_quad * 4; + int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands); + + for (int v = first_vert; v < end_vert; v++) { + const BatchVertex &bv = bdata.vertices[v]; + BATCH_VERTEX_TYPE *cv = (BATCH_VERTEX_TYPE *)bdata.unit_vertices.request(); +#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED) + CRASH_COND(!cv); +#endif + cv->pos = bv.pos; + cv->uv = bv.uv; + cv->col = source_batch.color; + + if (INCLUDE_LIGHT_ANGLES) { + // this is required to allow compilation with non light angle vertex. + // it should be compiled out. + BatchVertexLightAngled *lv = (BatchVertexLightAngled *)cv; + if (source_batch_uses_light_angles) + lv->light_angle = *source_light_angles++; + else + lv->light_angle = 0.0f; // dummy, unused in vertex shader (could possibly be left uninitialized, but probably bad idea) + } + } + } // textures match + } else { + // default + // we can still join, but only under special circumstances + // does this ever happen?
not sure at this stage, but left for future expansion + uint32_t source_last_command = source_batch.first_command + source_batch.num_commands; + if (source_last_command == dest_batch->first_command) { + dest_batch->num_commands += source_batch.num_commands; + needs_new_batch = false; + } // if the commands line up exactly + } + } // if both batches are the same type + + } // if dest batch is valid + + if (needs_new_batch) { + dest_batch = bdata.batches_temp.request(); +#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED) + CRASH_COND(!dest_batch); +#endif + + *dest_batch = source_batch; + + // create the colored verts (only if not default) + if (source_batch.type != Batch::BT_DEFAULT) { + int first_vert = source_batch.first_quad * 4; + int end_vert = 4 * (source_batch.first_quad + source_batch.num_commands); + + for (int v = first_vert; v < end_vert; v++) { + const BatchVertex &bv = bdata.vertices[v]; + BATCH_VERTEX_TYPE *cv = (BATCH_VERTEX_TYPE *)bdata.unit_vertices.request(); +#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED) + CRASH_COND(!cv); +#endif + cv->pos = bv.pos; + cv->uv = bv.uv; + cv->col = source_batch.color; + + if (INCLUDE_LIGHT_ANGLES) { + // this is required to allow compilation with non light angle vertex. + // it should be compiled out.
+ BatchVertexLightAngled *lv = (BatchVertexLightAngled *)cv; + if (source_batch_uses_light_angles) + lv->light_angle = *source_light_angles++; + else + lv->light_angle = 0.0f; // dummy, unused in vertex shader (could possibly be left uninitialized, but probably bad idea) + } // if using light angles + } + } + } + } + + // copy the temporary batches to the master batch list (this could be avoided but it makes the code cleaner) + bdata.batches.copy_from(bdata.batches_temp); +} + +// return true if buffer full up, else return false +template +bool RasterizerCanvasGLES2::prefill_rect(Item::CommandRect *rect, FillState &r_fill_state, int &r_command_start, int command_num, int command_count, Item::Command *const *commands, Item *p_item, bool multiply_final_modulate) { + bool change_batch = false; + + // conditions for creating a new batch + if (r_fill_state.curr_batch->type != Batch::BT_RECT) { + change_batch = true; + + // check for special case if there is only a single or small number of rects, + // in which case we will use the legacy default rect renderer + // because it is faster for single rects + + // we only want to do this if not a joined item with more than 1 item, + // because joined items with more than 1, the command * will be incorrect + // NOTE - this is assuming that use_hardware_transform means that it is a non-joined item!! + // If that assumption is incorrect this will go horribly wrong.
+ if (bdata.settings_use_single_rect_fallback && r_fill_state.use_hardware_transform) { + bool is_single_rect = false; + int command_num_next = command_num + 1; + if (command_num_next < command_count) { + Item::Command *command_next = commands[command_num_next]; + if ((command_next->type != Item::Command::TYPE_RECT) && (command_next->type != Item::Command::TYPE_TRANSFORM)) { + is_single_rect = true; + } + } else { + is_single_rect = true; + } + // if it is a rect on its own, do exactly the same as the default routine + if (is_single_rect) { + _prefill_default_batch(r_fill_state, command_num, *p_item); + return false; + } + } // if use hardware transform + } + + Color col = rect->modulate; + if (multiply_final_modulate) { + col *= r_fill_state.final_modulate; + } + + // instead of doing all the texture preparation for EVERY rect, + // we build a list of texture combinations and do this once off. + // This means we have a potentially rather slow step to identify which texture combo + // using the RIDs. + int old_batch_tex_id = r_fill_state.batch_tex_id; + r_fill_state.batch_tex_id = _batch_find_or_create_tex(rect->texture, rect->normal_map, rect->flags & CANVAS_RECT_TILE, old_batch_tex_id); + + //r_fill_state.use_light_angles = send_light_angles; + if (SEND_LIGHT_ANGLES) + bdata.use_light_angles = true; + + // try to create vertices BEFORE creating a batch, + // because if the vertex buffer is full, we need to finish this + // function, draw what we have so far, and then start a new set of batches + + // request FOUR vertices at a time, this is more efficient + BatchVertex *bvs = bdata.vertices.request(4); + if (!bvs) { + // run out of space in the vertex buffer .. 
finish this function and draw what we have so far + // return where we got to + r_command_start = command_num; + return true; + } + + // conditions for creating a new batch + if (old_batch_tex_id != r_fill_state.batch_tex_id) { + change_batch = true; + } + + // we need to treat color change separately because we need to count these + // to decide whether to switch on the fly to colored vertices. + if (!r_fill_state.curr_batch->color.equals(col)) { + change_batch = true; + bdata.total_color_changes++; + } + + if (change_batch) { + // put the tex pixel size in a local (less verbose and can be a register) + const BatchTex &batchtex = bdata.batch_textures[r_fill_state.batch_tex_id]; + batchtex.tex_pixel_size.to(r_fill_state.texpixel_size); + + if (bdata.settings_uv_contract) { + r_fill_state.contract_uvs = (batchtex.flags & VS::TEXTURE_FLAG_FILTER) == 0; + } + + // need to preserve texpixel_size between items + r_fill_state.texpixel_size = r_fill_state.texpixel_size; + + // open new batch (this should never fail, it dynamically grows) + r_fill_state.curr_batch = _batch_request_new(false); + + r_fill_state.curr_batch->type = Batch::BT_RECT; + r_fill_state.curr_batch->color.set(col); + r_fill_state.curr_batch->batch_texture_id = r_fill_state.batch_tex_id; + r_fill_state.curr_batch->first_command = command_num; + r_fill_state.curr_batch->num_commands = 1; + r_fill_state.curr_batch->first_quad = bdata.total_quads; + } else { + // we could alternatively do the count when closing a batch .. 
perhaps more efficient + r_fill_state.curr_batch->num_commands++; + } + + // fill the quad geometry + Vector2 mins = rect->rect.position; + + if (r_fill_state.transform_mode == TM_TRANSLATE) { + _software_transform_vertex(mins, r_fill_state.transform_combined); + } + + Vector2 maxs = mins + rect->rect.size; + + // just aliases + BatchVertex *bA = &bvs[0]; + BatchVertex *bB = &bvs[1]; + BatchVertex *bC = &bvs[2]; + BatchVertex *bD = &bvs[3]; + + bA->pos.x = mins.x; + bA->pos.y = mins.y; + + bB->pos.x = maxs.x; + bB->pos.y = mins.y; + + bC->pos.x = maxs.x; + bC->pos.y = maxs.y; + + bD->pos.x = mins.x; + bD->pos.y = maxs.y; + + // possibility of applying flips here for normal mapping .. but they don't seem to be used + if (rect->rect.size.x < 0) { + SWAP(bA->pos, bB->pos); + SWAP(bC->pos, bD->pos); + } + if (rect->rect.size.y < 0) { + SWAP(bA->pos, bD->pos); + SWAP(bB->pos, bC->pos); + } + + if (r_fill_state.transform_mode == TM_ALL) { + _software_transform_vertex(bA->pos, r_fill_state.transform_combined); + _software_transform_vertex(bB->pos, r_fill_state.transform_combined); + _software_transform_vertex(bC->pos, r_fill_state.transform_combined); + _software_transform_vertex(bD->pos, r_fill_state.transform_combined); + } + + // uvs + Vector2 src_min; + Vector2 src_max; + if (rect->flags & CANVAS_RECT_REGION) { + src_min = rect->source.position; + src_max = src_min + rect->source.size; + + src_min *= r_fill_state.texpixel_size; + src_max *= r_fill_state.texpixel_size; + + const float uv_epsilon = bdata.settings_uv_contract_amount; + + // nudge offset for the maximum to prevent precision error on GPU reading into line outside the source rect + // this is very difficult to get right. 
+ if (r_fill_state.contract_uvs) { + src_min.x += uv_epsilon; + src_min.y += uv_epsilon; + src_max.x -= uv_epsilon; + src_max.y -= uv_epsilon; + } + } else { + src_min = Vector2(0, 0); + src_max = Vector2(1, 1); + } + + // 10% faster calculating the max first + Vector2 uvs[4] = { + src_min, + Vector2(src_max.x, src_min.y), + src_max, + Vector2(src_min.x, src_max.y), + }; + + // for encoding in light angle + // flips should be optimized out when not being used for light angle. + bool flip_h = false; + bool flip_v = false; + + if (rect->flags & CANVAS_RECT_TRANSPOSE) { + SWAP(uvs[1], uvs[3]); + } + + if (rect->flags & CANVAS_RECT_FLIP_H) { + SWAP(uvs[0], uvs[1]); + SWAP(uvs[2], uvs[3]); + flip_h = !flip_h; + flip_v = !flip_v; + } + if (rect->flags & CANVAS_RECT_FLIP_V) { + SWAP(uvs[0], uvs[3]); + SWAP(uvs[1], uvs[2]); + flip_v = !flip_v; + } + + bA->uv.set(uvs[0]); + bB->uv.set(uvs[1]); + bC->uv.set(uvs[2]); + bD->uv.set(uvs[3]); + + if (SEND_LIGHT_ANGLES) { + // we can either keep the light angles in sync with the verts when writing, + // or sync them up during translation. We are syncing in translation. + // N.B. There may be batches that don't require light_angles between batches that do. + float *angles = bdata.light_angles.request(4); +#if defined(TOOLS_ENABLED) && defined(DEBUG_ENABLED) + CRASH_COND(angles == nullptr); +#endif + + float angle = 0.0f; + const float TWO_PI = Math_PI * 2; + + if (r_fill_state.transform_mode != TM_NONE) { + + const Transform2D &tr = r_fill_state.transform_combined; + + // apply to an x axis + // the x axis and y axis can be taken directly from the transform (no need to xform identity vectors) + Vector2 x_axis(tr.elements[0][0], tr.elements[1][0]); + + // have to do a y axis to check for scaling flips + // this is hassle and extra slowness. We could only allow flips via the flags. + Vector2 y_axis(tr.elements[0][1], tr.elements[1][1]); + + // has the x / y axis flipped due to scaling? 
+ float cross = x_axis.cross(y_axis); + if (cross < 0.0f) { + flip_v = !flip_v; + } + + // passing an angle is smaller than a vector, it can be reconstructed in the shader + angle = x_axis.angle(); + + // we don't want negative angles, as negative is used to encode flips. + // This moves range from -PI to PI to 0 to TWO_PI + if (angle < 0.0f) + angle += TWO_PI; + + } // if transform needed + + // if horizontal flip, angle is shifted by 180 degrees + if (flip_h) { + angle += Math_PI; + + // mod to get back to 0 to TWO_PI range + angle = fmodf(angle, TWO_PI); + } + + // add 1 (to take care of zero floating point error with sign) + angle += 1.0f; + + // flip if necessary to indicate a vertical flip in the shader + if (flip_v) + angle *= -1.0f; + + // light angle must be sent for each vert, instead of as a single uniform in the uniform draw method + // this has the benefit of enabling batching with light angles. + for (int n = 0; n < 4; n++) { + angles[n] = angle; + } + } + + // increment quad count + bdata.total_quads++; + + return false; +} + #endif // RASTERIZERCANVASGLES2_H diff --git a/drivers/gles2/rasterizer_gles2.cpp b/drivers/gles2/rasterizer_gles2.cpp index 7aa094644d..ec49f460c4 100644 --- a/drivers/gles2/rasterizer_gles2.cpp +++ b/drivers/gles2/rasterizer_gles2.cpp @@ -407,7 +407,7 @@ void RasterizerGLES2::blit_render_target_to_screen(RID p_render_target, const Re RasterizerStorageGLES2::RenderTarget *rt = storage->render_target_owner.getornull(p_render_target); ERR_FAIL_COND(!rt); - canvas->state.canvas_shader.set_conditional(CanvasShaderGLES2::USE_TEXTURE_RECT, true); + canvas->_set_texture_rect_mode(true); canvas->state.canvas_shader.set_custom_shader(0); canvas->state.canvas_shader.bind(); diff --git a/drivers/gles2/shaders/canvas.glsl b/drivers/gles2/shaders/canvas.glsl index eb2c210a64..04f313d343 100644 --- a/drivers/gles2/shaders/canvas.glsl +++ b/drivers/gles2/shaders/canvas.glsl @@ -18,6 +18,12 @@ uniform highp mat4 projection_matrix; uniform highp
mat4 modelview_matrix; uniform highp mat4 extra_matrix; attribute highp vec2 vertex; // attrib:0 + +#ifdef USE_LIGHT_ANGLE +// shared with tangent, not used in canvas shader +attribute highp float light_angle; // attrib:2 +#endif + attribute vec4 color_attrib; // attrib:3 attribute vec2 uv_attrib; // attrib:4 @@ -219,12 +225,34 @@ VERTEX_SHADER_CODE pos = outvec.xy; #endif +#ifdef USE_LIGHT_ANGLE + // we add a fixed offset because we are using the sign later, + // and don't want floating point error around 0.0 + float la = abs(light_angle) - 1.0; + + // vector light angle + vec4 vla; + vla.xy = vec2(cos(la), sin(la)); + vla.zw = vec2(-vla.y, vla.x); + + // vertical flip encoded in the sign + vla.zw *= sign(light_angle); + + // apply the transform matrix. + // The rotate will be encoded in the transform matrix for single rects, + // and just the flips in the light angle. + // For batching we will encode the rotation and the flips + // in the light angle, and can use the same shader. + local_rot.xy = normalize((modelview_matrix * (extra_matrix_instance * vec4(vla.xy, 0.0, 0.0))).xy); + local_rot.zw = normalize((modelview_matrix * (extra_matrix_instance * vec4(vla.zw, 0.0, 0.0))).xy); +#else local_rot.xy = normalize((modelview_matrix * (extra_matrix_instance * vec4(1.0, 0.0, 0.0, 0.0))).xy); local_rot.zw = normalize((modelview_matrix * (extra_matrix_instance * vec4(0.0, 1.0, 0.0, 0.0))).xy); #ifdef USE_TEXTURE_RECT local_rot.xy *= sign(src_rect.z); local_rot.zw *= sign(src_rect.w); #endif +#endif // not using light angle #endif }