diff --git a/core/bind/core_bind.cpp b/core/bind/core_bind.cpp index e61e392a79..1dc48a95a0 100644 --- a/core/bind/core_bind.cpp +++ b/core/bind/core_bind.cpp @@ -2612,7 +2612,7 @@ void _Semaphore::_bind_methods() { _Semaphore::_Semaphore() { - semaphore = Semaphore::create(); + semaphore = SemaphoreOld::create(); } _Semaphore::~_Semaphore() { diff --git a/core/bind/core_bind.h b/core/bind/core_bind.h index 87da51f97e..955375cc99 100644 --- a/core/bind/core_bind.h +++ b/core/bind/core_bind.h @@ -642,7 +642,7 @@ public: class _Semaphore : public Reference { GDCLASS(_Semaphore, Reference); - Semaphore *semaphore; + SemaphoreOld *semaphore; static void _bind_methods(); diff --git a/core/command_queue_mt.cpp b/core/command_queue_mt.cpp index c20735939d..861ca8d1d3 100644 --- a/core/command_queue_mt.cpp +++ b/core/command_queue_mt.cpp @@ -111,11 +111,11 @@ CommandQueueMT::CommandQueueMT(bool p_sync) { for (int i = 0; i < SYNC_SEMAPHORES; i++) { - sync_sems[i].sem = Semaphore::create(); + sync_sems[i].sem = SemaphoreOld::create(); sync_sems[i].in_use = false; } if (p_sync) - sync = Semaphore::create(); + sync = SemaphoreOld::create(); else sync = NULL; } diff --git a/core/command_queue_mt.h b/core/command_queue_mt.h index ba43fa07f3..2b6e0201f0 100644 --- a/core/command_queue_mt.h +++ b/core/command_queue_mt.h @@ -297,7 +297,7 @@ class CommandQueueMT { struct SyncSemaphore { - Semaphore *sem; + SemaphoreOld *sem; bool in_use; }; @@ -342,7 +342,7 @@ class CommandQueueMT { uint32_t dealloc_ptr; SyncSemaphore sync_sems[SYNC_SEMAPHORES]; Mutex *mutex; - Semaphore *sync; + SemaphoreOld *sync; template T *allocate() { diff --git a/core/io/file_access_network.cpp b/core/io/file_access_network.cpp index 1b09ac7208..202eb89dbd 100644 --- a/core/io/file_access_network.cpp +++ b/core/io/file_access_network.cpp @@ -231,7 +231,7 @@ FileAccessNetworkClient::FileAccessNetworkClient() { singleton = this; last_id = 0; client.instance(); - sem = Semaphore::create(); + sem = SemaphoreOld::create(); lockcount = 0; } @@ -522,8 +522,8 @@ FileAccessNetwork::FileAccessNetwork() { eof_flag = false; opened = false; pos = 0; - sem = Semaphore::create(); - page_sem = Semaphore::create(); + sem = SemaphoreOld::create(); + page_sem = SemaphoreOld::create(); buffer_mutex = Mutex::create(); FileAccessNetworkClient *nc = FileAccessNetworkClient::singleton; nc->lock_mutex(); diff --git a/core/io/file_access_network.h b/core/io/file_access_network.h index e2da1d0893..f329abf7c5 100644 --- a/core/io/file_access_network.h +++ b/core/io/file_access_network.h @@ -49,7 +49,7 @@ class FileAccessNetworkClient { List block_requests; - Semaphore *sem; + SemaphoreOld *sem; Thread *thread; bool quit; Mutex *mutex; @@ -85,8 +85,8 @@ public: class FileAccessNetwork : public FileAccess { - Semaphore *sem; - Semaphore *page_sem; + SemaphoreOld *sem; + SemaphoreOld *page_sem; Mutex *buffer_mutex; bool opened; size_t total_size; diff --git a/core/io/ip.cpp b/core/io/ip.cpp index 23f6ca25d0..7d18117711 100644 --- a/core/io/ip.cpp +++ b/core/io/ip.cpp @@ -71,7 +71,7 @@ struct _IP_ResolverPrivate { } Mutex *mutex; - Semaphore *sem; + SemaphoreOld *sem; Thread *thread; //Semaphore* semaphore; @@ -319,7 +319,7 @@ IP::IP() { #ifndef NO_THREADS - resolver->sem = Semaphore::create(); + resolver->sem = SemaphoreOld::create(); if (resolver->sem) { resolver->thread_abort = false; diff --git a/core/os/semaphore.cpp b/core/os/semaphore.cpp index b2ba9716f0..2c20f234d0 100644 --- a/core/os/semaphore.cpp +++ b/core/os/semaphore.cpp @@ -32,14 +32,14 @@ #include "core/error_macros.h" -Semaphore *(*Semaphore::create_func)() = 0; +SemaphoreOld *(*SemaphoreOld::create_func)() = 0; -Semaphore *Semaphore::create() { +SemaphoreOld *SemaphoreOld::create() { ERR_FAIL_COND_V(!create_func, 0); return create_func(); } -Semaphore::~Semaphore() { +SemaphoreOld::~SemaphoreOld() { } diff --git a/core/os/semaphore.h b/core/os/semaphore.h index 9f3c0f549c..f16a15a6db 100644 --- a/core/os/semaphore.h +++ b/core/os/semaphore.h @@ -32,19 +32,53 @@ #define SEMAPHORE_H #include "core/error_list.h" +#include "core/typedefs.h" + +#include +#include class Semaphore { +private: + std::mutex mutex_; + std::condition_variable condition_; + unsigned long count_ = 0; // Initialized as locked. + +public: + _ALWAYS_INLINE_ void post() { + std::lock_guard lock(mutex_); + ++count_; + condition_.notify_one(); + } + + _ALWAYS_INLINE_ void wait() { + std::unique_lock lock(mutex_); + while (!count_) // Handle spurious wake-ups. + condition_.wait(lock); + --count_; + } + + _ALWAYS_INLINE_ bool try_wait() { + std::lock_guard lock(mutex_); + if (count_) { + --count_; + return true; + } + return false; + } +}; + +class SemaphoreOld { protected: - static Semaphore *(*create_func)(); + static SemaphoreOld *(*create_func)(); public: virtual Error wait() = 0; ///< wait until semaphore has positive value, then decrement and pass virtual Error post() = 0; ///< unlock the semaphore, incrementing the value virtual int get() const = 0; ///< get semaphore value - static Semaphore *create(); ///< Create a mutex + static SemaphoreOld *create(); ///< Create a mutex - virtual ~Semaphore(); + virtual ~SemaphoreOld(); }; #endif diff --git a/core/os/thread_dummy.cpp b/core/os/thread_dummy.cpp index d4f65b0312..916aeeda30 100644 --- a/core/os/thread_dummy.cpp +++ b/core/os/thread_dummy.cpp @@ -48,12 +48,12 @@ void MutexDummy::make_default() { Mutex::create_func = &MutexDummy::create; }; -Semaphore *SemaphoreDummy::create() { +SemaphoreOld *SemaphoreDummy::create() { return memnew(SemaphoreDummy); }; void SemaphoreDummy::make_default() { - Semaphore::create_func = &SemaphoreDummy::create; + SemaphoreOld::create_func = &SemaphoreDummy::create; }; RWLock *RWLockDummy::create() { diff --git a/core/os/thread_dummy.h b/core/os/thread_dummy.h index c8b52ae4dd..9329cdaa32 100644 --- a/core/os/thread_dummy.h +++ b/core/os/thread_dummy.h @@ -58,9 +58,9 @@ public: static void make_default(); }; -class SemaphoreDummy : public Semaphore { +class SemaphoreDummy : public SemaphoreOld { - static Semaphore *create(); + static SemaphoreOld *create(); public: virtual Error wait() { return OK; }; diff --git a/core/rid_owner.h b/core/rid_owner.h index 7490416c19..8206e5b4fc 100644 --- a/core/rid_owner.h +++ b/core/rid_owner.h @@ -3,6 +3,8 @@ #include "core/print_string.h" #include "core/rid.h" +#include "core/spin_lock.h" + #include class RID_AllocBase { @@ -28,7 +30,7 @@ public: virtual ~RID_AllocBase() {} }; -template +template class RID_Alloc : public RID_AllocBase { T **chunks; @@ -41,9 +43,15 @@ class RID_Alloc : public RID_AllocBase { const char *description; + SpinLock spin_lock; + public: RID make_rid(const T &p_value) { + if (THREAD_SAFE) { + spin_lock.lock(); + } + if (alloc_count == max_alloc) { //allocate a new chunk uint32_t chunk_count = alloc_count == 0 ? 0 : (max_alloc / elements_in_chunk); @@ -85,11 +93,19 @@ public: validator_chunks[free_chunk][free_element] = validator; alloc_count++; + if (THREAD_SAFE) { + spin_lock.unlock(); + } + return _make_from_id(id); } _FORCE_INLINE_ T *getornull(const RID &p_rid) { + if (THREAD_SAFE) { + spin_lock.lock(); + } + uint64_t id = p_rid.get_id(); uint32_t idx = uint32_t(id & 0xFFFFFFFF); if (unlikely(idx >= max_alloc)) { @@ -104,14 +120,27 @@ public: return NULL; } - return &chunks[idx_chunk][idx_element]; + T *ptr = &chunks[idx_chunk][idx_element]; + + if (THREAD_SAFE) { + spin_lock.unlock(); + } + + return ptr; } _FORCE_INLINE_ bool owns(const RID &p_rid) { + if (THREAD_SAFE) { + spin_lock.lock(); + } + uint64_t id = p_rid.get_id(); uint32_t idx = uint32_t(id & 0xFFFFFFFF); if (unlikely(idx >= max_alloc)) { + if (THREAD_SAFE) { + spin_lock.unlock(); + } return false; } @@ -119,11 +148,22 @@ public: uint32_t idx_element = idx % elements_in_chunk; uint32_t validator = uint32_t(id >> 32); - return validator_chunks[idx_chunk][idx_element] == validator; + + bool owned = validator_chunks[idx_chunk][idx_element] == validator; + + if (THREAD_SAFE) { + spin_lock.unlock(); + } + + return owned; } _FORCE_INLINE_ void free(const RID &p_rid) { + if (THREAD_SAFE) { + spin_lock.lock(); + } + uint64_t id = p_rid.get_id(); uint32_t idx = uint32_t(id & 0xFFFFFFFF); ERR_FAIL_COND(idx >= max_alloc); @@ -139,6 +179,10 @@ public: alloc_count--; free_list_chunks[alloc_count / elements_in_chunk][alloc_count % elements_in_chunk] = idx; + + if (THREAD_SAFE) { + spin_lock.unlock(); + } } _FORCE_INLINE_ uint32_t get_rid_count() const { @@ -147,8 +191,15 @@ public: _FORCE_INLINE_ T *get_rid_by_index(uint32_t p_index) { ERR_FAIL_INDEX_V(p_index, alloc_count, NULL); + if (THREAD_SAFE) { + spin_lock.lock(); + } uint64_t idx = free_list_chunks[p_index / elements_in_chunk][p_index % elements_in_chunk]; - return &chunks[idx / elements_in_chunk][idx % elements_in_chunk]; + T *ptr = &chunks[idx / elements_in_chunk][idx % elements_in_chunk]; + if (THREAD_SAFE) { + spin_lock.unlock(); + } + return ptr; } void get_owned_list(List *p_owned) { @@ -203,9 +254,9 @@ public: } }; -template +template class RID_PtrOwner { - RID_Alloc alloc; + RID_Alloc alloc; public: _FORCE_INLINE_ RID make_rid(T *p_ptr) { @@ -239,9 +290,9 @@ public: alloc(p_target_chunk_byte_size) {} }; -template +template class RID_Owner { - RID_Alloc alloc; + RID_Alloc alloc; public: _FORCE_INLINE_ RID make_rid(const T &p_ptr) { diff --git a/core/spin_lock.h b/core/spin_lock.h new file mode 100644 index 0000000000..07f865d91a --- /dev/null +++ b/core/spin_lock.h @@ -0,0 +1,20 @@ +#ifndef SPIN_LOCK_H +#define SPIN_LOCK_H + +#include "core/typedefs.h" +#include + +class SpinLock { + std::atomic_flag locked = ATOMIC_FLAG_INIT; + +public: + _ALWAYS_INLINE_ void lock() { + while (locked.test_and_set(std::memory_order_acquire)) { + ; + } + } + _ALWAYS_INLINE_ void unlock() { + locked.clear(std::memory_order_release); + } +}; +#endif // SPIN_LOCK_H diff --git a/core/thread_work_pool.cpp b/core/thread_work_pool.cpp new file mode 100644 index 0000000000..aafb9c11d3 --- /dev/null +++ b/core/thread_work_pool.cpp @@ -0,0 +1,53 @@ +#include "thread_work_pool.h" +#include "core/os/os.h" + +void ThreadWorkPool::_thread_function(ThreadData *p_thread) { + + while (true) { + p_thread->start.wait(); + if (p_thread->exit.load()) { + break; + } + p_thread->work->work(); + p_thread->completed.post(); + } +} + +void ThreadWorkPool::init(int p_thread_count) { + ERR_FAIL_COND(threads != nullptr); + if (p_thread_count < 0) { + p_thread_count = OS::get_singleton()->get_processor_count(); + } + + thread_count = p_thread_count; + threads = memnew_arr(ThreadData, thread_count); + + for (uint32_t i = 0; i < thread_count; i++) { + threads[i].exit.store(false); + threads[i].thread = memnew(std::thread(ThreadWorkPool::_thread_function, &threads[i])); + } +} + +void ThreadWorkPool::finish() { + + if (threads == nullptr) { + return; + } + + for (uint32_t i = 0; i < thread_count; i++) { + threads[i].exit.store(true); + threads[i].start.post(); + } + for (uint32_t i = 0; i < thread_count; i++) { + threads[i].thread->join(); + memdelete(threads[i].thread); + } + + memdelete_arr(threads); + threads = nullptr; +} + +ThreadWorkPool::~ThreadWorkPool() { + + finish(); +} diff --git a/core/thread_work_pool.h b/core/thread_work_pool.h new file mode 100644 index 0000000000..165eb391e2 --- /dev/null +++ b/core/thread_work_pool.h @@ -0,0 +1,78 @@ +#ifndef THREAD_WORK_POOL_H +#define THREAD_WORK_POOL_H + +#include "core/os/memory.h" +#include "core/os/semaphore.h" +#include +#include +class ThreadWorkPool { + + std::atomic index; + + struct BaseWork { + std::atomic *index; + uint32_t max_elements; + virtual void work() = 0; + }; + + template + struct Work : public BaseWork { + C *instance; + M method; + U userdata; + virtual void work() { + + while (true) { + uint32_t work_index = index->fetch_add(1, std::memory_order_relaxed); + if (work_index >= max_elements) { + break; + } + (instance->*method)(work_index, userdata); + } + } + }; + + struct ThreadData { + std::thread *thread; + Semaphore start; + Semaphore completed; + std::atomic exit; + BaseWork *work; + }; + + ThreadData *threads = nullptr; + uint32_t thread_count = 0; + + static void _thread_function(ThreadData *p_thread); + +public: + template + void do_work(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) { + + ERR_FAIL_COND(!threads); //never initialized + + index.store(0); + + Work *w = memnew((Work)); + w->instance = p_instance; + w->userdata = p_userdata; + w->method = p_method; + w->index = &index; + w->max_elements = p_elements; + + for (uint32_t i = 0; i < thread_count; i++) { + threads[i].work = w; + threads[i].start.post(); + } + for (uint32_t i = 0; i < thread_count; i++) { + threads[i].completed.wait(); + threads[i].work = nullptr; + } + } + + void init(int p_thread_count = -1); + void finish(); + ~ThreadWorkPool(); +}; + +#endif // THREAD_POOL_H diff --git a/drivers/unix/semaphore_posix.cpp b/drivers/unix/semaphore_posix.cpp index 5f412adea1..b532b09cd6 100644 --- a/drivers/unix/semaphore_posix.cpp +++ b/drivers/unix/semaphore_posix.cpp @@ -62,7 +62,7 @@ int SemaphorePosix::get() const { return val; } -Semaphore *SemaphorePosix::create_semaphore_posix() { +SemaphoreOld *SemaphorePosix::create_semaphore_posix() { return memnew(SemaphorePosix); } diff --git a/drivers/unix/semaphore_posix.h b/drivers/unix/semaphore_posix.h index e06f6316db..2bffe6933d 100644 --- a/drivers/unix/semaphore_posix.h +++ b/drivers/unix/semaphore_posix.h @@ -37,11 +37,11 @@ #include -class SemaphorePosix : public Semaphore { +class SemaphorePosix : public SemaphoreOld { mutable sem_t sem; - static Semaphore *create_semaphore_posix(); + static SemaphoreOld *create_semaphore_posix(); public: virtual Error wait(); diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index f6154a3cbd..a14d45067a 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -3448,8 +3448,6 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector &p_stages) { - _THREAD_SAFE_METHOD_ - //descriptor layouts Vector > set_bindings; Vector > uniform_info; @@ -3694,6 +3692,8 @@ RID RenderingDeviceVulkan::shader_create(const Vector &p_stages //all good, let's create modules + _THREAD_SAFE_METHOD_ + Shader shader; shader.vertex_input_locations = vertex_input_locations; diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index 5b3b7a5f47..87971ba2b8 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -114,7 +114,7 @@ class RenderingDeviceVulkan : public RenderingDevice { RID owner; }; - RID_Owner texture_owner; + RID_Owner texture_owner; uint32_t texture_upload_region_size_px; PoolVector _texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer); @@ -264,7 +264,7 @@ class RenderingDeviceVulkan : public RenderingDevice { Size2 size; }; - RID_Owner framebuffer_owner; + RID_Owner framebuffer_owner; /***********************/ /**** VERTEX BUFFER ****/ @@ -279,7 +279,7 @@ class RenderingDeviceVulkan : public RenderingDevice { // This mapping is done here internally, and it's not // exposed. - RID_Owner vertex_buffer_owner; + RID_Owner vertex_buffer_owner; struct VertexDescriptionKey { Vector vertex_formats; @@ -359,7 +359,7 @@ class RenderingDeviceVulkan : public RenderingDevice { Vector offsets; }; - RID_Owner vertex_array_owner; + RID_Owner vertex_array_owner; struct IndexBuffer : public Buffer { uint32_t max_index; //used for validation @@ -368,7 +368,7 @@ class RenderingDeviceVulkan : public RenderingDevice { bool supports_restart_indices; }; - RID_Owner index_buffer_owner; + RID_Owner index_buffer_owner; struct IndexArray { uint32_t max_index; //remember the maximum index here too, for validation @@ -379,7 +379,7 @@ class RenderingDeviceVulkan : public RenderingDevice { bool supports_restart_indices; }; - RID_Owner index_array_owner; + RID_Owner index_array_owner; /****************/ /**** SHADER ****/ @@ -495,7 +495,7 @@ class RenderingDeviceVulkan : public RenderingDevice { String _shader_uniform_debug(RID p_shader, int p_set = -1); - RID_Owner shader_owner; + RID_Owner shader_owner; /******************/ /**** UNIFORMS ****/ @@ -559,8 +559,8 @@ class RenderingDeviceVulkan : public RenderingDevice { DescriptorPool *_descriptor_pool_allocate(const DescriptorPoolKey &p_key); void _descriptor_pool_free(const DescriptorPoolKey &p_key, DescriptorPool *p_pool); - RID_Owner uniform_buffer_owner; - RID_Owner storage_buffer_owner; + RID_Owner uniform_buffer_owner; + RID_Owner storage_buffer_owner; //texture buffer needs a view struct TextureBuffer { @@ -568,7 +568,7 @@ class RenderingDeviceVulkan : public RenderingDevice { VkBufferView view; }; - RID_Owner texture_buffer_owner; + RID_Owner texture_buffer_owner; // This structure contains the descriptor set. They _need_ to be allocated // for a shader (and will be erased when this shader is erased), but should @@ -589,7 +589,7 @@ class RenderingDeviceVulkan : public RenderingDevice { Vector attachable_textures; //used for validation }; - RID_Owner uniform_set_owner; + RID_Owner uniform_set_owner; /*******************/ /**** PIPELINES ****/ @@ -607,7 +607,7 @@ class RenderingDeviceVulkan : public RenderingDevice { // was not supplied as intended. struct RenderPipeline { - //Cached values for validation + //Cached values for validation #ifdef DEBUG_ENABLED struct Validation { FramebufferFormatID framebuffer_format; @@ -627,7 +627,7 @@ class RenderingDeviceVulkan : public RenderingDevice { uint32_t push_constant_stages; }; - RID_Owner pipeline_owner; + RID_Owner pipeline_owner; /*******************/ /**** DRAW LIST ****/ diff --git a/drivers/windows/semaphore_windows.cpp b/drivers/windows/semaphore_windows.cpp index ea8032ffe2..1b53e311ff 100644 --- a/drivers/windows/semaphore_windows.cpp +++ b/drivers/windows/semaphore_windows.cpp @@ -61,7 +61,7 @@ int SemaphoreWindows::get() const { ERR_FAIL_V(-1); } -Semaphore *SemaphoreWindows::create_semaphore_windows() { +SemaphoreOld *SemaphoreWindows::create_semaphore_windows() { return memnew(SemaphoreWindows); } diff --git a/drivers/windows/semaphore_windows.h b/drivers/windows/semaphore_windows.h index 01712b6778..159e8b3b96 100644 --- a/drivers/windows/semaphore_windows.h +++ b/drivers/windows/semaphore_windows.h @@ -37,11 +37,11 @@ #include -class SemaphoreWindows : public Semaphore { +class SemaphoreWindows : public SemaphoreOld { mutable HANDLE semaphore; - static Semaphore *create_semaphore_windows(); + static SemaphoreOld *create_semaphore_windows(); public: virtual Error wait(); diff --git a/editor/editor_resource_preview.cpp b/editor/editor_resource_preview.cpp index 92bee53fdb..c0f58c9aae 100644 --- a/editor/editor_resource_preview.cpp +++ b/editor/editor_resource_preview.cpp @@ -476,7 +476,7 @@ EditorResourcePreview::EditorResourcePreview() { thread = NULL; singleton = this; preview_mutex = Mutex::create(); - preview_sem = Semaphore::create(); + preview_sem = SemaphoreOld::create(); order = 0; exit = false; exited = false; diff --git a/editor/editor_resource_preview.h b/editor/editor_resource_preview.h index 94f4bcb466..0a89154243 100644 --- a/editor/editor_resource_preview.h +++ b/editor/editor_resource_preview.h @@ -71,7 +71,7 @@ class EditorResourcePreview : public Node { List queue; Mutex *preview_mutex; - Semaphore *preview_sem; + SemaphoreOld *preview_sem; Thread *thread; volatile bool exit; volatile bool exited; diff --git a/modules/glslang/register_types.cpp b/modules/glslang/register_types.cpp index ef159e743d..843bd93c05 100644 --- a/modules/glslang/register_types.cpp +++ b/modules/glslang/register_types.cpp @@ -140,9 +140,6 @@ static PoolVector _compile_shader_glsl(RenderingDevice::ShaderStage p_s ERR_FAIL_COND_V(p_language==RenderingDevice::SHADER_LANGUAGE_HLSL,ret); - // initialize in case it's not initialized. This is done once per thread - // and it's safe to call multiple times - glslang::InitializeProcess(); EShLanguage stages[RenderingDevice::SHADER_STAGE_MAX] = { EShLangVertex, EShLangFragment, @@ -229,6 +226,9 @@ static PoolVector _compile_shader_glsl(RenderingDevice::ShaderStage p_s } void preregister_glslang_types() { + // initialize in case it's not initialized. This is done once per thread + // and it's safe to call multiple times + glslang::InitializeProcess(); RenderingDevice::shader_set_compile_function(_compile_shader_glsl); } @@ -236,5 +236,5 @@ void register_glslang_types() { } void unregister_glslang_types() { - + glslang::FinalizeProcess(); } diff --git a/platform/iphone/semaphore_iphone.cpp b/platform/iphone/semaphore_iphone.cpp index 0c1d4d2d5c..0461f58c40 100644 --- a/platform/iphone/semaphore_iphone.cpp +++ b/platform/iphone/semaphore_iphone.cpp @@ -91,7 +91,7 @@ int SemaphoreIphone::get() const { return 0; } -Semaphore *SemaphoreIphone::create_semaphore_iphone() { +SemaphoreOld *SemaphoreIphone::create_semaphore_iphone() { return memnew(SemaphoreIphone); } diff --git a/platform/iphone/semaphore_iphone.h b/platform/iphone/semaphore_iphone.h index 9356c65f1e..54ff3c17f9 100644 --- a/platform/iphone/semaphore_iphone.h +++ b/platform/iphone/semaphore_iphone.h @@ -39,11 +39,11 @@ typedef struct cgsem cgsem_t; #include "core/os/semaphore.h" -class SemaphoreIphone : public Semaphore { +class SemaphoreIphone : public SemaphoreOld { mutable cgsem_t sem; - static Semaphore *create_semaphore_iphone(); + static SemaphoreOld *create_semaphore_iphone(); public: virtual Error wait(); diff --git a/platform/osx/semaphore_osx.cpp b/platform/osx/semaphore_osx.cpp index e75f5103cc..e4e5991637 100644 --- a/platform/osx/semaphore_osx.cpp +++ b/platform/osx/semaphore_osx.cpp @@ -86,7 +86,7 @@ int SemaphoreOSX::get() const { return 0; } -Semaphore *SemaphoreOSX::create_semaphore_osx() { +SemaphoreOld *SemaphoreOSX::create_semaphore_osx() { return memnew(SemaphoreOSX); } diff --git a/platform/osx/semaphore_osx.h b/platform/osx/semaphore_osx.h index 2348c8efa6..9aa2b47bc8 100644 --- a/platform/osx/semaphore_osx.h +++ b/platform/osx/semaphore_osx.h @@ -39,11 +39,11 @@ typedef struct cgsem cgsem_t; #include "core/os/semaphore.h" -class SemaphoreOSX : public Semaphore { +class SemaphoreOSX : public SemaphoreOld { mutable cgsem_t sem; - static Semaphore *create_semaphore_osx(); + static SemaphoreOld *create_semaphore_osx(); public: virtual Error wait(); diff --git a/servers/physics_2d/physics_2d_server_wrap_mt.cpp b/servers/physics_2d/physics_2d_server_wrap_mt.cpp index 995d763c6d..291693de39 100644 --- a/servers/physics_2d/physics_2d_server_wrap_mt.cpp +++ b/servers/physics_2d/physics_2d_server_wrap_mt.cpp @@ -107,7 +107,7 @@ void Physics2DServerWrapMT::init() { if (create_thread) { - step_sem = Semaphore::create(); + step_sem = SemaphoreOld::create(); //OS::get_singleton()->release_rendering_thread(); if (create_thread) { thread = Thread::create(_thread_callback, this); diff --git a/servers/physics_2d/physics_2d_server_wrap_mt.h b/servers/physics_2d/physics_2d_server_wrap_mt.h index eec0a3933f..6bd0a09de5 100644 --- a/servers/physics_2d/physics_2d_server_wrap_mt.h +++ b/servers/physics_2d/physics_2d_server_wrap_mt.h @@ -58,7 +58,7 @@ class Physics2DServerWrapMT : public Physics2DServer { volatile bool step_thread_up; bool create_thread; - Semaphore *step_sem; + SemaphoreOld *step_sem; int step_pending; void thread_step(real_t p_delta); void thread_flush(); diff --git a/servers/visual/rasterizer_rd/rasterizer_rd.cpp b/servers/visual/rasterizer_rd/rasterizer_rd.cpp index 9be7a6e3f7..8a77d9a71e 100644 --- a/servers/visual/rasterizer_rd/rasterizer_rd.cpp +++ b/servers/visual/rasterizer_rd/rasterizer_rd.cpp @@ -76,7 +76,7 @@ void RasterizerRD::initialize() { RenderingDevice::ShaderStageData frag; frag.shader_stage = RenderingDevice::SHADER_STAGE_FRAGMENT; - frag.spir_v =RenderingDevice::get_singleton()->shader_compile_from_source(RenderingDevice::SHADER_STAGE_FRAGMENT, + frag.spir_v = RenderingDevice::get_singleton()->shader_compile_from_source(RenderingDevice::SHADER_STAGE_FRAGMENT, "#version 450\n" "layout (location = 0) in vec2 uv;\n" "layout (location = 0) out vec4 color;\n" @@ -120,8 +120,12 @@ void RasterizerRD::initialize() { } } +ThreadWorkPool RasterizerRD::thread_work_pool; + void RasterizerRD::finalize() { + thread_work_pool.finish(); + memdelete(scene); memdelete(canvas); memdelete(storage); @@ -133,6 +137,7 @@ void RasterizerRD::finalize() { } RasterizerRD::RasterizerRD() { + thread_work_pool.init(); time = 0; storage = memnew(RasterizerStorageRD); canvas = memnew(RasterizerCanvasRD(storage)); diff --git a/servers/visual/rasterizer_rd/rasterizer_rd.h b/servers/visual/rasterizer_rd/rasterizer_rd.h index 749d5c23ad..f5ee7ddc76 100644 --- a/servers/visual/rasterizer_rd/rasterizer_rd.h +++ b/servers/visual/rasterizer_rd/rasterizer_rd.h @@ -2,10 +2,12 @@ #define RASTERIZER_RD_H #include "core/os/os.h" +#include "core/thread_work_pool.h" #include "servers/visual/rasterizer.h" #include "servers/visual/rasterizer_rd/rasterizer_canvas_rd.h" #include "servers/visual/rasterizer_rd/rasterizer_scene_forward_rd.h" #include "servers/visual/rasterizer_rd/rasterizer_storage_rd.h" + class RasterizerRD : public Rasterizer { protected: RasterizerCanvasRD *canvas; @@ -51,6 +53,8 @@ public: virtual bool is_low_end() const { return true; } + static ThreadWorkPool thread_work_pool; + RasterizerRD(); ~RasterizerRD() {} }; diff --git a/servers/visual/rasterizer_rd/rasterizer_storage_rd.h b/servers/visual/rasterizer_rd/rasterizer_storage_rd.h index c2f60c08b3..c05d9a41f6 100644 --- a/servers/visual/rasterizer_rd/rasterizer_storage_rd.h +++ b/servers/visual/rasterizer_rd/rasterizer_storage_rd.h @@ -4,8 +4,8 @@ #include "core/rid_owner.h" #include "servers/visual/rasterizer.h" #include "servers/visual/rasterizer_rd/effects_rd.h" -#include "servers/visual/rendering_device.h" #include "servers/visual/rasterizer_rd/shader_compiler_rd.h" +#include "servers/visual/rendering_device.h" class RasterizerStorageRD : public RasterizerStorage { public: @@ -109,7 +109,8 @@ private: } }; - mutable RID_Owner texture_owner; + //textures can be created from threads, so this RID_Owner is thread safe + mutable RID_Owner texture_owner; Ref _validate_texture_format(const Ref &p_image, TextureToRDFormat &r_format); diff --git a/servers/visual/rasterizer_rd/shader_rd.cpp b/servers/visual/rasterizer_rd/shader_rd.cpp index 945fc25cab..58596f6a72 100644 --- a/servers/visual/rasterizer_rd/shader_rd.cpp +++ b/servers/visual/rasterizer_rd/shader_rd.cpp @@ -30,6 +30,7 @@ #include "shader_rd.h" #include "core/string_builder.h" +#include "rasterizer_rd.h" #include "servers/visual/rendering_device.h" void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name) { @@ -160,6 +161,121 @@ void ShaderRD::_clear_version(Version *p_version) { p_version->variants = NULL; } } + +void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) { + + Vector stages; + + String error; + String current_source; + RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX; + bool build_ok = true; + + { + //vertex stage + + StringBuilder builder; + + builder.append(vertex_codev.get_data()); // version info (if exists) + builder.append("\n"); //make sure defines begin at newline + builder.append(general_defines.get_data()); + builder.append(variant_defines[p_variant].get_data()); + + for (int j = 0; j < p_version->custom_defines.size(); j++) { + builder.append(p_version->custom_defines[j].get_data()); + } + + builder.append(vertex_code0.get_data()); //first part of vertex + + builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment) + + builder.append(vertex_code1.get_data()); //second part of vertex + + builder.append(p_version->vertex_globals.get_data()); // vertex globals + + builder.append(vertex_code2.get_data()); //third part of vertex + + builder.append(p_version->vertex_code.get_data()); // code + + builder.append(vertex_code3.get_data()); //fourth of vertex + + current_source = builder.as_string(); + RD::ShaderStageData stage; + stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_VERTEX, current_source, RD::SHADER_LANGUAGE_GLSL, &error); + if (stage.spir_v.size() == 0) { + build_ok = false; + } else { + + stage.shader_stage = RD::SHADER_STAGE_VERTEX; + stages.push_back(stage); + } + } + + if (build_ok) { + //fragment stage + current_stage = RD::SHADER_STAGE_FRAGMENT; + + StringBuilder builder; + + builder.append(fragment_codev.get_data()); // version info (if exists) + builder.append("\n"); //make sure defines begin at newline + + builder.append(general_defines.get_data()); + builder.append(variant_defines[p_variant].get_data()); + for (int j = 0; j < p_version->custom_defines.size(); j++) { + builder.append(p_version->custom_defines[j].get_data()); + } + + builder.append(fragment_code0.get_data()); //first part of fragment + + builder.append(p_version->uniforms.get_data()); //uniforms (same for fragment and fragment) + + builder.append(fragment_code1.get_data()); //first part of fragment + + builder.append(p_version->fragment_globals.get_data()); // fragment globals + + builder.append(fragment_code2.get_data()); //third part of fragment + + builder.append(p_version->fragment_light.get_data()); // fragment light + + builder.append(fragment_code3.get_data()); //fourth part of fragment + + builder.append(p_version->fragment_code.get_data()); // fragment code + + builder.append(fragment_code4.get_data()); //fourth part of fragment + + current_source = builder.as_string(); + RD::ShaderStageData stage; + stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_FRAGMENT, current_source, RD::SHADER_LANGUAGE_GLSL, &error); + if (stage.spir_v.size() == 0) { + build_ok = false; + } else { + + stage.shader_stage = RD::SHADER_STAGE_FRAGMENT; + stages.push_back(stage); + } + } + + if (!build_ok) { + variant_set_mutex.lock(); //properly print the errors + ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment") + " shader, variant #" + itos(p_variant) + " (" + variant_defines[p_variant].get_data() + ")."); + ERR_PRINT(error); + +#ifdef DEBUG_ENABLED + ERR_PRINT("code:\n" + current_source.get_with_code_lines()); +#endif + + variant_set_mutex.unlock(); + return; + } + + RID shader = RD::get_singleton()->shader_create(stages); + + variant_set_mutex.lock(); + p_version->variants[p_variant] = shader; + variant_set_mutex.unlock(); +} + void ShaderRD::_compile_version(Version *p_version) { _clear_version(p_version); @@ -168,134 +284,34 @@ void ShaderRD::_compile_version(Version *p_version) { p_version->dirty = false; p_version->variants = memnew_arr(RID, variant_defines.size()); +#if 1 + RasterizerRD::thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version); +#else for (int i = 0; i < variant_defines.size(); i++) { - Vector stages; - - String error; - String current_source; - RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX; - bool build_ok=true; - - { - //vertex stage - - StringBuilder builder; - - builder.append(vertex_codev.get_data()); // version info (if exists) - builder.append("\n"); //make sure defines begin at newline - builder.append(general_defines.get_data()); - builder.append(variant_defines[i].get_data()); - - for (int j = 0; j < p_version->custom_defines.size(); j++) { - builder.append(p_version->custom_defines[j].get_data()); - } - - builder.append(vertex_code0.get_data()); //first part of vertex - - builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment) - - builder.append(vertex_code1.get_data()); //second part of vertex - - builder.append(p_version->vertex_globals.get_data()); // vertex globals - - builder.append(vertex_code2.get_data()); //third part of vertex - - builder.append(p_version->vertex_code.get_data()); // code - - builder.append(vertex_code3.get_data()); //fourth of vertex - - current_source = builder.as_string(); - RD::ShaderStageData stage; - stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_VERTEX,current_source,RD::SHADER_LANGUAGE_GLSL,&error); - if (stage.spir_v.size()==0) { - build_ok=false; - } else { - - stage.shader_stage = RD::SHADER_STAGE_VERTEX; - stages.push_back(stage); - } - } - - if (build_ok){ - //fragment stage - current_stage =RD::SHADER_STAGE_FRAGMENT; - - StringBuilder builder; - - builder.append(fragment_codev.get_data()); // version info (if exists) - builder.append("\n"); //make sure defines begin at newline - - builder.append(general_defines.get_data()); - builder.append(variant_defines[i].get_data()); - for (int j = 0; j < p_version->custom_defines.size(); j++) { - builder.append(p_version->custom_defines[j].get_data()); - } - - builder.append(fragment_code0.get_data()); //first part of fragment - - builder.append(p_version->uniforms.get_data()); //uniforms (same for fragment and fragment) - - builder.append(fragment_code1.get_data()); //first part of fragment - - builder.append(p_version->fragment_globals.get_data()); // fragment globals - - builder.append(fragment_code2.get_data()); //third part of fragment - - builder.append(p_version->fragment_light.get_data()); // fragment light - - builder.append(fragment_code3.get_data()); //fourth part of fragment - - builder.append(p_version->fragment_code.get_data()); // fragment code - - builder.append(fragment_code4.get_data()); //fourth part of fragment - - current_source = builder.as_string(); - RD::ShaderStageData stage; - stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_FRAGMENT,current_source,RD::SHADER_LANGUAGE_GLSL,&error); - if (stage.spir_v.size()==0) { - build_ok=false; - } else { - - stage.shader_stage = RD::SHADER_STAGE_FRAGMENT; - stages.push_back(stage); - } - - } - - - if (!build_ok) { - ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment") + " shader, variant #" + itos(i) + " (" + variant_defines[i].get_data() + ")."); - ERR_PRINT(error); - -#ifdef DEBUG_ENABLED - ERR_PRINT("code:\n" + current_source.get_with_code_lines()); + _compile_variant(i, p_version); + } #endif - //clear versions if they exist - for (int j = 0; j < i; j++) { - RD::get_singleton()->free(p_version->variants[j]); - } - memdelete_arr(p_version->variants); - p_version->variants = NULL; - return; + bool all_valid = true; + for (int i = 0; i < variant_defines.size(); i++) { + if (p_version->variants[i].is_null()) { + all_valid = false; + break; } + } - RID shader = RD::get_singleton()->shader_create(stages); - - if (shader.is_null()) { - //clear versions if they exist - for (int j = 0; j < i; j++) { - RD::get_singleton()->free(p_version->variants[j]); + if (!all_valid) { + //clear versions if they exist + for (int i = 0; i < variant_defines.size(); i++) { + if (!p_version->variants[i].is_null()) { + RD::get_singleton()->free(p_version->variants[i]); } - - memdelete_arr(p_version->variants); - p_version->variants = NULL; - return; } - - p_version->variants[i] = shader; + memdelete_arr(p_version->variants); + p_version->variants = NULL; + return; } p_version->valid = true; diff --git a/servers/visual/rasterizer_rd/shader_rd.h b/servers/visual/rasterizer_rd/shader_rd.h index 558675935d..81169343d6 100644 --- a/servers/visual/rasterizer_rd/shader_rd.h +++ b/servers/visual/rasterizer_rd/shader_rd.h @@ -36,7 +36,7 @@ #include "core/rid_owner.h" #include "core/variant.h" #include - +#include /** @author Juan Linietsky */ @@ -67,6 +67,10 @@ class ShaderRD { bool initialize_needed; }; + std::mutex variant_set_mutex; + + void _compile_variant(uint32_t p_variant, Version *p_version); + void _clear_version(Version *p_version); void _compile_version(Version *p_version); diff --git a/servers/visual/visual_server_scene.cpp b/servers/visual/visual_server_scene.cpp index e2fb0a6351..27632be2b4 100644 --- a/servers/visual/visual_server_scene.cpp +++ b/servers/visual/visual_server_scene.cpp @@ -3511,7 +3511,7 @@ VisualServerScene *VisualServerScene::singleton = NULL; VisualServerScene::VisualServerScene() { #ifndef NO_THREADS - probe_bake_sem = Semaphore::create(); + probe_bake_sem = SemaphoreOld::create(); probe_bake_mutex = Mutex::create(); probe_bake_thread = Thread::create(_gi_probe_bake_threads, this); probe_bake_thread_exit = false; diff --git a/servers/visual/visual_server_scene.h b/servers/visual/visual_server_scene.h index d04cbf343a..4a04e3b056 100644 --- a/servers/visual/visual_server_scene.h +++ b/servers/visual/visual_server_scene.h @@ -517,7 +517,7 @@ public: volatile bool probe_bake_thread_exit; Thread *probe_bake_thread; - Semaphore *probe_bake_sem; + SemaphoreOld *probe_bake_sem; Mutex *probe_bake_mutex; List probe_bake_list;