Added a spinlock template as well as a thread work pool class.

Also, optimized shader compilation to happen on threads.
This commit is contained in:
Juan Linietsky 2019-07-29 12:59:18 -03:00
parent 4fe3ee1730
commit c613ead5fa
37 changed files with 458 additions and 192 deletions

View file

@ -2612,7 +2612,7 @@ void _Semaphore::_bind_methods() {
_Semaphore::_Semaphore() { _Semaphore::_Semaphore() {
semaphore = Semaphore::create(); semaphore = SemaphoreOld::create();
} }
_Semaphore::~_Semaphore() { _Semaphore::~_Semaphore() {

View file

@ -642,7 +642,7 @@ public:
class _Semaphore : public Reference { class _Semaphore : public Reference {
GDCLASS(_Semaphore, Reference); GDCLASS(_Semaphore, Reference);
Semaphore *semaphore; SemaphoreOld *semaphore;
static void _bind_methods(); static void _bind_methods();

View file

@ -111,11 +111,11 @@ CommandQueueMT::CommandQueueMT(bool p_sync) {
for (int i = 0; i < SYNC_SEMAPHORES; i++) { for (int i = 0; i < SYNC_SEMAPHORES; i++) {
sync_sems[i].sem = Semaphore::create(); sync_sems[i].sem = SemaphoreOld::create();
sync_sems[i].in_use = false; sync_sems[i].in_use = false;
} }
if (p_sync) if (p_sync)
sync = Semaphore::create(); sync = SemaphoreOld::create();
else else
sync = NULL; sync = NULL;
} }

View file

@ -297,7 +297,7 @@ class CommandQueueMT {
struct SyncSemaphore { struct SyncSemaphore {
Semaphore *sem; SemaphoreOld *sem;
bool in_use; bool in_use;
}; };
@ -342,7 +342,7 @@ class CommandQueueMT {
uint32_t dealloc_ptr; uint32_t dealloc_ptr;
SyncSemaphore sync_sems[SYNC_SEMAPHORES]; SyncSemaphore sync_sems[SYNC_SEMAPHORES];
Mutex *mutex; Mutex *mutex;
Semaphore *sync; SemaphoreOld *sync;
template <class T> template <class T>
T *allocate() { T *allocate() {

View file

@ -231,7 +231,7 @@ FileAccessNetworkClient::FileAccessNetworkClient() {
singleton = this; singleton = this;
last_id = 0; last_id = 0;
client.instance(); client.instance();
sem = Semaphore::create(); sem = SemaphoreOld::create();
lockcount = 0; lockcount = 0;
} }
@ -522,8 +522,8 @@ FileAccessNetwork::FileAccessNetwork() {
eof_flag = false; eof_flag = false;
opened = false; opened = false;
pos = 0; pos = 0;
sem = Semaphore::create(); sem = SemaphoreOld::create();
page_sem = Semaphore::create(); page_sem = SemaphoreOld::create();
buffer_mutex = Mutex::create(); buffer_mutex = Mutex::create();
FileAccessNetworkClient *nc = FileAccessNetworkClient::singleton; FileAccessNetworkClient *nc = FileAccessNetworkClient::singleton;
nc->lock_mutex(); nc->lock_mutex();

View file

@ -49,7 +49,7 @@ class FileAccessNetworkClient {
List<BlockRequest> block_requests; List<BlockRequest> block_requests;
Semaphore *sem; SemaphoreOld *sem;
Thread *thread; Thread *thread;
bool quit; bool quit;
Mutex *mutex; Mutex *mutex;
@ -85,8 +85,8 @@ public:
class FileAccessNetwork : public FileAccess { class FileAccessNetwork : public FileAccess {
Semaphore *sem; SemaphoreOld *sem;
Semaphore *page_sem; SemaphoreOld *page_sem;
Mutex *buffer_mutex; Mutex *buffer_mutex;
bool opened; bool opened;
size_t total_size; size_t total_size;

View file

@ -71,7 +71,7 @@ struct _IP_ResolverPrivate {
} }
Mutex *mutex; Mutex *mutex;
Semaphore *sem; SemaphoreOld *sem;
Thread *thread; Thread *thread;
//Semaphore* semaphore; //Semaphore* semaphore;
@ -319,7 +319,7 @@ IP::IP() {
#ifndef NO_THREADS #ifndef NO_THREADS
resolver->sem = Semaphore::create(); resolver->sem = SemaphoreOld::create();
if (resolver->sem) { if (resolver->sem) {
resolver->thread_abort = false; resolver->thread_abort = false;

View file

@ -32,14 +32,14 @@
#include "core/error_macros.h" #include "core/error_macros.h"
Semaphore *(*Semaphore::create_func)() = 0; SemaphoreOld *(*SemaphoreOld::create_func)() = 0;
Semaphore *Semaphore::create() { SemaphoreOld *SemaphoreOld::create() {
ERR_FAIL_COND_V(!create_func, 0); ERR_FAIL_COND_V(!create_func, 0);
return create_func(); return create_func();
} }
Semaphore::~Semaphore() { SemaphoreOld::~SemaphoreOld() {
} }

View file

@ -32,19 +32,53 @@
#define SEMAPHORE_H #define SEMAPHORE_H
#include "core/error_list.h" #include "core/error_list.h"
#include "core/typedefs.h"
#include <condition_variable>
#include <mutex>
class Semaphore { class Semaphore {
private:
std::mutex mutex_;
std::condition_variable condition_;
unsigned long count_ = 0; // Initialized as locked.
public:
/// Add one unit to the counter and wake a single waiter, if there is one.
_ALWAYS_INLINE_ void post() {
	std::unique_lock<std::mutex> guard(mutex_);
	count_ += 1;
	// Signalled while the mutex is still held, exactly as before.
	condition_.notify_one();
}
/// Block until the counter is non-zero, then consume one unit.
_ALWAYS_INLINE_ void wait() {
	std::unique_lock<std::mutex> guard(mutex_);
	// The predicate form re-checks the counter after every wake-up,
	// so spurious wake-ups simply go back to sleep.
	condition_.wait(guard, [this]() { return count_ != 0; });
	count_ -= 1;
}
/// Non-blocking variant of wait(): consumes one unit and returns true when
/// the counter is non-zero, otherwise returns false without waiting.
_ALWAYS_INLINE_ bool try_wait() {
	std::lock_guard<std::mutex> guard(mutex_);
	if (count_ == 0) {
		return false;
	}
	count_ -= 1;
	return true;
}
};
class SemaphoreOld {
protected: protected:
static Semaphore *(*create_func)(); static SemaphoreOld *(*create_func)();
public: public:
virtual Error wait() = 0; ///< wait until semaphore has positive value, then decrement and pass virtual Error wait() = 0; ///< wait until semaphore has positive value, then decrement and pass
virtual Error post() = 0; ///< unlock the semaphore, incrementing the value virtual Error post() = 0; ///< unlock the semaphore, incrementing the value
virtual int get() const = 0; ///< get semaphore value virtual int get() const = 0; ///< get semaphore value
static Semaphore *create(); ///< Create a mutex static SemaphoreOld *create(); ///< Create a mutex
virtual ~Semaphore(); virtual ~SemaphoreOld();
}; };
#endif #endif

View file

@ -48,12 +48,12 @@ void MutexDummy::make_default() {
Mutex::create_func = &MutexDummy::create; Mutex::create_func = &MutexDummy::create;
}; };
Semaphore *SemaphoreDummy::create() { SemaphoreOld *SemaphoreDummy::create() {
return memnew(SemaphoreDummy); return memnew(SemaphoreDummy);
}; };
void SemaphoreDummy::make_default() { void SemaphoreDummy::make_default() {
Semaphore::create_func = &SemaphoreDummy::create; SemaphoreOld::create_func = &SemaphoreDummy::create;
}; };
RWLock *RWLockDummy::create() { RWLock *RWLockDummy::create() {

View file

@ -58,9 +58,9 @@ public:
static void make_default(); static void make_default();
}; };
class SemaphoreDummy : public Semaphore { class SemaphoreDummy : public SemaphoreOld {
static Semaphore *create(); static SemaphoreOld *create();
public: public:
virtual Error wait() { return OK; }; virtual Error wait() { return OK; };

View file

@ -3,6 +3,8 @@
#include "core/print_string.h" #include "core/print_string.h"
#include "core/rid.h" #include "core/rid.h"
#include "core/spin_lock.h"
#include <typeinfo> #include <typeinfo>
class RID_AllocBase { class RID_AllocBase {
@ -28,7 +30,7 @@ public:
virtual ~RID_AllocBase() {} virtual ~RID_AllocBase() {}
}; };
template <class T> template <class T, bool THREAD_SAFE = false>
class RID_Alloc : public RID_AllocBase { class RID_Alloc : public RID_AllocBase {
T **chunks; T **chunks;
@ -41,9 +43,15 @@ class RID_Alloc : public RID_AllocBase {
const char *description; const char *description;
SpinLock spin_lock;
public: public:
RID make_rid(const T &p_value) { RID make_rid(const T &p_value) {
if (THREAD_SAFE) {
spin_lock.lock();
}
if (alloc_count == max_alloc) { if (alloc_count == max_alloc) {
//allocate a new chunk //allocate a new chunk
uint32_t chunk_count = alloc_count == 0 ? 0 : (max_alloc / elements_in_chunk); uint32_t chunk_count = alloc_count == 0 ? 0 : (max_alloc / elements_in_chunk);
@ -85,11 +93,19 @@ public:
validator_chunks[free_chunk][free_element] = validator; validator_chunks[free_chunk][free_element] = validator;
alloc_count++; alloc_count++;
if (THREAD_SAFE) {
spin_lock.unlock();
}
return _make_from_id(id); return _make_from_id(id);
} }
_FORCE_INLINE_ T *getornull(const RID &p_rid) { _FORCE_INLINE_ T *getornull(const RID &p_rid) {
if (THREAD_SAFE) {
spin_lock.lock();
}
uint64_t id = p_rid.get_id(); uint64_t id = p_rid.get_id();
uint32_t idx = uint32_t(id & 0xFFFFFFFF); uint32_t idx = uint32_t(id & 0xFFFFFFFF);
if (unlikely(idx >= max_alloc)) { if (unlikely(idx >= max_alloc)) {
@ -104,14 +120,27 @@ public:
return NULL; return NULL;
} }
return &chunks[idx_chunk][idx_element]; T *ptr = &chunks[idx_chunk][idx_element];
if (THREAD_SAFE) {
spin_lock.unlock();
}
return ptr;
} }
_FORCE_INLINE_ bool owns(const RID &p_rid) { _FORCE_INLINE_ bool owns(const RID &p_rid) {
if (THREAD_SAFE) {
spin_lock.lock();
}
uint64_t id = p_rid.get_id(); uint64_t id = p_rid.get_id();
uint32_t idx = uint32_t(id & 0xFFFFFFFF); uint32_t idx = uint32_t(id & 0xFFFFFFFF);
if (unlikely(idx >= max_alloc)) { if (unlikely(idx >= max_alloc)) {
if (THREAD_SAFE) {
spin_lock.unlock();
}
return false; return false;
} }
@ -119,11 +148,22 @@ public:
uint32_t idx_element = idx % elements_in_chunk; uint32_t idx_element = idx % elements_in_chunk;
uint32_t validator = uint32_t(id >> 32); uint32_t validator = uint32_t(id >> 32);
return validator_chunks[idx_chunk][idx_element] == validator;
bool owned = validator_chunks[idx_chunk][idx_element] == validator;
if (THREAD_SAFE) {
spin_lock.unlock();
}
return owned;
} }
_FORCE_INLINE_ void free(const RID &p_rid) { _FORCE_INLINE_ void free(const RID &p_rid) {
if (THREAD_SAFE) {
spin_lock.lock();
}
uint64_t id = p_rid.get_id(); uint64_t id = p_rid.get_id();
uint32_t idx = uint32_t(id & 0xFFFFFFFF); uint32_t idx = uint32_t(id & 0xFFFFFFFF);
ERR_FAIL_COND(idx >= max_alloc); ERR_FAIL_COND(idx >= max_alloc);
@ -139,6 +179,10 @@ public:
alloc_count--; alloc_count--;
free_list_chunks[alloc_count / elements_in_chunk][alloc_count % elements_in_chunk] = idx; free_list_chunks[alloc_count / elements_in_chunk][alloc_count % elements_in_chunk] = idx;
if (THREAD_SAFE) {
spin_lock.unlock();
}
} }
_FORCE_INLINE_ uint32_t get_rid_count() const { _FORCE_INLINE_ uint32_t get_rid_count() const {
@ -147,8 +191,15 @@ public:
_FORCE_INLINE_ T *get_rid_by_index(uint32_t p_index) { _FORCE_INLINE_ T *get_rid_by_index(uint32_t p_index) {
ERR_FAIL_INDEX_V(p_index, alloc_count, NULL); ERR_FAIL_INDEX_V(p_index, alloc_count, NULL);
if (THREAD_SAFE) {
spin_lock.lock();
}
uint64_t idx = free_list_chunks[p_index / elements_in_chunk][p_index % elements_in_chunk]; uint64_t idx = free_list_chunks[p_index / elements_in_chunk][p_index % elements_in_chunk];
return &chunks[idx / elements_in_chunk][idx % elements_in_chunk]; T *ptr = &chunks[idx / elements_in_chunk][idx % elements_in_chunk];
if (THREAD_SAFE) {
spin_lock.unlock();
}
return ptr;
} }
void get_owned_list(List<RID> *p_owned) { void get_owned_list(List<RID> *p_owned) {
@ -203,9 +254,9 @@ public:
} }
}; };
template <class T> template <class T, bool THREAD_SAFE = false>
class RID_PtrOwner { class RID_PtrOwner {
RID_Alloc<T *> alloc; RID_Alloc<T *, THREAD_SAFE> alloc;
public: public:
_FORCE_INLINE_ RID make_rid(T *p_ptr) { _FORCE_INLINE_ RID make_rid(T *p_ptr) {
@ -239,9 +290,9 @@ public:
alloc(p_target_chunk_byte_size) {} alloc(p_target_chunk_byte_size) {}
}; };
template <class T> template <class T, bool THREAD_SAFE = false>
class RID_Owner { class RID_Owner {
RID_Alloc<T> alloc; RID_Alloc<T, THREAD_SAFE> alloc;
public: public:
_FORCE_INLINE_ RID make_rid(const T &p_ptr) { _FORCE_INLINE_ RID make_rid(const T &p_ptr) {

20
core/spin_lock.h Normal file
View file

@ -0,0 +1,20 @@
#ifndef SPIN_LOCK_H
#define SPIN_LOCK_H
#include "core/typedefs.h"
#include <atomic>
/// Minimal busy-waiting lock built on a single std::atomic_flag.
class SpinLock {
	std::atomic_flag busy = ATOMIC_FLAG_INIT;

public:
	/// Spin until the flag is acquired; the loop never yields or sleeps.
	_ALWAYS_INLINE_ void lock() {
		for (;;) {
			const bool was_held = busy.test_and_set(std::memory_order_acquire);
			if (!was_held) {
				return;
			}
		}
	}

	/// Clear the flag so that a pending or future lock() can succeed.
	_ALWAYS_INLINE_ void unlock() {
		busy.clear(std::memory_order_release);
	}
};
#endif // SPIN_LOCK_H

53
core/thread_work_pool.cpp Normal file
View file

@ -0,0 +1,53 @@
#include "thread_work_pool.h"
#include "core/os/os.h"
// Worker entry point: sleep on `start`, leave when `exit` is set, otherwise
// run the assigned batch and report back through `completed`.
void ThreadWorkPool::_thread_function(ThreadData *p_thread) {
	for (;;) {
		p_thread->start.wait();

		const bool must_exit = p_thread->exit.load();
		if (must_exit) {
			return;
		}

		BaseWork *job = p_thread->work;
		job->work();
		p_thread->completed.post();
	}
}
void ThreadWorkPool::init(int p_thread_count) {
ERR_FAIL_COND(threads != nullptr);
if (p_thread_count < 0) {
p_thread_count = OS::get_singleton()->get_processor_count();
}
thread_count = p_thread_count;
threads = memnew_arr(ThreadData, thread_count);
for (uint32_t i = 0; i < thread_count; i++) {
threads[i].exit.store(false);
threads[i].thread = memnew(std::thread(ThreadWorkPool::_thread_function, &threads[i]));
}
}
void ThreadWorkPool::finish() {
if (threads == nullptr) {
return;
}
for (uint32_t i = 0; i < thread_count; i++) {
threads[i].exit.store(true);
threads[i].start.post();
}
for (uint32_t i = 0; i < thread_count; i++) {
threads[i].thread->join();
memdelete(threads[i].thread);
}
memdelete_arr(threads);
threads = nullptr;
}
// Delegates to finish(), which signals and joins any workers that are still
// around and is a no-op when the pool holds no threads.
ThreadWorkPool::~ThreadWorkPool() {
finish();
}

78
core/thread_work_pool.h Normal file
View file

@ -0,0 +1,78 @@
#ifndef THREAD_WORK_POOL_H
#define THREAD_WORK_POOL_H
#include "core/os/memory.h"
#include "core/os/semaphore.h"
#include <atomic>
#include <thread>
/// Fixed-size pool of worker threads that split an indexed batch of work.
///
/// Usage: call init() once to spawn the workers, do_work() to run a batch
/// (it blocks until the batch is done), and finish() to stop and join the
/// workers. The destructor calls finish() as well.
class ThreadWorkPool {
	// Next element index to hand out; shared by all workers during a batch.
	std::atomic<uint32_t> index{ 0 };

	// Type-erased batch description handed to the worker threads.
	struct BaseWork {
		std::atomic<uint32_t> *index = nullptr;
		uint32_t max_elements = 0;
		virtual void work() = 0;
		virtual ~BaseWork() {}
	};

	// Concrete batch: calls (instance->*method)(element_index, userdata) for
	// every index in [0, max_elements) that this worker manages to claim.
	template <class C, class M, class U>
	struct Work : public BaseWork {
		C *instance;
		M method;
		U userdata;
		virtual void work() {
			while (true) {
				// Claim the next unprocessed element; each index is handed out once.
				uint32_t work_index = index->fetch_add(1, std::memory_order_relaxed);
				if (work_index >= max_elements) {
					break;
				}
				(instance->*method)(work_index, userdata);
			}
		}
	};

	// Per-worker state: the thread itself plus the two semaphores used to
	// hand a batch to the worker (`start`) and to learn it is done (`completed`).
	struct ThreadData {
		std::thread *thread = nullptr;
		Semaphore start;
		Semaphore completed;
		std::atomic<bool> exit{ false };
		BaseWork *work = nullptr;
	};

	ThreadData *threads = nullptr;
	uint32_t thread_count = 0;

	static void _thread_function(ThreadData *p_thread);

public:
	/// Runs p_method on p_instance once for every index in [0, p_elements),
	/// spread across the pool's workers, and returns when all workers have
	/// finished. p_userdata is passed as the second argument of each call.
	/// Fails (without doing any work) if init() has not created the workers.
	template <class C, class M, class U>
	void do_work(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {

		ERR_FAIL_COND(!threads); //never initialized

		index.store(0);

		// The batch descriptor lives on this stack frame instead of the heap:
		// workers only touch it between `start.post()` and their matching
		// `completed.post()`, and this function does not return until every
		// `completed` has been waited on. Previously it was heap-allocated
		// and never freed, leaking one descriptor per call.
		Work<C, M, U> w;
		w.instance = p_instance;
		w.userdata = p_userdata;
		w.method = p_method;
		w.index = &index;
		w.max_elements = p_elements;

		for (uint32_t i = 0; i < thread_count; i++) {
			threads[i].work = &w;
			threads[i].start.post();
		}
		for (uint32_t i = 0; i < thread_count; i++) {
			threads[i].completed.wait();
			threads[i].work = nullptr;
		}
	}

	void init(int p_thread_count = -1);
	void finish();
	~ThreadWorkPool();
};
#endif // THREAD_WORK_POOL_H

View file

@ -62,7 +62,7 @@ int SemaphorePosix::get() const {
return val; return val;
} }
Semaphore *SemaphorePosix::create_semaphore_posix() { SemaphoreOld *SemaphorePosix::create_semaphore_posix() {
return memnew(SemaphorePosix); return memnew(SemaphorePosix);
} }

View file

@ -37,11 +37,11 @@
#include <semaphore.h> #include <semaphore.h>
class SemaphorePosix : public Semaphore { class SemaphorePosix : public SemaphoreOld {
mutable sem_t sem; mutable sem_t sem;
static Semaphore *create_semaphore_posix(); static SemaphoreOld *create_semaphore_posix();
public: public:
virtual Error wait(); virtual Error wait();

View file

@ -3448,8 +3448,6 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa
RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages) { RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages) {
_THREAD_SAFE_METHOD_
//descriptor layouts //descriptor layouts
Vector<Vector<VkDescriptorSetLayoutBinding> > set_bindings; Vector<Vector<VkDescriptorSetLayoutBinding> > set_bindings;
Vector<Vector<UniformInfo> > uniform_info; Vector<Vector<UniformInfo> > uniform_info;
@ -3694,6 +3692,8 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
//all good, let's create modules //all good, let's create modules
_THREAD_SAFE_METHOD_
Shader shader; Shader shader;
shader.vertex_input_locations = vertex_input_locations; shader.vertex_input_locations = vertex_input_locations;

View file

@ -114,7 +114,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
RID owner; RID owner;
}; };
RID_Owner<Texture> texture_owner; RID_Owner<Texture, true> texture_owner;
uint32_t texture_upload_region_size_px; uint32_t texture_upload_region_size_px;
PoolVector<uint8_t> _texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer); PoolVector<uint8_t> _texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer);
@ -264,7 +264,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
Size2 size; Size2 size;
}; };
RID_Owner<Framebuffer> framebuffer_owner; RID_Owner<Framebuffer, true> framebuffer_owner;
/***********************/ /***********************/
/**** VERTEX BUFFER ****/ /**** VERTEX BUFFER ****/
@ -279,7 +279,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
// This mapping is done here internally, and it's not // This mapping is done here internally, and it's not
// exposed. // exposed.
RID_Owner<Buffer> vertex_buffer_owner; RID_Owner<Buffer, true> vertex_buffer_owner;
struct VertexDescriptionKey { struct VertexDescriptionKey {
Vector<VertexDescription> vertex_formats; Vector<VertexDescription> vertex_formats;
@ -359,7 +359,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
Vector<VkDeviceSize> offsets; Vector<VkDeviceSize> offsets;
}; };
RID_Owner<VertexArray> vertex_array_owner; RID_Owner<VertexArray, true> vertex_array_owner;
struct IndexBuffer : public Buffer { struct IndexBuffer : public Buffer {
uint32_t max_index; //used for validation uint32_t max_index; //used for validation
@ -368,7 +368,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
bool supports_restart_indices; bool supports_restart_indices;
}; };
RID_Owner<IndexBuffer> index_buffer_owner; RID_Owner<IndexBuffer, true> index_buffer_owner;
struct IndexArray { struct IndexArray {
uint32_t max_index; //remember the maximum index here too, for validation uint32_t max_index; //remember the maximum index here too, for validation
@ -379,7 +379,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
bool supports_restart_indices; bool supports_restart_indices;
}; };
RID_Owner<IndexArray> index_array_owner; RID_Owner<IndexArray, true> index_array_owner;
/****************/ /****************/
/**** SHADER ****/ /**** SHADER ****/
@ -495,7 +495,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
String _shader_uniform_debug(RID p_shader, int p_set = -1); String _shader_uniform_debug(RID p_shader, int p_set = -1);
RID_Owner<Shader> shader_owner; RID_Owner<Shader, true> shader_owner;
/******************/ /******************/
/**** UNIFORMS ****/ /**** UNIFORMS ****/
@ -559,8 +559,8 @@ class RenderingDeviceVulkan : public RenderingDevice {
DescriptorPool *_descriptor_pool_allocate(const DescriptorPoolKey &p_key); DescriptorPool *_descriptor_pool_allocate(const DescriptorPoolKey &p_key);
void _descriptor_pool_free(const DescriptorPoolKey &p_key, DescriptorPool *p_pool); void _descriptor_pool_free(const DescriptorPoolKey &p_key, DescriptorPool *p_pool);
RID_Owner<Buffer> uniform_buffer_owner; RID_Owner<Buffer, true> uniform_buffer_owner;
RID_Owner<Buffer> storage_buffer_owner; RID_Owner<Buffer, true> storage_buffer_owner;
//texture buffer needs a view //texture buffer needs a view
struct TextureBuffer { struct TextureBuffer {
@ -568,7 +568,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
VkBufferView view; VkBufferView view;
}; };
RID_Owner<TextureBuffer> texture_buffer_owner; RID_Owner<TextureBuffer, true> texture_buffer_owner;
// This structure contains the descriptor set. They _need_ to be allocated // This structure contains the descriptor set. They _need_ to be allocated
// for a shader (and will be erased when this shader is erased), but should // for a shader (and will be erased when this shader is erased), but should
@ -589,7 +589,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
Vector<RID> attachable_textures; //used for validation Vector<RID> attachable_textures; //used for validation
}; };
RID_Owner<UniformSet> uniform_set_owner; RID_Owner<UniformSet, true> uniform_set_owner;
/*******************/ /*******************/
/**** PIPELINES ****/ /**** PIPELINES ****/
@ -607,7 +607,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
// was not supplied as intended. // was not supplied as intended.
struct RenderPipeline { struct RenderPipeline {
//Cached values for validation //Cached values for validation
#ifdef DEBUG_ENABLED #ifdef DEBUG_ENABLED
struct Validation { struct Validation {
FramebufferFormatID framebuffer_format; FramebufferFormatID framebuffer_format;
@ -627,7 +627,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
uint32_t push_constant_stages; uint32_t push_constant_stages;
}; };
RID_Owner<RenderPipeline> pipeline_owner; RID_Owner<RenderPipeline, true> pipeline_owner;
/*******************/ /*******************/
/**** DRAW LIST ****/ /**** DRAW LIST ****/

View file

@ -61,7 +61,7 @@ int SemaphoreWindows::get() const {
ERR_FAIL_V(-1); ERR_FAIL_V(-1);
} }
Semaphore *SemaphoreWindows::create_semaphore_windows() { SemaphoreOld *SemaphoreWindows::create_semaphore_windows() {
return memnew(SemaphoreWindows); return memnew(SemaphoreWindows);
} }

View file

@ -37,11 +37,11 @@
#include <windows.h> #include <windows.h>
class SemaphoreWindows : public Semaphore { class SemaphoreWindows : public SemaphoreOld {
mutable HANDLE semaphore; mutable HANDLE semaphore;
static Semaphore *create_semaphore_windows(); static SemaphoreOld *create_semaphore_windows();
public: public:
virtual Error wait(); virtual Error wait();

View file

@ -476,7 +476,7 @@ EditorResourcePreview::EditorResourcePreview() {
thread = NULL; thread = NULL;
singleton = this; singleton = this;
preview_mutex = Mutex::create(); preview_mutex = Mutex::create();
preview_sem = Semaphore::create(); preview_sem = SemaphoreOld::create();
order = 0; order = 0;
exit = false; exit = false;
exited = false; exited = false;

View file

@ -71,7 +71,7 @@ class EditorResourcePreview : public Node {
List<QueueItem> queue; List<QueueItem> queue;
Mutex *preview_mutex; Mutex *preview_mutex;
Semaphore *preview_sem; SemaphoreOld *preview_sem;
Thread *thread; Thread *thread;
volatile bool exit; volatile bool exit;
volatile bool exited; volatile bool exited;

View file

@ -140,9 +140,6 @@ static PoolVector<uint8_t> _compile_shader_glsl(RenderingDevice::ShaderStage p_s
ERR_FAIL_COND_V(p_language==RenderingDevice::SHADER_LANGUAGE_HLSL,ret); ERR_FAIL_COND_V(p_language==RenderingDevice::SHADER_LANGUAGE_HLSL,ret);
// initialize in case it's not initialized. This is done once per thread
// and it's safe to call multiple times
glslang::InitializeProcess();
EShLanguage stages[RenderingDevice::SHADER_STAGE_MAX] = { EShLanguage stages[RenderingDevice::SHADER_STAGE_MAX] = {
EShLangVertex, EShLangVertex,
EShLangFragment, EShLangFragment,
@ -229,6 +226,9 @@ static PoolVector<uint8_t> _compile_shader_glsl(RenderingDevice::ShaderStage p_s
} }
void preregister_glslang_types() { void preregister_glslang_types() {
// initialize in case it's not initialized. This is done once per thread
// and it's safe to call multiple times
glslang::InitializeProcess();
RenderingDevice::shader_set_compile_function(_compile_shader_glsl); RenderingDevice::shader_set_compile_function(_compile_shader_glsl);
} }
@ -236,5 +236,5 @@ void register_glslang_types() {
} }
void unregister_glslang_types() { void unregister_glslang_types() {
glslang::FinalizeProcess();
} }

View file

@ -91,7 +91,7 @@ int SemaphoreIphone::get() const {
return 0; return 0;
} }
Semaphore *SemaphoreIphone::create_semaphore_iphone() { SemaphoreOld *SemaphoreIphone::create_semaphore_iphone() {
return memnew(SemaphoreIphone); return memnew(SemaphoreIphone);
} }

View file

@ -39,11 +39,11 @@ typedef struct cgsem cgsem_t;
#include "core/os/semaphore.h" #include "core/os/semaphore.h"
class SemaphoreIphone : public Semaphore { class SemaphoreIphone : public SemaphoreOld {
mutable cgsem_t sem; mutable cgsem_t sem;
static Semaphore *create_semaphore_iphone(); static SemaphoreOld *create_semaphore_iphone();
public: public:
virtual Error wait(); virtual Error wait();

View file

@ -86,7 +86,7 @@ int SemaphoreOSX::get() const {
return 0; return 0;
} }
Semaphore *SemaphoreOSX::create_semaphore_osx() { SemaphoreOld *SemaphoreOSX::create_semaphore_osx() {
return memnew(SemaphoreOSX); return memnew(SemaphoreOSX);
} }

View file

@ -39,11 +39,11 @@ typedef struct cgsem cgsem_t;
#include "core/os/semaphore.h" #include "core/os/semaphore.h"
class SemaphoreOSX : public Semaphore { class SemaphoreOSX : public SemaphoreOld {
mutable cgsem_t sem; mutable cgsem_t sem;
static Semaphore *create_semaphore_osx(); static SemaphoreOld *create_semaphore_osx();
public: public:
virtual Error wait(); virtual Error wait();

View file

@ -107,7 +107,7 @@ void Physics2DServerWrapMT::init() {
if (create_thread) { if (create_thread) {
step_sem = Semaphore::create(); step_sem = SemaphoreOld::create();
//OS::get_singleton()->release_rendering_thread(); //OS::get_singleton()->release_rendering_thread();
if (create_thread) { if (create_thread) {
thread = Thread::create(_thread_callback, this); thread = Thread::create(_thread_callback, this);

View file

@ -58,7 +58,7 @@ class Physics2DServerWrapMT : public Physics2DServer {
volatile bool step_thread_up; volatile bool step_thread_up;
bool create_thread; bool create_thread;
Semaphore *step_sem; SemaphoreOld *step_sem;
int step_pending; int step_pending;
void thread_step(real_t p_delta); void thread_step(real_t p_delta);
void thread_flush(); void thread_flush();

View file

@ -76,7 +76,7 @@ void RasterizerRD::initialize() {
RenderingDevice::ShaderStageData frag; RenderingDevice::ShaderStageData frag;
frag.shader_stage = RenderingDevice::SHADER_STAGE_FRAGMENT; frag.shader_stage = RenderingDevice::SHADER_STAGE_FRAGMENT;
frag.spir_v =RenderingDevice::get_singleton()->shader_compile_from_source(RenderingDevice::SHADER_STAGE_FRAGMENT, frag.spir_v = RenderingDevice::get_singleton()->shader_compile_from_source(RenderingDevice::SHADER_STAGE_FRAGMENT,
"#version 450\n" "#version 450\n"
"layout (location = 0) in vec2 uv;\n" "layout (location = 0) in vec2 uv;\n"
"layout (location = 0) out vec4 color;\n" "layout (location = 0) out vec4 color;\n"
@ -120,8 +120,12 @@ void RasterizerRD::initialize() {
} }
} }
ThreadWorkPool RasterizerRD::thread_work_pool;
void RasterizerRD::finalize() { void RasterizerRD::finalize() {
thread_work_pool.finish();
memdelete(scene); memdelete(scene);
memdelete(canvas); memdelete(canvas);
memdelete(storage); memdelete(storage);
@ -133,6 +137,7 @@ void RasterizerRD::finalize() {
} }
RasterizerRD::RasterizerRD() { RasterizerRD::RasterizerRD() {
thread_work_pool.init();
time = 0; time = 0;
storage = memnew(RasterizerStorageRD); storage = memnew(RasterizerStorageRD);
canvas = memnew(RasterizerCanvasRD(storage)); canvas = memnew(RasterizerCanvasRD(storage));

View file

@ -2,10 +2,12 @@
#define RASTERIZER_RD_H #define RASTERIZER_RD_H
#include "core/os/os.h" #include "core/os/os.h"
#include "core/thread_work_pool.h"
#include "servers/visual/rasterizer.h" #include "servers/visual/rasterizer.h"
#include "servers/visual/rasterizer_rd/rasterizer_canvas_rd.h" #include "servers/visual/rasterizer_rd/rasterizer_canvas_rd.h"
#include "servers/visual/rasterizer_rd/rasterizer_scene_forward_rd.h" #include "servers/visual/rasterizer_rd/rasterizer_scene_forward_rd.h"
#include "servers/visual/rasterizer_rd/rasterizer_storage_rd.h" #include "servers/visual/rasterizer_rd/rasterizer_storage_rd.h"
class RasterizerRD : public Rasterizer { class RasterizerRD : public Rasterizer {
protected: protected:
RasterizerCanvasRD *canvas; RasterizerCanvasRD *canvas;
@ -51,6 +53,8 @@ public:
virtual bool is_low_end() const { return true; } virtual bool is_low_end() const { return true; }
static ThreadWorkPool thread_work_pool;
RasterizerRD(); RasterizerRD();
~RasterizerRD() {} ~RasterizerRD() {}
}; };

View file

@ -4,8 +4,8 @@
#include "core/rid_owner.h" #include "core/rid_owner.h"
#include "servers/visual/rasterizer.h" #include "servers/visual/rasterizer.h"
#include "servers/visual/rasterizer_rd/effects_rd.h" #include "servers/visual/rasterizer_rd/effects_rd.h"
#include "servers/visual/rendering_device.h"
#include "servers/visual/rasterizer_rd/shader_compiler_rd.h" #include "servers/visual/rasterizer_rd/shader_compiler_rd.h"
#include "servers/visual/rendering_device.h"
class RasterizerStorageRD : public RasterizerStorage { class RasterizerStorageRD : public RasterizerStorage {
public: public:
@ -109,7 +109,8 @@ private:
} }
}; };
mutable RID_Owner<Texture> texture_owner; //textures can be created from threads, so this RID_Owner is thread safe
mutable RID_Owner<Texture, true> texture_owner;
Ref<Image> _validate_texture_format(const Ref<Image> &p_image, TextureToRDFormat &r_format); Ref<Image> _validate_texture_format(const Ref<Image> &p_image, TextureToRDFormat &r_format);

View file

@ -30,6 +30,7 @@
#include "shader_rd.h" #include "shader_rd.h"
#include "core/string_builder.h" #include "core/string_builder.h"
#include "rasterizer_rd.h"
#include "servers/visual/rendering_device.h" #include "servers/visual/rendering_device.h"
void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name) { void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name) {
@ -160,6 +161,121 @@ void ShaderRD::_clear_version(Version *p_version) {
p_version->variants = NULL; p_version->variants = NULL;
} }
} }
// Compiles a single shader variant (index p_variant) of p_version.
//
// The vertex stage source is assembled and compiled first; the fragment stage
// is only attempted when the vertex stage produced SPIR-V. If either stage
// fails, the error (and, in debug builds, the numbered source) is printed
// while holding variant_set_mutex and the function returns without writing to
// p_version->variants. On success the stages are passed to shader_create()
// and the resulting RID is stored in p_version->variants[p_variant], also
// under variant_set_mutex.
void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
Vector<RD::ShaderStageData> stages;
String error; // filled in by shader_compile_from_source
String current_source; // source of the stage compiled last, kept for error reporting
RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX; // stage compiled last, kept for error reporting
bool build_ok = true;
{
//vertex stage
StringBuilder builder;
builder.append(vertex_codev.get_data()); // version info (if exists)
builder.append("\n"); //make sure defines begin at newline
builder.append(general_defines.get_data());
builder.append(variant_defines[p_variant].get_data());
for (int j = 0; j < p_version->custom_defines.size(); j++) {
builder.append(p_version->custom_defines[j].get_data());
}
builder.append(vertex_code0.get_data()); //first part of vertex
builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment)
builder.append(vertex_code1.get_data()); //second part of vertex
builder.append(p_version->vertex_globals.get_data()); // vertex globals
builder.append(vertex_code2.get_data()); //third part of vertex
builder.append(p_version->vertex_code.get_data()); // code
builder.append(vertex_code3.get_data()); //fourth part of vertex
current_source = builder.as_string();
RD::ShaderStageData stage;
stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_VERTEX, current_source, RD::SHADER_LANGUAGE_GLSL, &error);
// an empty result is treated as a compile failure
if (stage.spir_v.size() == 0) {
build_ok = false;
} else {
stage.shader_stage = RD::SHADER_STAGE_VERTEX;
stages.push_back(stage);
}
}
if (build_ok) {
//fragment stage
current_stage = RD::SHADER_STAGE_FRAGMENT;
StringBuilder builder;
builder.append(fragment_codev.get_data()); // version info (if exists)
builder.append("\n"); //make sure defines begin at newline
builder.append(general_defines.get_data());
builder.append(variant_defines[p_variant].get_data());
for (int j = 0; j < p_version->custom_defines.size(); j++) {
builder.append(p_version->custom_defines[j].get_data());
}
builder.append(fragment_code0.get_data()); //first part of fragment
builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment)
builder.append(fragment_code1.get_data()); //second part of fragment
builder.append(p_version->fragment_globals.get_data()); // fragment globals
builder.append(fragment_code2.get_data()); //third part of fragment
builder.append(p_version->fragment_light.get_data()); // fragment light
builder.append(fragment_code3.get_data()); //fourth part of fragment
builder.append(p_version->fragment_code.get_data()); // fragment code
builder.append(fragment_code4.get_data()); //fifth part of fragment
current_source = builder.as_string();
RD::ShaderStageData stage;
stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_FRAGMENT, current_source, RD::SHADER_LANGUAGE_GLSL, &error);
// an empty result is treated as a compile failure
if (stage.spir_v.size() == 0) {
build_ok = false;
} else {
stage.shader_stage = RD::SHADER_STAGE_FRAGMENT;
stages.push_back(stage);
}
}
if (!build_ok) {
variant_set_mutex.lock(); //hold the lock while printing so the messages of one failure stay together
ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment") + " shader, variant #" + itos(p_variant) + " (" + variant_defines[p_variant].get_data() + ").");
ERR_PRINT(error);
#ifdef DEBUG_ENABLED
ERR_PRINT("code:\n" + current_source.get_with_code_lines());
#endif
variant_set_mutex.unlock();
return;
}
// both stages compiled: create the shader outside the lock, then publish its RID under it
RID shader = RD::get_singleton()->shader_create(stages);
variant_set_mutex.lock();
p_version->variants[p_variant] = shader;
variant_set_mutex.unlock();
}
void ShaderRD::_compile_version(Version *p_version) { void ShaderRD::_compile_version(Version *p_version) {
_clear_version(p_version); _clear_version(p_version);
@ -168,134 +284,34 @@ void ShaderRD::_compile_version(Version *p_version) {
p_version->dirty = false; p_version->dirty = false;
p_version->variants = memnew_arr(RID, variant_defines.size()); p_version->variants = memnew_arr(RID, variant_defines.size());
#if 1
RasterizerRD::thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version);
#else
for (int i = 0; i < variant_defines.size(); i++) { for (int i = 0; i < variant_defines.size(); i++) {
Vector<RD::ShaderStageData> stages; _compile_variant(i, p_version);
}
String error;
String current_source;
RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX;
bool build_ok=true;
{
//vertex stage
StringBuilder builder;
builder.append(vertex_codev.get_data()); // version info (if exists)
builder.append("\n"); //make sure defines begin at newline
builder.append(general_defines.get_data());
builder.append(variant_defines[i].get_data());
for (int j = 0; j < p_version->custom_defines.size(); j++) {
builder.append(p_version->custom_defines[j].get_data());
}
builder.append(vertex_code0.get_data()); //first part of vertex
builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment)
builder.append(vertex_code1.get_data()); //second part of vertex
builder.append(p_version->vertex_globals.get_data()); // vertex globals
builder.append(vertex_code2.get_data()); //third part of vertex
builder.append(p_version->vertex_code.get_data()); // code
builder.append(vertex_code3.get_data()); //fourth of vertex
current_source = builder.as_string();
RD::ShaderStageData stage;
stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_VERTEX,current_source,RD::SHADER_LANGUAGE_GLSL,&error);
if (stage.spir_v.size()==0) {
build_ok=false;
} else {
stage.shader_stage = RD::SHADER_STAGE_VERTEX;
stages.push_back(stage);
}
}
if (build_ok){
//fragment stage
current_stage =RD::SHADER_STAGE_FRAGMENT;
StringBuilder builder;
builder.append(fragment_codev.get_data()); // version info (if exists)
builder.append("\n"); //make sure defines begin at newline
builder.append(general_defines.get_data());
builder.append(variant_defines[i].get_data());
for (int j = 0; j < p_version->custom_defines.size(); j++) {
builder.append(p_version->custom_defines[j].get_data());
}
builder.append(fragment_code0.get_data()); //first part of fragment
builder.append(p_version->uniforms.get_data()); //uniforms (same for fragment and fragment)
builder.append(fragment_code1.get_data()); //first part of fragment
builder.append(p_version->fragment_globals.get_data()); // fragment globals
builder.append(fragment_code2.get_data()); //third part of fragment
builder.append(p_version->fragment_light.get_data()); // fragment light
builder.append(fragment_code3.get_data()); //fourth part of fragment
builder.append(p_version->fragment_code.get_data()); // fragment code
builder.append(fragment_code4.get_data()); //fourth part of fragment
current_source = builder.as_string();
RD::ShaderStageData stage;
stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_FRAGMENT,current_source,RD::SHADER_LANGUAGE_GLSL,&error);
if (stage.spir_v.size()==0) {
build_ok=false;
} else {
stage.shader_stage = RD::SHADER_STAGE_FRAGMENT;
stages.push_back(stage);
}
}
if (!build_ok) {
ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment") + " shader, variant #" + itos(i) + " (" + variant_defines[i].get_data() + ").");
ERR_PRINT(error);
#ifdef DEBUG_ENABLED
ERR_PRINT("code:\n" + current_source.get_with_code_lines());
#endif #endif
//clear versions if they exist
for (int j = 0; j < i; j++) {
RD::get_singleton()->free(p_version->variants[j]);
}
memdelete_arr(p_version->variants); bool all_valid = true;
p_version->variants = NULL; for (int i = 0; i < variant_defines.size(); i++) {
return; if (p_version->variants[i].is_null()) {
all_valid = false;
break;
} }
}
RID shader = RD::get_singleton()->shader_create(stages); if (!all_valid) {
//clear versions if they exist
if (shader.is_null()) { for (int i = 0; i < variant_defines.size(); i++) {
//clear versions if they exist if (!p_version->variants[i].is_null()) {
for (int j = 0; j < i; j++) { RD::get_singleton()->free(p_version->variants[i]);
RD::get_singleton()->free(p_version->variants[j]);
} }
memdelete_arr(p_version->variants);
p_version->variants = NULL;
return;
} }
memdelete_arr(p_version->variants);
p_version->variants[i] = shader; p_version->variants = NULL;
return;
} }
p_version->valid = true; p_version->valid = true;

View file

@ -36,7 +36,7 @@
#include "core/rid_owner.h" #include "core/rid_owner.h"
#include "core/variant.h" #include "core/variant.h"
#include <stdio.h> #include <stdio.h>
#include <mutex>
/** /**
@author Juan Linietsky <reduzio@gmail.com> @author Juan Linietsky <reduzio@gmail.com>
*/ */
@ -67,6 +67,10 @@ class ShaderRD {
bool initialize_needed; bool initialize_needed;
}; };
std::mutex variant_set_mutex;
void _compile_variant(uint32_t p_variant, Version *p_version);
void _clear_version(Version *p_version); void _clear_version(Version *p_version);
void _compile_version(Version *p_version); void _compile_version(Version *p_version);

View file

@ -3511,7 +3511,7 @@ VisualServerScene *VisualServerScene::singleton = NULL;
VisualServerScene::VisualServerScene() { VisualServerScene::VisualServerScene() {
#ifndef NO_THREADS #ifndef NO_THREADS
probe_bake_sem = Semaphore::create(); probe_bake_sem = SemaphoreOld::create();
probe_bake_mutex = Mutex::create(); probe_bake_mutex = Mutex::create();
probe_bake_thread = Thread::create(_gi_probe_bake_threads, this); probe_bake_thread = Thread::create(_gi_probe_bake_threads, this);
probe_bake_thread_exit = false; probe_bake_thread_exit = false;

View file

@ -517,7 +517,7 @@ public:
volatile bool probe_bake_thread_exit; volatile bool probe_bake_thread_exit;
Thread *probe_bake_thread; Thread *probe_bake_thread;
Semaphore *probe_bake_sem; SemaphoreOld *probe_bake_sem;
Mutex *probe_bake_mutex; Mutex *probe_bake_mutex;
List<Instance *> probe_bake_list; List<Instance *> probe_bake_list;