Added a spinlock template as well as a thread work pool class.

Also, optimized shader compilation to happen on threads.
This commit is contained in:
Juan Linietsky 2019-07-29 12:59:18 -03:00
parent 4fe3ee1730
commit c613ead5fa
37 changed files with 458 additions and 192 deletions

View file

@ -2612,7 +2612,7 @@ void _Semaphore::_bind_methods() {
_Semaphore::_Semaphore() {
semaphore = Semaphore::create();
semaphore = SemaphoreOld::create();
}
_Semaphore::~_Semaphore() {

View file

@ -642,7 +642,7 @@ public:
class _Semaphore : public Reference {
GDCLASS(_Semaphore, Reference);
Semaphore *semaphore;
SemaphoreOld *semaphore;
static void _bind_methods();

View file

@ -111,11 +111,11 @@ CommandQueueMT::CommandQueueMT(bool p_sync) {
for (int i = 0; i < SYNC_SEMAPHORES; i++) {
sync_sems[i].sem = Semaphore::create();
sync_sems[i].sem = SemaphoreOld::create();
sync_sems[i].in_use = false;
}
if (p_sync)
sync = Semaphore::create();
sync = SemaphoreOld::create();
else
sync = NULL;
}

View file

@ -297,7 +297,7 @@ class CommandQueueMT {
struct SyncSemaphore {
Semaphore *sem;
SemaphoreOld *sem;
bool in_use;
};
@ -342,7 +342,7 @@ class CommandQueueMT {
uint32_t dealloc_ptr;
SyncSemaphore sync_sems[SYNC_SEMAPHORES];
Mutex *mutex;
Semaphore *sync;
SemaphoreOld *sync;
template <class T>
T *allocate() {

View file

@ -231,7 +231,7 @@ FileAccessNetworkClient::FileAccessNetworkClient() {
singleton = this;
last_id = 0;
client.instance();
sem = Semaphore::create();
sem = SemaphoreOld::create();
lockcount = 0;
}
@ -522,8 +522,8 @@ FileAccessNetwork::FileAccessNetwork() {
eof_flag = false;
opened = false;
pos = 0;
sem = Semaphore::create();
page_sem = Semaphore::create();
sem = SemaphoreOld::create();
page_sem = SemaphoreOld::create();
buffer_mutex = Mutex::create();
FileAccessNetworkClient *nc = FileAccessNetworkClient::singleton;
nc->lock_mutex();

View file

@ -49,7 +49,7 @@ class FileAccessNetworkClient {
List<BlockRequest> block_requests;
Semaphore *sem;
SemaphoreOld *sem;
Thread *thread;
bool quit;
Mutex *mutex;
@ -85,8 +85,8 @@ public:
class FileAccessNetwork : public FileAccess {
Semaphore *sem;
Semaphore *page_sem;
SemaphoreOld *sem;
SemaphoreOld *page_sem;
Mutex *buffer_mutex;
bool opened;
size_t total_size;

View file

@ -71,7 +71,7 @@ struct _IP_ResolverPrivate {
}
Mutex *mutex;
Semaphore *sem;
SemaphoreOld *sem;
Thread *thread;
//Semaphore* semaphore;
@ -319,7 +319,7 @@ IP::IP() {
#ifndef NO_THREADS
resolver->sem = Semaphore::create();
resolver->sem = SemaphoreOld::create();
if (resolver->sem) {
resolver->thread_abort = false;

View file

@ -32,14 +32,14 @@
#include "core/error_macros.h"
Semaphore *(*Semaphore::create_func)() = 0;
SemaphoreOld *(*SemaphoreOld::create_func)() = 0;
Semaphore *Semaphore::create() {
SemaphoreOld *SemaphoreOld::create() {
ERR_FAIL_COND_V(!create_func, 0);
return create_func();
}
Semaphore::~Semaphore() {
SemaphoreOld::~SemaphoreOld() {
}

View file

@ -32,19 +32,53 @@
#define SEMAPHORE_H
#include "core/error_list.h"
#include "core/typedefs.h"
#include <condition_variable>
#include <mutex>
// Counting semaphore implemented with a standard mutex and condition variable.
// Unlike SemaphoreOld it needs no platform-specific factory and can be used
// directly as a value member.
class Semaphore {
private:
	std::mutex mutex_;
	std::condition_variable condition_;
	unsigned long count_ = 0; // Starts at zero, so a wait() blocks until the first post().

public:
	// Increments the count and wakes up one waiter, if there is any.
	_ALWAYS_INLINE_ void post() {
		std::lock_guard<std::mutex> guard(mutex_);
		count_ += 1;
		condition_.notify_one();
	}

	// Blocks the caller until the count is non-zero, then decrements it.
	_ALWAYS_INLINE_ void wait() {
		std::unique_lock<std::mutex> guard(mutex_);
		// The predicate overload re-checks the count after every wake-up,
		// which takes care of spurious wake-ups.
		condition_.wait(guard, [this]() { return count_ != 0; });
		count_ -= 1;
	}

	// Non-blocking variant of wait(): decrements the count and returns true
	// when it is non-zero, otherwise returns false without waiting.
	_ALWAYS_INLINE_ bool try_wait() {
		std::lock_guard<std::mutex> guard(mutex_);
		if (count_ == 0) {
			return false;
		}
		count_ -= 1;
		return true;
	}
};
class SemaphoreOld {
protected:
static Semaphore *(*create_func)();
static SemaphoreOld *(*create_func)();
public:
virtual Error wait() = 0; ///< wait until semaphore has positive value, then decrement and pass
virtual Error post() = 0; ///< unlock the semaphore, incrementing the value
virtual int get() const = 0; ///< get semaphore value
static Semaphore *create(); ///< Create a mutex
static SemaphoreOld *create(); ///< Create a mutex
virtual ~Semaphore();
virtual ~SemaphoreOld();
};
#endif

View file

@ -48,12 +48,12 @@ void MutexDummy::make_default() {
Mutex::create_func = &MutexDummy::create;
};
Semaphore *SemaphoreDummy::create() {
SemaphoreOld *SemaphoreDummy::create() {
return memnew(SemaphoreDummy);
};
void SemaphoreDummy::make_default() {
Semaphore::create_func = &SemaphoreDummy::create;
SemaphoreOld::create_func = &SemaphoreDummy::create;
};
RWLock *RWLockDummy::create() {

View file

@ -58,9 +58,9 @@ public:
static void make_default();
};
class SemaphoreDummy : public Semaphore {
class SemaphoreDummy : public SemaphoreOld {
static Semaphore *create();
static SemaphoreOld *create();
public:
virtual Error wait() { return OK; };

View file

@ -3,6 +3,8 @@
#include "core/print_string.h"
#include "core/rid.h"
#include "core/spin_lock.h"
#include <typeinfo>
class RID_AllocBase {
@ -28,7 +30,7 @@ public:
virtual ~RID_AllocBase() {}
};
template <class T>
template <class T, bool THREAD_SAFE = false>
class RID_Alloc : public RID_AllocBase {
T **chunks;
@ -41,9 +43,15 @@ class RID_Alloc : public RID_AllocBase {
const char *description;
SpinLock spin_lock;
public:
RID make_rid(const T &p_value) {
if (THREAD_SAFE) {
spin_lock.lock();
}
if (alloc_count == max_alloc) {
//allocate a new chunk
uint32_t chunk_count = alloc_count == 0 ? 0 : (max_alloc / elements_in_chunk);
@ -85,11 +93,19 @@ public:
validator_chunks[free_chunk][free_element] = validator;
alloc_count++;
if (THREAD_SAFE) {
spin_lock.unlock();
}
return _make_from_id(id);
}
_FORCE_INLINE_ T *getornull(const RID &p_rid) {
if (THREAD_SAFE) {
spin_lock.lock();
}
uint64_t id = p_rid.get_id();
uint32_t idx = uint32_t(id & 0xFFFFFFFF);
if (unlikely(idx >= max_alloc)) {
@ -104,14 +120,27 @@ public:
return NULL;
}
return &chunks[idx_chunk][idx_element];
T *ptr = &chunks[idx_chunk][idx_element];
if (THREAD_SAFE) {
spin_lock.unlock();
}
return ptr;
}
_FORCE_INLINE_ bool owns(const RID &p_rid) {
if (THREAD_SAFE) {
spin_lock.lock();
}
uint64_t id = p_rid.get_id();
uint32_t idx = uint32_t(id & 0xFFFFFFFF);
if (unlikely(idx >= max_alloc)) {
if (THREAD_SAFE) {
spin_lock.unlock();
}
return false;
}
@ -119,11 +148,22 @@ public:
uint32_t idx_element = idx % elements_in_chunk;
uint32_t validator = uint32_t(id >> 32);
return validator_chunks[idx_chunk][idx_element] == validator;
bool owned = validator_chunks[idx_chunk][idx_element] == validator;
if (THREAD_SAFE) {
spin_lock.unlock();
}
return owned;
}
_FORCE_INLINE_ void free(const RID &p_rid) {
if (THREAD_SAFE) {
spin_lock.lock();
}
uint64_t id = p_rid.get_id();
uint32_t idx = uint32_t(id & 0xFFFFFFFF);
ERR_FAIL_COND(idx >= max_alloc);
@ -139,6 +179,10 @@ public:
alloc_count--;
free_list_chunks[alloc_count / elements_in_chunk][alloc_count % elements_in_chunk] = idx;
if (THREAD_SAFE) {
spin_lock.unlock();
}
}
_FORCE_INLINE_ uint32_t get_rid_count() const {
@ -147,8 +191,15 @@ public:
_FORCE_INLINE_ T *get_rid_by_index(uint32_t p_index) {
ERR_FAIL_INDEX_V(p_index, alloc_count, NULL);
if (THREAD_SAFE) {
spin_lock.lock();
}
uint64_t idx = free_list_chunks[p_index / elements_in_chunk][p_index % elements_in_chunk];
return &chunks[idx / elements_in_chunk][idx % elements_in_chunk];
T *ptr = &chunks[idx / elements_in_chunk][idx % elements_in_chunk];
if (THREAD_SAFE) {
spin_lock.unlock();
}
return ptr;
}
void get_owned_list(List<RID> *p_owned) {
@ -203,9 +254,9 @@ public:
}
};
template <class T>
template <class T, bool THREAD_SAFE = false>
class RID_PtrOwner {
RID_Alloc<T *> alloc;
RID_Alloc<T *, THREAD_SAFE> alloc;
public:
_FORCE_INLINE_ RID make_rid(T *p_ptr) {
@ -239,9 +290,9 @@ public:
alloc(p_target_chunk_byte_size) {}
};
template <class T>
template <class T, bool THREAD_SAFE = false>
class RID_Owner {
RID_Alloc<T> alloc;
RID_Alloc<T, THREAD_SAFE> alloc;
public:
_FORCE_INLINE_ RID make_rid(const T &p_ptr) {

20
core/spin_lock.h Normal file
View file

@ -0,0 +1,20 @@
#ifndef SPIN_LOCK_H
#define SPIN_LOCK_H
#include "core/typedefs.h"
#include <atomic>
// Minimal busy-waiting lock backed by a std::atomic_flag.
class SpinLock {
	std::atomic_flag locked = ATOMIC_FLAG_INIT;

public:
	// Spins until this caller manages to set the flag.
	_ALWAYS_INLINE_ void lock() {
		for (;;) {
			// test_and_set() returns the previous value: false means the flag
			// was clear and is now owned by this caller.
			if (!locked.test_and_set(std::memory_order_acquire)) {
				return;
			}
		}
	}

	// Clears the flag so that another spinning caller can acquire it.
	_ALWAYS_INLINE_ void unlock() {
		locked.clear(std::memory_order_release);
	}
};
#endif // SPIN_LOCK_H

53
core/thread_work_pool.cpp Normal file
View file

@ -0,0 +1,53 @@
#include "thread_work_pool.h"
#include "core/os/os.h"
// Entry point of every worker thread: sleep on `start`, run the assigned
// work item, then signal `completed`. Leaves the loop once `exit` is set.
void ThreadWorkPool::_thread_function(ThreadData *p_thread) {
	for (;;) {
		p_thread->start.wait();

		// finish() sets `exit` before posting `start`, so check it first.
		const bool exit_requested = p_thread->exit.load();
		if (exit_requested) {
			return;
		}

		p_thread->work->work();
		p_thread->completed.post();
	}
}
void ThreadWorkPool::init(int p_thread_count) {
ERR_FAIL_COND(threads != nullptr);
if (p_thread_count < 0) {
p_thread_count = OS::get_singleton()->get_processor_count();
}
thread_count = p_thread_count;
threads = memnew_arr(ThreadData, thread_count);
for (uint32_t i = 0; i < thread_count; i++) {
threads[i].exit.store(false);
threads[i].thread = memnew(std::thread(ThreadWorkPool::_thread_function, &threads[i]));
}
}
void ThreadWorkPool::finish() {
if (threads == nullptr) {
return;
}
for (uint32_t i = 0; i < thread_count; i++) {
threads[i].exit.store(true);
threads[i].start.post();
}
for (uint32_t i = 0; i < thread_count; i++) {
threads[i].thread->join();
memdelete(threads[i].thread);
}
memdelete_arr(threads);
threads = nullptr;
}
// Destructor: delegates to finish(), which returns early when `threads` is
// already null, so destroying a pool that was never initialized (or was
// already finished) is harmless.
ThreadWorkPool::~ThreadWorkPool() {
finish();
}

78
core/thread_work_pool.h Normal file
View file

@ -0,0 +1,78 @@
#ifndef THREAD_WORK_POOL_H
#define THREAD_WORK_POOL_H
#include "core/os/memory.h"
#include "core/os/semaphore.h"
#include <atomic>
#include <thread>
// Fixed set of worker threads that cooperatively process an indexed range of
// work items. do_work() hands the same work description to every worker;
// each worker repeatedly grabs the next free index from a shared atomic
// counter until the range is exhausted. do_work() blocks until all workers
// have signalled completion.
class ThreadWorkPool {

	// Next work index to hand out; shared by all workers during do_work().
	std::atomic<uint32_t> index;

	// Type-erased description of a batch of work.
	struct BaseWork {
		std::atomic<uint32_t> *index;
		uint32_t max_elements;
		virtual void work() = 0;
		// Polymorphic base: keep destruction through a BaseWork pointer well defined.
		virtual ~BaseWork() {}
	};

	// Concrete batch: calls (instance->*method)(work_index, userdata) for
	// every index this worker manages to claim.
	template <class C, class M, class U>
	struct Work : public BaseWork {
		C *instance;
		M method;
		U userdata;
		virtual void work() {

			while (true) {
				// fetch_add() hands out each index exactly once across all workers.
				uint32_t work_index = index->fetch_add(1, std::memory_order_relaxed);
				if (work_index >= max_elements) {
					break;
				}
				(instance->*method)(work_index, userdata);
			}
		}
	};

	// Per-worker state. `start` wakes the worker, `completed` reports back.
	struct ThreadData {
		std::thread *thread = nullptr;
		Semaphore start;
		Semaphore completed;
		std::atomic<bool> exit;
		BaseWork *work = nullptr;
	};

	ThreadData *threads = nullptr;
	uint32_t thread_count = 0;

	static void _thread_function(ThreadData *p_thread);

public:
	// Runs p_method on p_instance once for every index in [0, p_elements),
	// distributing the indices over the pool's threads, and returns when all
	// workers are done. p_userdata is passed to every call as the second argument.
	// Fails (without doing any work) if init() was never called.
	template <class C, class M, class U>
	void do_work(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {

		ERR_FAIL_COND(!threads); //never initialized

		index.store(0);

		// The work description lives on this stack frame: the function does not
		// return before every worker has posted `completed`, and workers do not
		// touch the work after posting, so no heap allocation is needed and
		// nothing is leaked.
		Work<C, M, U> w;
		w.instance = p_instance;
		w.userdata = p_userdata;
		w.method = p_method;
		w.index = &index;
		w.max_elements = p_elements;

		for (uint32_t i = 0; i < thread_count; i++) {
			threads[i].work = &w;
			threads[i].start.post();
		}
		for (uint32_t i = 0; i < thread_count; i++) {
			threads[i].completed.wait();
			threads[i].work = nullptr;
		}
	}

	void init(int p_thread_count = -1);
	void finish();
	~ThreadWorkPool();
};
#endif // THREAD_WORK_POOL_H

View file

@ -62,7 +62,7 @@ int SemaphorePosix::get() const {
return val;
}
Semaphore *SemaphorePosix::create_semaphore_posix() {
SemaphoreOld *SemaphorePosix::create_semaphore_posix() {
return memnew(SemaphorePosix);
}

View file

@ -37,11 +37,11 @@
#include <semaphore.h>
class SemaphorePosix : public Semaphore {
class SemaphorePosix : public SemaphoreOld {
mutable sem_t sem;
static Semaphore *create_semaphore_posix();
static SemaphoreOld *create_semaphore_posix();
public:
virtual Error wait();

View file

@ -3448,8 +3448,6 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa
RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages) {
_THREAD_SAFE_METHOD_
//descriptor layouts
Vector<Vector<VkDescriptorSetLayoutBinding> > set_bindings;
Vector<Vector<UniformInfo> > uniform_info;
@ -3694,6 +3692,8 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
//all good, let's create modules
_THREAD_SAFE_METHOD_
Shader shader;
shader.vertex_input_locations = vertex_input_locations;

View file

@ -114,7 +114,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
RID owner;
};
RID_Owner<Texture> texture_owner;
RID_Owner<Texture, true> texture_owner;
uint32_t texture_upload_region_size_px;
PoolVector<uint8_t> _texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer);
@ -264,7 +264,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
Size2 size;
};
RID_Owner<Framebuffer> framebuffer_owner;
RID_Owner<Framebuffer, true> framebuffer_owner;
/***********************/
/**** VERTEX BUFFER ****/
@ -279,7 +279,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
// This mapping is done here internally, and it's not
// exposed.
RID_Owner<Buffer> vertex_buffer_owner;
RID_Owner<Buffer, true> vertex_buffer_owner;
struct VertexDescriptionKey {
Vector<VertexDescription> vertex_formats;
@ -359,7 +359,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
Vector<VkDeviceSize> offsets;
};
RID_Owner<VertexArray> vertex_array_owner;
RID_Owner<VertexArray, true> vertex_array_owner;
struct IndexBuffer : public Buffer {
uint32_t max_index; //used for validation
@ -368,7 +368,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
bool supports_restart_indices;
};
RID_Owner<IndexBuffer> index_buffer_owner;
RID_Owner<IndexBuffer, true> index_buffer_owner;
struct IndexArray {
uint32_t max_index; //remember the maximum index here too, for validation
@ -379,7 +379,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
bool supports_restart_indices;
};
RID_Owner<IndexArray> index_array_owner;
RID_Owner<IndexArray, true> index_array_owner;
/****************/
/**** SHADER ****/
@ -495,7 +495,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
String _shader_uniform_debug(RID p_shader, int p_set = -1);
RID_Owner<Shader> shader_owner;
RID_Owner<Shader, true> shader_owner;
/******************/
/**** UNIFORMS ****/
@ -559,8 +559,8 @@ class RenderingDeviceVulkan : public RenderingDevice {
DescriptorPool *_descriptor_pool_allocate(const DescriptorPoolKey &p_key);
void _descriptor_pool_free(const DescriptorPoolKey &p_key, DescriptorPool *p_pool);
RID_Owner<Buffer> uniform_buffer_owner;
RID_Owner<Buffer> storage_buffer_owner;
RID_Owner<Buffer, true> uniform_buffer_owner;
RID_Owner<Buffer, true> storage_buffer_owner;
//texture buffer needs a view
struct TextureBuffer {
@ -568,7 +568,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
VkBufferView view;
};
RID_Owner<TextureBuffer> texture_buffer_owner;
RID_Owner<TextureBuffer, true> texture_buffer_owner;
// This structure contains the descriptor set. They _need_ to be allocated
// for a shader (and will be erased when this shader is erased), but should
@ -589,7 +589,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
Vector<RID> attachable_textures; //used for validation
};
RID_Owner<UniformSet> uniform_set_owner;
RID_Owner<UniformSet, true> uniform_set_owner;
/*******************/
/**** PIPELINES ****/
@ -607,7 +607,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
// was not supplied as intended.
struct RenderPipeline {
//Cached values for validation
//Cached values for validation
#ifdef DEBUG_ENABLED
struct Validation {
FramebufferFormatID framebuffer_format;
@ -627,7 +627,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
uint32_t push_constant_stages;
};
RID_Owner<RenderPipeline> pipeline_owner;
RID_Owner<RenderPipeline, true> pipeline_owner;
/*******************/
/**** DRAW LIST ****/

View file

@ -61,7 +61,7 @@ int SemaphoreWindows::get() const {
ERR_FAIL_V(-1);
}
Semaphore *SemaphoreWindows::create_semaphore_windows() {
SemaphoreOld *SemaphoreWindows::create_semaphore_windows() {
return memnew(SemaphoreWindows);
}

View file

@ -37,11 +37,11 @@
#include <windows.h>
class SemaphoreWindows : public Semaphore {
class SemaphoreWindows : public SemaphoreOld {
mutable HANDLE semaphore;
static Semaphore *create_semaphore_windows();
static SemaphoreOld *create_semaphore_windows();
public:
virtual Error wait();

View file

@ -476,7 +476,7 @@ EditorResourcePreview::EditorResourcePreview() {
thread = NULL;
singleton = this;
preview_mutex = Mutex::create();
preview_sem = Semaphore::create();
preview_sem = SemaphoreOld::create();
order = 0;
exit = false;
exited = false;

View file

@ -71,7 +71,7 @@ class EditorResourcePreview : public Node {
List<QueueItem> queue;
Mutex *preview_mutex;
Semaphore *preview_sem;
SemaphoreOld *preview_sem;
Thread *thread;
volatile bool exit;
volatile bool exited;

View file

@ -140,9 +140,6 @@ static PoolVector<uint8_t> _compile_shader_glsl(RenderingDevice::ShaderStage p_s
ERR_FAIL_COND_V(p_language==RenderingDevice::SHADER_LANGUAGE_HLSL,ret);
// initialize in case it's not initialized. This is done once per thread
// and it's safe to call multiple times
glslang::InitializeProcess();
EShLanguage stages[RenderingDevice::SHADER_STAGE_MAX] = {
EShLangVertex,
EShLangFragment,
@ -229,6 +226,9 @@ static PoolVector<uint8_t> _compile_shader_glsl(RenderingDevice::ShaderStage p_s
}
// Pre-registration hook: initializes glslang and installs
// _compile_shader_glsl as RenderingDevice's shader compile function.
void preregister_glslang_types() {
// Initialize glslang before the compile function is installed.
// NOTE(review): the earlier wording ("done once per thread") does not match
// this call site, which runs once per call to this function — confirm against
// the glslang documentation that a single call is sufficient when the compile
// function is later invoked from worker threads.
glslang::InitializeProcess();
RenderingDevice::shader_set_compile_function(_compile_shader_glsl);
}
@ -236,5 +236,5 @@ void register_glslang_types() {
}
// Unregistration hook: counterpart of the glslang::InitializeProcess() call
// made in preregister_glslang_types().
void unregister_glslang_types() {
glslang::FinalizeProcess();
}

View file

@ -91,7 +91,7 @@ int SemaphoreIphone::get() const {
return 0;
}
Semaphore *SemaphoreIphone::create_semaphore_iphone() {
SemaphoreOld *SemaphoreIphone::create_semaphore_iphone() {
return memnew(SemaphoreIphone);
}

View file

@ -39,11 +39,11 @@ typedef struct cgsem cgsem_t;
#include "core/os/semaphore.h"
class SemaphoreIphone : public Semaphore {
class SemaphoreIphone : public SemaphoreOld {
mutable cgsem_t sem;
static Semaphore *create_semaphore_iphone();
static SemaphoreOld *create_semaphore_iphone();
public:
virtual Error wait();

View file

@ -86,7 +86,7 @@ int SemaphoreOSX::get() const {
return 0;
}
Semaphore *SemaphoreOSX::create_semaphore_osx() {
SemaphoreOld *SemaphoreOSX::create_semaphore_osx() {
return memnew(SemaphoreOSX);
}

View file

@ -39,11 +39,11 @@ typedef struct cgsem cgsem_t;
#include "core/os/semaphore.h"
class SemaphoreOSX : public Semaphore {
class SemaphoreOSX : public SemaphoreOld {
mutable cgsem_t sem;
static Semaphore *create_semaphore_osx();
static SemaphoreOld *create_semaphore_osx();
public:
virtual Error wait();

View file

@ -107,7 +107,7 @@ void Physics2DServerWrapMT::init() {
if (create_thread) {
step_sem = Semaphore::create();
step_sem = SemaphoreOld::create();
//OS::get_singleton()->release_rendering_thread();
if (create_thread) {
thread = Thread::create(_thread_callback, this);

View file

@ -58,7 +58,7 @@ class Physics2DServerWrapMT : public Physics2DServer {
volatile bool step_thread_up;
bool create_thread;
Semaphore *step_sem;
SemaphoreOld *step_sem;
int step_pending;
void thread_step(real_t p_delta);
void thread_flush();

View file

@ -76,7 +76,7 @@ void RasterizerRD::initialize() {
RenderingDevice::ShaderStageData frag;
frag.shader_stage = RenderingDevice::SHADER_STAGE_FRAGMENT;
frag.spir_v =RenderingDevice::get_singleton()->shader_compile_from_source(RenderingDevice::SHADER_STAGE_FRAGMENT,
frag.spir_v = RenderingDevice::get_singleton()->shader_compile_from_source(RenderingDevice::SHADER_STAGE_FRAGMENT,
"#version 450\n"
"layout (location = 0) in vec2 uv;\n"
"layout (location = 0) out vec4 color;\n"
@ -120,8 +120,12 @@ void RasterizerRD::initialize() {
}
}
ThreadWorkPool RasterizerRD::thread_work_pool;
void RasterizerRD::finalize() {
thread_work_pool.finish();
memdelete(scene);
memdelete(canvas);
memdelete(storage);
@ -133,6 +137,7 @@ void RasterizerRD::finalize() {
}
RasterizerRD::RasterizerRD() {
thread_work_pool.init();
time = 0;
storage = memnew(RasterizerStorageRD);
canvas = memnew(RasterizerCanvasRD(storage));

View file

@ -2,10 +2,12 @@
#define RASTERIZER_RD_H
#include "core/os/os.h"
#include "core/thread_work_pool.h"
#include "servers/visual/rasterizer.h"
#include "servers/visual/rasterizer_rd/rasterizer_canvas_rd.h"
#include "servers/visual/rasterizer_rd/rasterizer_scene_forward_rd.h"
#include "servers/visual/rasterizer_rd/rasterizer_storage_rd.h"
class RasterizerRD : public Rasterizer {
protected:
RasterizerCanvasRD *canvas;
@ -51,6 +53,8 @@ public:
virtual bool is_low_end() const { return true; }
static ThreadWorkPool thread_work_pool;
RasterizerRD();
~RasterizerRD() {}
};

View file

@ -4,8 +4,8 @@
#include "core/rid_owner.h"
#include "servers/visual/rasterizer.h"
#include "servers/visual/rasterizer_rd/effects_rd.h"
#include "servers/visual/rendering_device.h"
#include "servers/visual/rasterizer_rd/shader_compiler_rd.h"
#include "servers/visual/rendering_device.h"
class RasterizerStorageRD : public RasterizerStorage {
public:
@ -109,7 +109,8 @@ private:
}
};
mutable RID_Owner<Texture> texture_owner;
//textures can be created from threads, so this RID_Owner is thread safe
mutable RID_Owner<Texture, true> texture_owner;
Ref<Image> _validate_texture_format(const Ref<Image> &p_image, TextureToRDFormat &r_format);

View file

@ -30,6 +30,7 @@
#include "shader_rd.h"
#include "core/string_builder.h"
#include "rasterizer_rd.h"
#include "servers/visual/rendering_device.h"
void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name) {
@ -160,6 +161,121 @@ void ShaderRD::_clear_version(Version *p_version) {
p_version->variants = NULL;
}
}
// Compiles one shader variant (vertex stage, then fragment stage) of the given
// version and stores the resulting shader RID in p_version->variants[p_variant].
// If either stage fails to compile, the errors are printed and the variant
// slot is left untouched. variant_set_mutex serializes the error output and
// the write to the variants array; it is held through RAII guards so it is
// released on every path out of the guarded scopes.
void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {

	Vector<RD::ShaderStageData> stages;

	String error;
	String current_source;
	RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX;
	bool build_ok = true;

	{
		//vertex stage

		StringBuilder builder;

		builder.append(vertex_codev.get_data()); // version info (if exists)
		builder.append("\n"); //make sure defines begin at newline
		builder.append(general_defines.get_data());
		builder.append(variant_defines[p_variant].get_data());
		for (int j = 0; j < p_version->custom_defines.size(); j++) {
			builder.append(p_version->custom_defines[j].get_data());
		}
		builder.append(vertex_code0.get_data()); //first part of vertex

		builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment)
		builder.append(vertex_code1.get_data()); //second part of vertex

		builder.append(p_version->vertex_globals.get_data()); // vertex globals
		builder.append(vertex_code2.get_data()); //third part of vertex

		builder.append(p_version->vertex_code.get_data()); // code
		builder.append(vertex_code3.get_data()); //fourth part of vertex

		current_source = builder.as_string();
		RD::ShaderStageData stage;
		stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_VERTEX, current_source, RD::SHADER_LANGUAGE_GLSL, &error);
		if (stage.spir_v.size() == 0) {
			build_ok = false;
		} else {

			stage.shader_stage = RD::SHADER_STAGE_VERTEX;
			stages.push_back(stage);
		}
	}

	if (build_ok) {
		//fragment stage (only attempted when the vertex stage compiled)
		current_stage = RD::SHADER_STAGE_FRAGMENT;

		StringBuilder builder;

		builder.append(fragment_codev.get_data()); // version info (if exists)
		builder.append("\n"); //make sure defines begin at newline

		builder.append(general_defines.get_data());
		builder.append(variant_defines[p_variant].get_data());
		for (int j = 0; j < p_version->custom_defines.size(); j++) {
			builder.append(p_version->custom_defines[j].get_data());
		}
		builder.append(fragment_code0.get_data()); //first part of fragment

		builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment)
		builder.append(fragment_code1.get_data()); //second part of fragment

		builder.append(p_version->fragment_globals.get_data()); // fragment globals
		builder.append(fragment_code2.get_data()); //third part of fragment

		builder.append(p_version->fragment_light.get_data()); // fragment light
		builder.append(fragment_code3.get_data()); //fourth part of fragment

		builder.append(p_version->fragment_code.get_data()); // fragment code
		builder.append(fragment_code4.get_data()); //fifth part of fragment

		current_source = builder.as_string();
		RD::ShaderStageData stage;
		stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_FRAGMENT, current_source, RD::SHADER_LANGUAGE_GLSL, &error);
		if (stage.spir_v.size() == 0) {
			build_ok = false;
		} else {

			stage.shader_stage = RD::SHADER_STAGE_FRAGMENT;
			stages.push_back(stage);
		}
	}

	if (!build_ok) {
		// Hold the mutex while printing so that the lines of one failure are
		// not interleaved with output from another variant.
		std::lock_guard<std::mutex> lock(variant_set_mutex);
		ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment") + " shader, variant #" + itos(p_variant) + " (" + variant_defines[p_variant].get_data() + ").");
		ERR_PRINT(error);

#ifdef DEBUG_ENABLED
		ERR_PRINT("code:\n" + current_source.get_with_code_lines());
#endif
		return;
	}

	// Create the shader outside of the lock; only the store below is guarded.
	RID shader = RD::get_singleton()->shader_create(stages);

	{
		std::lock_guard<std::mutex> lock(variant_set_mutex);
		p_version->variants[p_variant] = shader;
	}
}
void ShaderRD::_compile_version(Version *p_version) {
_clear_version(p_version);
@ -168,134 +284,34 @@ void ShaderRD::_compile_version(Version *p_version) {
p_version->dirty = false;
p_version->variants = memnew_arr(RID, variant_defines.size());
#if 1
RasterizerRD::thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version);
#else
for (int i = 0; i < variant_defines.size(); i++) {
Vector<RD::ShaderStageData> stages;
String error;
String current_source;
RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX;
bool build_ok=true;
{
//vertex stage
StringBuilder builder;
builder.append(vertex_codev.get_data()); // version info (if exists)
builder.append("\n"); //make sure defines begin at newline
builder.append(general_defines.get_data());
builder.append(variant_defines[i].get_data());
for (int j = 0; j < p_version->custom_defines.size(); j++) {
builder.append(p_version->custom_defines[j].get_data());
}
builder.append(vertex_code0.get_data()); //first part of vertex
builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment)
builder.append(vertex_code1.get_data()); //second part of vertex
builder.append(p_version->vertex_globals.get_data()); // vertex globals
builder.append(vertex_code2.get_data()); //third part of vertex
builder.append(p_version->vertex_code.get_data()); // code
builder.append(vertex_code3.get_data()); //fourth of vertex
current_source = builder.as_string();
RD::ShaderStageData stage;
stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_VERTEX,current_source,RD::SHADER_LANGUAGE_GLSL,&error);
if (stage.spir_v.size()==0) {
build_ok=false;
} else {
stage.shader_stage = RD::SHADER_STAGE_VERTEX;
stages.push_back(stage);
}
}
if (build_ok){
//fragment stage
current_stage =RD::SHADER_STAGE_FRAGMENT;
StringBuilder builder;
builder.append(fragment_codev.get_data()); // version info (if exists)
builder.append("\n"); //make sure defines begin at newline
builder.append(general_defines.get_data());
builder.append(variant_defines[i].get_data());
for (int j = 0; j < p_version->custom_defines.size(); j++) {
builder.append(p_version->custom_defines[j].get_data());
}
builder.append(fragment_code0.get_data()); //first part of fragment
builder.append(p_version->uniforms.get_data()); //uniforms (same for fragment and fragment)
builder.append(fragment_code1.get_data()); //first part of fragment
builder.append(p_version->fragment_globals.get_data()); // fragment globals
builder.append(fragment_code2.get_data()); //third part of fragment
builder.append(p_version->fragment_light.get_data()); // fragment light
builder.append(fragment_code3.get_data()); //fourth part of fragment
builder.append(p_version->fragment_code.get_data()); // fragment code
builder.append(fragment_code4.get_data()); //fourth part of fragment
current_source = builder.as_string();
RD::ShaderStageData stage;
stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_FRAGMENT,current_source,RD::SHADER_LANGUAGE_GLSL,&error);
if (stage.spir_v.size()==0) {
build_ok=false;
} else {
stage.shader_stage = RD::SHADER_STAGE_FRAGMENT;
stages.push_back(stage);
}
}
if (!build_ok) {
ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment") + " shader, variant #" + itos(i) + " (" + variant_defines[i].get_data() + ").");
ERR_PRINT(error);
#ifdef DEBUG_ENABLED
ERR_PRINT("code:\n" + current_source.get_with_code_lines());
_compile_variant(i, p_version);
}
#endif
//clear versions if they exist
for (int j = 0; j < i; j++) {
RD::get_singleton()->free(p_version->variants[j]);
}
memdelete_arr(p_version->variants);
p_version->variants = NULL;
return;
bool all_valid = true;
for (int i = 0; i < variant_defines.size(); i++) {
if (p_version->variants[i].is_null()) {
all_valid = false;
break;
}
}
RID shader = RD::get_singleton()->shader_create(stages);
if (shader.is_null()) {
//clear versions if they exist
for (int j = 0; j < i; j++) {
RD::get_singleton()->free(p_version->variants[j]);
if (!all_valid) {
//clear versions if they exist
for (int i = 0; i < variant_defines.size(); i++) {
if (!p_version->variants[i].is_null()) {
RD::get_singleton()->free(p_version->variants[i]);
}
memdelete_arr(p_version->variants);
p_version->variants = NULL;
return;
}
p_version->variants[i] = shader;
memdelete_arr(p_version->variants);
p_version->variants = NULL;
return;
}
p_version->valid = true;

View file

@ -36,7 +36,7 @@
#include "core/rid_owner.h"
#include "core/variant.h"
#include <stdio.h>
#include <mutex>
/**
@author Juan Linietsky <reduzio@gmail.com>
*/
@ -67,6 +67,10 @@ class ShaderRD {
bool initialize_needed;
};
std::mutex variant_set_mutex;
void _compile_variant(uint32_t p_variant, Version *p_version);
void _clear_version(Version *p_version);
void _compile_version(Version *p_version);

View file

@ -3511,7 +3511,7 @@ VisualServerScene *VisualServerScene::singleton = NULL;
VisualServerScene::VisualServerScene() {
#ifndef NO_THREADS
probe_bake_sem = Semaphore::create();
probe_bake_sem = SemaphoreOld::create();
probe_bake_mutex = Mutex::create();
probe_bake_thread = Thread::create(_gi_probe_bake_threads, this);
probe_bake_thread_exit = false;

View file

@ -517,7 +517,7 @@ public:
volatile bool probe_bake_thread_exit;
Thread *probe_bake_thread;
Semaphore *probe_bake_sem;
SemaphoreOld *probe_bake_sem;
Mutex *probe_bake_mutex;
List<Instance *> probe_bake_list;