- Removed OpenMP support; parallel loops are now handled by a custom threaded array processor (thread_process_array).

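- Disabled the Opus module: its implementation is wrong for real-time audio, as it allocates memory, makes system calls and even reads files from the audio thread.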
This commit is contained in:
Juan Linietsky 2017-12-24 09:31:17 -03:00
parent 83182ea4a1
commit 021f3c924b
10 changed files with 119 additions and 68 deletions

View file

@ -168,7 +168,6 @@ opts.Add(BoolVariable('vsproj', "Generate Visual Studio Project.", False))
opts.Add(EnumVariable('warnings', "Set the level of warnings emitted during compilation", 'no', ('extra', 'all', 'moderate', 'no')))
opts.Add(BoolVariable('progress', "Show a progress indicator during build", True))
opts.Add(BoolVariable('dev', "If yes, alias for verbose=yes warnings=all", False))
opts.Add(BoolVariable('openmp', "If yes, enable OpenMP", True))
opts.Add(EnumVariable('macports_clang', "Build using clang from MacPorts", 'no', ('no', '5.0', 'devel')))
# Thirdparty libraries

View file

@ -0,0 +1,2 @@
#include "threaded_array_processor.h"

View file

@ -0,0 +1,80 @@
#ifndef THREADED_ARRAY_PROCESSOR_H
#define THREADED_ARRAY_PROCESSOR_H
#include "os/mutex.h"
#include "os/os.h"
#include "os/thread.h"
#include "safe_refcount.h"
#include "thread_safe.h"
// Job description shared between thread_process_array() and the worker
// function process_array_thread(): it bundles the target object, the member
// function to call on it, and the bookkeeping for the index range.
template <class C, class U>
struct ThreadArrayProcessData {

	uint32_t elements; // one past the last valid index; workers stop once they reach it
	uint32_t index; // shared counter the workers bump with atomic_increment()
	C *instance; // object the member function is invoked on
	U userdata; // extra argument, passed by value to every call
	void (C::*method)(uint32_t, U); // member function run for each index

	// Calls the stored member function on the stored instance for p_index.
	// No bounds check is done here; callers compare against `elements` first.
	void process(uint32_t p_index) {

		C &target = *instance;
		(target.*method)(p_index, userdata);
	}
};
#ifndef NO_THREADS
// Thread entry point used by thread_process_array().
//
// `ud` must point to a T that exposes `index`, `elements` and `process()`
// (ThreadArrayProcessData in this file). The worker keeps claiming an index
// through atomic_increment() on the shared counter and processes it, and
// returns as soon as the claimed index is no longer below `elements`.
template <class T>
void process_array_thread(void *ud) {

	T *job = static_cast<T *>(ud);

	for (;;) {
		const uint32_t claimed = atomic_increment(&job->index);
		if (claimed >= job->elements) {
			return; // range exhausted, nothing left for this worker
		}
		job->process(claimed);
	}
}
// Calls p_method on p_instance once for every index in [0, p_elements),
// distributing the calls over one worker thread per processor reported by
// the OS. Returns only after every worker has been waited on and freed.
//
// p_elements: number of indices to process; 0 is a no-op.
// p_instance: object p_method is invoked on.
// p_method:   member function taking (uint32_t index, U userdata).
// p_userdata: copied once into the shared job data and then passed by value
//             to every call.
//
// Calls for different indices run concurrently on different threads, so
// p_method has to cope with that.
template <class C, class M, class U>
void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {

	// Empty range: without this guard the unconditional first call below
	// would run p_method with index 0 even though no element exists.
	if (p_elements == 0) {
		return;
	}

	ThreadArrayProcessData<C, U> data;
	data.method = p_method;
	data.instance = p_instance;
	data.userdata = p_userdata;
	data.index = 0;
	data.elements = p_elements;

	// Index 0 is handled on the calling thread; the workers increment the
	// shared counter before using it, so they pick up from the next index.
	// NOTE(review): relies on atomic_increment() returning the incremented
	// value - confirm against safe_refcount.h.
	data.process(data.index);

	// A single element is already done; spawning workers that would exit
	// right away is pure overhead.
	if (p_elements == 1) {
		return;
	}

	Vector<Thread *> threads;

	threads.resize(OS::get_singleton()->get_processor_count());

	for (int i = 0; i < threads.size(); i++) {
		threads[i] = Thread::create(process_array_thread<ThreadArrayProcessData<C, U> >, &data);
	}

	// `data` lives on this stack frame, so every worker must be finished
	// before returning.
	for (int i = 0; i < threads.size(); i++) {
		Thread::wait_to_finish(threads[i]);
		memdelete(threads[i]);
	}
}
#else
template <class C, class M, class U>
void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
ThreadArrayProcessData<C, U> data;
data.method = p_method;
data.instance = p_instance;
data.userdata = p_userdata;
data.index = 0;
data.elements = p_elements;
for (uint32_t i = 0; i < p_elements; i++) {
data.process(i);
}
}
#endif
#endif // THREADED_ARRAY_PROCESSOR_H

View file

@ -1,5 +1,10 @@
def can_build(platform):
return True
# Sorry guys, do not enable this unless you can figure out a way
# to get Opus to not do any memory allocation or system calls
# in the audio thread.
# Currently the implementation even reads files from the audio thread,
# and this is not how audio programming works.
return False
def configure(env):
pass

View file

@ -82,9 +82,6 @@ def configure(env):
env['RANLIB'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ranlib"
env['AS'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-as"
env.Append(CCFLAGS=['-D__MACPORTS__']) #hack to fix libvpx MM256_BROADCASTSI128_SI256 define
if env['tools'] and env['openmp']:
env.Append(CPPFLAGS=['-fopenmp'])
env.Append(LINKFLAGS=['-fopenmp'])
else: # osxcross build
root = os.environ.get("OSXCROSS_ROOT", 0)

View file

@ -191,8 +191,6 @@ def configure(env):
if (env["use_lto"]):
env.Append(CCFLAGS=['/GL'])
env.Append(LINKFLAGS=['/LTCG'])
if env['tools'] and env['openmp']:
env.Append(CPPFLAGS=['/openmp'])
env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")])
env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")])
@ -270,9 +268,6 @@ def configure(env):
env.Append(CCFLAGS=['-flto'])
env.Append(LINKFLAGS=['-flto=' + str(env.GetOption("num_jobs"))])
if env['tools'] and env['openmp']:
env.Append(CPPFLAGS=['-fopenmp'])
env.Append(LINKFLAGS=['-fopenmp'])
## Compile flags

View file

@ -265,9 +265,5 @@ def configure(env):
env.Append(LINKFLAGS=['-m64', '-L/usr/lib/i686-linux-gnu'])
if env['tools'] and env['openmp']:
env.Append(CPPFLAGS=['-fopenmp'])
env.Append(LINKFLAGS=['-fopenmp'])
if env['use_static_cpp']:
env.Append(LINKFLAGS=['-static-libstdc++'])

View file

@ -772,8 +772,8 @@ void BakedLightmap::_bind_methods() {
BakedLightmap::BakedLightmap() {
extents = Vector3(10, 10, 10);
bake_cell_size = 0.1;
capture_cell_size = 0.25;
bake_cell_size = 0.25;
capture_cell_size = 0.5;
bake_quality = BAKE_QUALITY_MEDIUM;
bake_mode = BAKE_MODE_CONE_TRACE;

View file

@ -30,11 +30,9 @@
#include "voxel_light_baker.h"
#include "os/os.h"
#include "os/threaded_array_processor.h"
#include <stdlib.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#define FINDMINMAX(x0, x1, x2, min, max) \
min = max = x0; \
@ -1689,7 +1687,7 @@ _ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *state) {
return x;
}
Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state) {
Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal) {
int samples_per_quality[3] = { 48, 128, 512 };
@ -1711,8 +1709,7 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
const Light *light = bake_light.ptr();
const Cell *cells = bake_cells.ptr();
// Prevent false sharing when running on OpenMP
uint32_t local_rng_state = *rng_state;
uint32_t local_rng_state = rand(); //needs to be fixed again
for (int i = 0; i < samples; i++) {
@ -1796,10 +1793,30 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
}
// Make sure we don't reset this thread's RNG state
*rng_state = local_rng_state;
return accum / samples;
}
void VoxelLightBaker::_lightmap_bake_point(uint32_t p_x, LightMap *p_line) {
LightMap *pixel = &p_line[p_x];
if (pixel->pos == Vector3())
return;
//print_line("pos: " + pixel->pos + " normal " + pixel->normal);
switch (bake_mode) {
case BAKE_MODE_CONE_TRACE: {
pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
} break;
case BAKE_MODE_RAY_TRACE: {
pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal) * energy;
} break;
// pixel->light = Vector3(1, 1, 1);
//}
}
}
Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh, LightMapData &r_lightmap, bool (*p_bake_time_func)(void *, float, float), void *p_bake_time_ud) {
//transfer light information to a lightmap
@ -1862,53 +1879,10 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
volatile int lines = 0;
// make sure our OS-level rng is seeded
srand(OS::get_singleton()->get_ticks_usec());
// setup an RNG state for each OpenMP thread
uint32_t threadcount = 1;
uint32_t threadnum = 0;
#ifdef _OPENMP
threadcount = omp_get_max_threads();
#endif
Vector<uint32_t> rng_states;
rng_states.resize(threadcount);
for (uint32_t i = 0; i < threadcount; i++) {
do {
rng_states[i] = rand();
} while (rng_states[i] == 0);
}
uint32_t *rng_states_p = rng_states.ptrw();
for (int i = 0; i < height; i++) {
//print_line("bake line " + itos(i) + " / " + itos(height));
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 1) private(threadnum)
#endif
for (int j = 0; j < width; j++) {
#ifdef _OPENMP
threadnum = omp_get_thread_num();
#endif
//if (i == 125 && j == 280) {
LightMap *pixel = &lightmap_ptr[i * width + j];
if (pixel->pos == Vector3())
continue; //unused, skipe
//print_line("pos: " + pixel->pos + " normal " + pixel->normal);
switch (bake_mode) {
case BAKE_MODE_CONE_TRACE: {
pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
} break;
case BAKE_MODE_RAY_TRACE: {
pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal, &rng_states_p[threadnum]) * energy;
} break;
// pixel->light = Vector3(1, 1, 1);
//}
}
}
thread_process_array(width,this,&VoxelLightBaker::_lightmap_bake_point,&lightmap_ptr[i*width]);
lines = MAX(lines, i); //for multithread
if (p_bake_time_func) {

View file

@ -148,9 +148,12 @@ private:
_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state);
_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
void _lightmap_bake_point(uint32_t p_x, LightMap *p_line);
public:
void begin_bake(int p_subdiv, const AABB &p_bounds);
void plot_mesh(const Transform &p_xform, Ref<Mesh> &p_mesh, const Vector<Ref<Material> > &p_materials, const Ref<Material> &p_override_material);
void begin_bake_light(BakeQuality p_quality = BAKE_QUALITY_MEDIUM, BakeMode p_bake_mode = BAKE_MODE_CONE_TRACE, float p_propagation = 0.85, float p_energy = 1);