767e374dce
Since Embree v3.13.0 supports AARCH64, switch back to the official repo instead of using Embree-aarch64. `thirdparty/embree/patches/godot-changes.patch` should now contain an accurate diff of the changes done to the library.
113 lines
4.1 KiB
C++
113 lines
4.1 KiB
C++
// Copyright 2009-2021 Intel Corporation
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
#pragma once
|
|
|
|
#include "parallel_for_for.h"
|
|
#include "parallel_prefix_sum.h"
|
|
|
|
namespace embree
|
|
{
|
|
template<typename Value>
|
|
struct ParallelForForPrefixSumState : public ParallelForForState
|
|
{
|
|
__forceinline ParallelForForPrefixSumState () {}
|
|
|
|
template<typename ArrayArray>
|
|
__forceinline ParallelForForPrefixSumState (ArrayArray& array2, const size_t minStepSize)
|
|
: ParallelForForState(array2,minStepSize) {}
|
|
|
|
ParallelPrefixSumState<Value> prefix_state;
|
|
};
|
|
|
|
template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
|
|
__forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2, Index minStepSize,
|
|
const Value& identity, const Func& func, const Reduction& reduction)
|
|
{
|
|
/* calculate number of tasks to use */
|
|
const size_t taskCount = state.taskCount;
|
|
/* perform parallel prefix sum */
|
|
parallel_for(taskCount, [&](const size_t taskIndex)
|
|
{
|
|
const size_t k0 = (taskIndex+0)*state.size()/taskCount;
|
|
const size_t k1 = (taskIndex+1)*state.size()/taskCount;
|
|
size_t i0 = state.i0[taskIndex];
|
|
size_t j0 = state.j0[taskIndex];
|
|
|
|
/* iterate over arrays */
|
|
size_t k=k0;
|
|
Value N=identity;
|
|
for (size_t i=i0; k<k1; i++) {
|
|
const size_t size = array2[i] ? array2[i]->size() : 0;
|
|
const size_t r0 = j0, r1 = min(size,r0+k1-k);
|
|
if (r1 > r0) N = reduction(N, func(array2[i],range<Index>((Index)r0,(Index)r1),(Index)k,(Index)i));
|
|
k+=r1-r0; j0 = 0;
|
|
}
|
|
state.prefix_state.counts[taskIndex] = N;
|
|
});
|
|
|
|
/* calculate prefix sum */
|
|
Value sum=identity;
|
|
for (size_t i=0; i<taskCount; i++)
|
|
{
|
|
const Value c = state.prefix_state.counts[i];
|
|
state.prefix_state.sums[i] = sum;
|
|
sum=reduction(sum,c);
|
|
}
|
|
|
|
return sum;
|
|
}
|
|
|
|
template<typename ArrayArray, typename Index, typename Value, typename Func, typename Reduction>
|
|
__forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2, Index minStepSize,
|
|
const Value& identity, const Func& func, const Reduction& reduction)
|
|
{
|
|
/* calculate number of tasks to use */
|
|
const size_t taskCount = state.taskCount;
|
|
/* perform parallel prefix sum */
|
|
parallel_for(taskCount, [&](const size_t taskIndex)
|
|
{
|
|
const size_t k0 = (taskIndex+0)*state.size()/taskCount;
|
|
const size_t k1 = (taskIndex+1)*state.size()/taskCount;
|
|
size_t i0 = state.i0[taskIndex];
|
|
size_t j0 = state.j0[taskIndex];
|
|
|
|
/* iterate over arrays */
|
|
size_t k=k0;
|
|
Value N=identity;
|
|
for (size_t i=i0; k<k1; i++) {
|
|
const size_t size = array2[i] ? array2[i]->size() : 0;
|
|
const size_t r0 = j0, r1 = min(size,r0+k1-k);
|
|
if (r1 > r0) N = reduction(N, func(array2[i],range<Index>((Index)r0,(Index)r1),(Index)k,(Index)i,reduction(state.prefix_state.sums[taskIndex],N)));
|
|
k+=r1-r0; j0 = 0;
|
|
}
|
|
state.prefix_state.counts[taskIndex] = N;
|
|
});
|
|
|
|
/* calculate prefix sum */
|
|
Value sum=identity;
|
|
for (size_t i=0; i<taskCount; i++)
|
|
{
|
|
const Value c = state.prefix_state.counts[i];
|
|
state.prefix_state.sums[i] = sum;
|
|
sum=reduction(sum,c);
|
|
}
|
|
|
|
return sum;
|
|
}
|
|
|
|
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
|
|
__forceinline Value parallel_for_for_prefix_sum0( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
|
|
const Value& identity, const Func& func, const Reduction& reduction)
|
|
{
|
|
return parallel_for_for_prefix_sum0(state,array2,size_t(1),identity,func,reduction);
|
|
}
|
|
|
|
template<typename ArrayArray, typename Value, typename Func, typename Reduction>
|
|
__forceinline Value parallel_for_for_prefix_sum1( ParallelForForPrefixSumState<Value>& state, ArrayArray& array2,
|
|
const Value& identity, const Func& func, const Reduction& reduction)
|
|
{
|
|
return parallel_for_for_prefix_sum1(state,array2,size_t(1),identity,func,reduction);
|
|
}
|
|
}
|