godot/thirdparty/embree/kernels/builders/bvh_builder_hair.h
jfons 767e374dce Upgrade Embree to the latest official release.
Since Embree v3.13.0 supports AARCH64, switch back to the
official repo instead of using Embree-aarch64.

`thirdparty/embree/patches/godot-changes.patch` should now contain
an accurate diff of the changes done to the library.
2021-05-21 17:00:24 +02:00

411 lines
17 KiB
C++

// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../bvh/bvh.h"
#include "../geometry/primitive.h"
#include "../builders/bvh_builder_sah.h"
#include "../builders/heuristic_binning_array_aligned.h"
#include "../builders/heuristic_binning_array_unaligned.h"
#include "../builders/heuristic_strand_array.h"
#define NUM_HAIR_OBJECT_BINS 32
namespace embree
{
namespace isa
{
struct BVHBuilderHair
{
/*! configuration values that control the hair BVH build */
struct Settings
{
  /*! initializes every setting with its default value */
  Settings ()
    : branchingFactor(2),
      maxDepth(32),
      logBlockSize(0),
      minLeafSize(1),
      maxLeafSize(7),
      finished_range_threshold(inf)
  {}

  size_t branchingFactor;          //!< branching factor of BVH to build
  size_t maxDepth;                 //!< maximum depth of BVH to build
  size_t logBlockSize;             //!< log2 of blocksize for SAH heuristic
  size_t minLeafSize;              //!< minimum size of a leaf
  size_t maxLeafSize;              //!< maximum size of a leaf
  size_t finished_range_threshold; //!< range size compared against in recurse() to decide when a finished range is reported
};
template<typename NodeRef,
typename CreateAllocFunc,
typename CreateAABBNodeFunc,
typename SetAABBNodeFunc,
typename CreateOBBNodeFunc,
typename SetOBBNodeFunc,
typename CreateLeafFunc,
typename ProgressMonitor,
typename ReportFinishedRangeFunc>
class BuilderT
{
ALIGNED_CLASS_(16);
friend struct BVHBuilderHair;
/* allocator handle threaded through the recursion; recurse() requests one via createAlloc() when it receives none */
typedef FastAllocator::CachedAllocator Allocator;
/* the three split heuristics that split() evaluates against each other */
typedef HeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> HeuristicBinningSAH;
typedef UnalignedHeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> UnalignedHeuristicBinningSAH;
typedef HeuristicStrandSplit HeuristicStrandSplitSAH;
/* MAX_BRANCHING_FACTOR is also the size of the stack arrays that hold the children in createLargeLeaf() and recurse() */
static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
/* cost factors of the SAH estimates computed in split() */
static const size_t travCostAligned = 1; //!< factor on the half area of the range for an aligned object split
static const size_t travCostUnaligned = 5; //!< factor on the half area of the range for an unaligned object split or a strand split
static const size_t intCost = 6; //!< factor on the leaf SAH and on the SAH of each candidate split
/*! Constructs a builder operating on the primitive array `prims`.
 *
 *  The settings are copied. All callbacks are stored by reference, so the
 *  objects passed in have to stay alive for as long as the builder is used.
 *
 *  \param scene               scene handed to the unaligned and the strand heuristic
 *  \param prims               primitive reference array handed to all three heuristics
 *  \param createAlloc         called by recurse() to obtain an allocator when none was passed in
 *  \param createAABBNode      creates an aligned inner node
 *  \param setAABBNode         attaches a child and its bounds to an aligned node
 *  \param createOBBNode       creates an unaligned inner node
 *  \param setOBBNode          attaches a child and its oriented bounds to an unaligned node
 *  \param createLeaf          creates a leaf for a range of primitives
 *  \param progressMonitor     called by recurse() with the size of a range to signal progress
 *  \param reportFinishedRange called by recurse() with a finished range when its alloc barrier is set
 *  \param settings            build settings
 *
 *  Raises an RTC_ERROR_UNKNOWN error through throw_RTCError if
 *  settings.branchingFactor exceeds MAX_BRANCHING_FACTOR.
 */
BuilderT (Scene* scene,
          PrimRef* prims,
          const CreateAllocFunc& createAlloc,
          const CreateAABBNodeFunc& createAABBNode,
          const SetAABBNodeFunc& setAABBNode,
          const CreateOBBNodeFunc& createOBBNode,
          const SetOBBNodeFunc& setOBBNode,
          const CreateLeafFunc& createLeaf,
          const ProgressMonitor& progressMonitor,
          const ReportFinishedRangeFunc& reportFinishedRange,
          const Settings settings)
  : cfg(settings),
    prims(prims),
    createAlloc(createAlloc),
    createAABBNode(createAABBNode),
    setAABBNode(setAABBNode),
    createOBBNode(createOBBNode),
    setOBBNode(setOBBNode),
    createLeaf(createLeaf),
    progressMonitor(progressMonitor),
    reportFinishedRange(reportFinishedRange),
    alignedHeuristic(prims), unalignedHeuristic(scene,prims), strandHeuristic(scene,prims)
{
  /* createLargeLeaf() and recurse() gather up to cfg.branchingFactor children in
     stack arrays of MAX_BRANCHING_FACTOR entries; reject larger branching factors
     here instead of writing past the end of those arrays later */
  if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
    throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
}
/*! tests whether every primitive of the range refers to one and the same geometry;
 *  an empty range counts as belonging to a single geometry */
__forceinline bool sameGeometry(const PrimInfoRange& range)
{
  if (range.size() == 0)
    return true;

  /* the geometry ID of the first primitive is the reference for all others */
  const size_t first = range.begin();
  const unsigned int referenceID = prims[first].geomID();

  size_t idx = first+1;
  while (idx < range.end())
  {
    if (prims[idx].geomID() != referenceID)
      return false;
    idx++;
  }
  return true;
}
/*! builds the subtree for a range that may hold more primitives than a single
 *  leaf of the BVH supports
 *
 *  \param depth depth of the node to create, checked against cfg.maxDepth
 *  \param pinfo range of primitives to store
 *  \param alloc allocator forwarded to the node and leaf creation callbacks
 *  \return a leaf if the range is small enough and from one geometry, otherwise
 *          an aligned node whose children are created recursively
 */
NodeRef createLargeLeaf(size_t depth, const PrimInfoRange& pinfo, Allocator alloc)
{
  /* exceeding the depth limit is not expected to happen, but is fatal if it does */
  if (depth > cfg.maxDepth)
    throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");

  /* a range that fits into one leaf and uses a single geometry is stored directly */
  if (pinfo.size() <= cfg.maxLeafSize && sameGeometry(pinfo))
    return createLeaf(prims,pinfo,alloc);

  /* start with the whole range as only child and keep splitting */
  PrimInfoRange children[MAX_BRANCHING_FACTOR];
  children[0] = pinfo;
  unsigned numChildren = 1;

  do {
    /* pick the splittable child that holds the most primitives */
    int pick = -1;
    size_t pickSize = 0;
    for (unsigned c=0; c<numChildren; c++)
    {
      const PrimInfoRange& cand = children[c];

      /* children that already qualify as a leaf are not split any further */
      const bool isLeaf = cand.size() <= cfg.maxLeafSize && sameGeometry(cand);
      if (!isLeaf && cand.size() > pickSize) {
        pickSize = cand.size();
        pick = c;
      }
    }

    /* nothing left to split */
    if (pick < 0) break;

    /* separate by geometry as long as geometries are mixed, otherwise use the fallback split */
    __aligned(64) PrimInfoRange left, right;
    if (sameGeometry(children[pick]))
      alignedHeuristic.splitFallback(children[pick],left,right);
    else
      alignedHeuristic.splitByGeometry(children[pick],left,right);

    /* the last child takes the slot of the split one, both halves are stored at the end */
    children[pick] = children[numChildren-1];
    children[numChildren-1] = left;
    children[numChildren] = right;
    numChildren++;

  } while (numChildren < cfg.branchingFactor);

  /* create the aligned node and recurse into every child */
  auto node = createAABBNode(alloc);
  for (size_t c=0; c<numChildren; c++) {
    const NodeRef sub = createLargeLeaf(depth+1,children[c],alloc);
    setAABBNode(node,c,sub,children[c].geomBounds);
  }
  return node;
}
/*! performs split
 *
 *  Evaluates up to three split strategies for the range and applies the one
 *  with the lowest SAH estimate: object binning in aligned space, object
 *  binning in an unaligned space, and a strand split. If none of them yields
 *  a finite cost the range is split with splitFallback instead.
 *
 *  \param pinfo   range of primitives to split
 *  \param linfo   receives the left part of the split
 *  \param rinfo   receives the right part of the split
 *  \param aligned on input enables the aligned binning candidate; set to false
 *                 when the unaligned or the strand split gets performed (the
 *                 aligned split and the fallback leave it untouched)
 */
__noinline void split(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo, bool& aligned) // FIXME: not inlined as ICC otherwise uses much stack
{
/* variable to track the SAH of the best splitting approach */
float bestSAH = inf;
/* number of blocks of 2^logBlockSize primitives in the range, rounded up */
const size_t blocks = (pinfo.size()+(1ull<<cfg.logBlockSize)-1ull) >> cfg.logBlockSize;
/* cost estimate for storing the whole range as a leaf; reference for the 0.7 thresholds below */
const float leafSAH = intCost*float(blocks)*halfArea(pinfo.geomBounds);
/* try standard binning in aligned space */
float alignedObjectSAH = inf;
HeuristicBinningSAH::Split alignedObjectSplit;
if (aligned) {
alignedObjectSplit = alignedHeuristic.find(pinfo,cfg.logBlockSize);
alignedObjectSAH = travCostAligned*halfArea(pinfo.geomBounds) + intCost*alignedObjectSplit.splitSAH();
bestSAH = min(alignedObjectSAH,bestSAH);
}
/* try standard binning in unaligned space */
/* only evaluated when the best cost so far is still above 70% of the leaf cost */
UnalignedHeuristicBinningSAH::Split unalignedObjectSplit;
LinearSpace3fa uspace;
float unalignedObjectSAH = inf;
if (bestSAH > 0.7f*leafSAH) {
uspace = unalignedHeuristic.computeAlignedSpace(pinfo);
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(pinfo,uspace);
unalignedObjectSplit = unalignedHeuristic.find(sinfo,cfg.logBlockSize,uspace);
unalignedObjectSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*unalignedObjectSplit.splitSAH();
bestSAH = min(unalignedObjectSAH,bestSAH);
}
/* try splitting into two strands */
/* same 70% condition as above, and only for ranges of at most 256 primitives */
HeuristicStrandSplitSAH::Split strandSplit;
float strandSAH = inf;
if (bestSAH > 0.7f*leafSAH && pinfo.size() <= 256) {
strandSplit = strandHeuristic.find(pinfo,cfg.logBlockSize);
strandSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*strandSplit.splitSAH();
bestSAH = min(strandSAH,bestSAH);
}
/* bestSAH was only ever assigned from min() with one of the candidate costs, so the
   exact floating point comparisons below identify the candidate that won; on a tie
   the earlier branch (aligned, then unaligned, then strand) is taken */
/* fallback if SAH heuristics failed */
if (unlikely(!std::isfinite(bestSAH)))
{
alignedHeuristic.deterministic_order(pinfo);
alignedHeuristic.splitFallback(pinfo,linfo,rinfo);
}
/* perform aligned split if this is best */
else if (bestSAH == alignedObjectSAH) {
alignedHeuristic.split(alignedObjectSplit,pinfo,linfo,rinfo);
}
/* perform unaligned split if this is best */
else if (bestSAH == unalignedObjectSAH) {
unalignedHeuristic.split(unalignedObjectSplit,uspace,pinfo,linfo,rinfo);
aligned = false;
}
/* perform strand split if this is best */
else if (bestSAH == strandSAH) {
strandHeuristic.split(strandSplit,pinfo,linfo,rinfo);
aligned = false;
}
/* can never happen */
else
assert(false);
}
/*! recursive build
 *
 *  Builds the subtree for the range pinfo. The range is repeatedly split
 *  (always the child with the largest bounding box area) until
 *  cfg.branchingFactor children exist or no child can be split anymore. The
 *  children are then attached to an aligned node if all performed splits kept
 *  the `aligned` flag set, and to an unaligned node otherwise.
 *
 *  \param depth         depth of the node to create
 *  \param pinfo         range of primitives to build the subtree for
 *  \param alloc         allocator to use; when it evaluates to false one is requested through createAlloc()
 *  \param toplevel      if set and the range has at most SINGLE_THREADED_THRESHOLD primitives, progressMonitor is called with the range size
 *  \param alloc_barrier if set, reportFinishedRange is called with pinfo after the inner node got built
 *  \return reference to the created node or leaf
 */
NodeRef recurse(size_t depth, const PrimInfoRange& pinfo, Allocator alloc, bool toplevel, bool alloc_barrier)
{
/* get thread local allocator */
if (!alloc)
alloc = createAlloc();
/* call memory monitor function to signal progress */
if (toplevel && pinfo.size() <= SINGLE_THREADED_THRESHOLD)
progressMonitor(pinfo.size());
PrimInfoRange children[MAX_BRANCHING_FACTOR];
/* create leaf node */
/* taken when fewer than MIN_LARGE_LEAF_LEVELS levels remain below cfg.maxDepth, or when the range is at most cfg.minLeafSize primitives */
/* NOTE(review): this early return bypasses the reportFinishedRange call at the end of the function even when alloc_barrier is set - confirm this is intended */
if (depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || pinfo.size() <= cfg.minLeafSize) {
alignedHeuristic.deterministic_order(pinfo);
return createLargeLeaf(depth,pinfo,alloc);
}
/* fill all children by always splitting the one with the largest surface area */
size_t numChildren = 1;
children[0] = pinfo;
/* shared by all split() calls of this node; once a split clears it, it stays cleared */
bool aligned = true;
do {
/* find best child with largest bounding box area */
ssize_t bestChild = -1;
float bestArea = neg_inf;
for (size_t i=0; i<numChildren; i++)
{
/* ignore leaves as they cannot get split */
if (children[i].size() <= cfg.minLeafSize)
continue;
/* remember child with largest area */
if (area(children[i].geomBounds) > bestArea) {
bestArea = area(children[i].geomBounds);
bestChild = i;
}
}
if (bestChild == -1) break;
/*! split best child into left and right child */
PrimInfoRange left, right;
split(children[bestChild],left,right,aligned);
/* add new children left and right */
/* the last child moves into the slot of the split one, the two halves go to the end */
children[bestChild] = children[numChildren-1];
children[numChildren-1] = left;
children[numChildren+0] = right;
numChildren++;
} while (numChildren < cfg.branchingFactor);
NodeRef node;
/* create aligned node */
if (aligned)
{
node = createAABBNode(alloc);
/* spawn tasks or ... */
if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
{
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
/* barrier for the first range on this path whose size drops to the threshold or below */
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
/* nullptr allocator: the child call requests its own one through createAlloc() */
setAABBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),children[i].geomBounds);
_mm_mfence(); // to allow non-temporal stores during build
}
});
}
/* ... continue sequentially */
else {
for (size_t i=0; i<numChildren; i++) {
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
/* sequential case: the allocator of this call gets reused and toplevel is cleared */
setAABBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),children[i].geomBounds);
}
}
}
/* create unaligned node */
else
{
node = createOBBNode(alloc);
/* spawn tasks or ... */
if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
{
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
for (size_t i=r.begin(); i<r.end(); i++) {
/* oriented bounds of the child: its geometry bounds computed in a space derived from the child itself */
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
const OBBox3fa obounds(space,sinfo.geomBounds);
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
setOBBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),obounds);
_mm_mfence(); // to allow non-temporal stores during build
}
});
}
/* ... continue sequentially */
else
{
for (size_t i=0; i<numChildren; i++) {
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
const OBBox3fa obounds(space,sinfo.geomBounds);
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
setOBBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),obounds);
}
}
}
/* reports a finished range of primrefs */
if (unlikely(alloc_barrier))
reportFinishedRange(pinfo);
return node;
}
private:
Settings cfg; //!< build settings, copied at construction
PrimRef* prims; //!< primitive reference array indexed by the ranges (see sameGeometry)
/* the callbacks are held by reference, the referenced objects have to outlive the builder */
const CreateAllocFunc& createAlloc; //!< called as createAlloc() by recurse() when it received no allocator
const CreateAABBNodeFunc& createAABBNode; //!< called as createAABBNode(alloc) to create an aligned node
const SetAABBNodeFunc& setAABBNode; //!< called as setAABBNode(node,i,child,bounds) to attach child i
const CreateOBBNodeFunc& createOBBNode; //!< called as createOBBNode(alloc) to create an unaligned node
const SetOBBNodeFunc& setOBBNode; //!< called as setOBBNode(node,i,child,obounds) to attach child i
const CreateLeafFunc& createLeaf; //!< called as createLeaf(prims,range,alloc) by createLargeLeaf()
const ProgressMonitor& progressMonitor; //!< called with the size of a range by recurse()
const ReportFinishedRangeFunc& reportFinishedRange; //!< called with a range by recurse() when its alloc barrier is set
private:
/* split heuristics used by split(), createLargeLeaf() and recurse() */
HeuristicBinningSAH alignedHeuristic; //!< object binning in aligned space, also provides the fallback splits
UnalignedHeuristicBinningSAH unalignedHeuristic; //!< object binning in an unaligned space
HeuristicStrandSplitSAH strandHeuristic; //!< strand split
};
template<typename NodeRef,
typename CreateAllocFunc,
typename CreateAABBNodeFunc,
typename SetAABBNodeFunc,
typename CreateOBBNodeFunc,
typename SetOBBNodeFunc,
typename CreateLeafFunc,
typename ProgressMonitor,
typename ReportFinishedRangeFunc>
static NodeRef build (const CreateAllocFunc& createAlloc,
const CreateAABBNodeFunc& createAABBNode,
const SetAABBNodeFunc& setAABBNode,
const CreateOBBNodeFunc& createOBBNode,
const SetOBBNodeFunc& setOBBNode,
const CreateLeafFunc& createLeaf,
const ProgressMonitor& progressMonitor,
const ReportFinishedRangeFunc& reportFinishedRange,
Scene* scene,
PrimRef* prims,
const PrimInfo& pinfo,
const Settings settings)
{
typedef BuilderT<NodeRef,
CreateAllocFunc,
CreateAABBNodeFunc,SetAABBNodeFunc,
CreateOBBNodeFunc,SetOBBNodeFunc,
CreateLeafFunc,ProgressMonitor,
ReportFinishedRangeFunc> Builder;
Builder builder(scene,prims,createAlloc,
createAABBNode,setAABBNode,
createOBBNode,setOBBNode,
createLeaf,progressMonitor,reportFinishedRange,settings);
NodeRef root = builder.recurse(1,pinfo,nullptr,true,false);
_mm_mfence(); // to allow non-temporal stores during build
return root;
}
};
}
}