godot/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
Joan Fons 595cbacdf1 Upgrade Embree and enable ray packets
Minor patch upgrade. Enabling ray packets results in faster
processing of ray streams (i.e. occlusion culling buffer
updates) at the cost of slightly larger binary sizes.
2021-09-13 16:17:28 +02:00

918 lines
35 KiB
C++

// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "bvh_intersector_hybrid.h"
#include "bvh_traverser1.h"
#include "node_intersector1.h"
#include "node_intersector_packet.h"
#include "../geometry/intersector_iterators.h"
#include "../geometry/triangle_intersector.h"
#include "../geometry/trianglev_intersector.h"
#include "../geometry/trianglev_mb_intersector.h"
#include "../geometry/trianglei_intersector.h"
#include "../geometry/quadv_intersector.h"
#include "../geometry/quadi_intersector.h"
#include "../geometry/curveNv_intersector.h"
#include "../geometry/curveNi_intersector.h"
#include "../geometry/curveNi_mb_intersector.h"
#include "../geometry/linei_intersector.h"
#include "../geometry/subdivpatch1_intersector.h"
#include "../geometry/object_intersector.h"
#include "../geometry/instance_intersector.h"
#include "../geometry/subgrid_intersector.h"
#include "../geometry/subgrid_mb_intersector.h"
#include "../geometry/curve_intersector_virtual.h"
#define SWITCH_DURING_DOWN_TRAVERSAL 1
#define FORCE_SINGLE_MODE 0
#define ENABLE_FAST_COHERENT_CODEPATHS 1
namespace embree
{
namespace isa
{
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
const BVH* bvh,
NodeRef root,
size_t k,
Precalculations& pre,
RayHitK<K>& ray,
const TravRayK<K, robust>& tray,
IntersectContext* context)
{
/* stack state */
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
stack[0].ptr = root;
stack[0].dist = neg_inf;
/* load the ray into SIMD registers */
TravRay<N,robust> tray1;
tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
/* pop loop */
while (true) pop:
{
/* pop next node */
if (unlikely(stackPtr == stack)) break;
stackPtr--;
NodeRef cur = NodeRef(stackPtr->ptr);
/* if popped node is too far, pop next one */
if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
continue;
/* downtraversal loop */
while (true)
{
/* intersect node */
size_t mask; vfloat<N> tNear;
STAT3(normal.trav_nodes, 1, 1, 1);
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
/* if no child is hit, pop next node */
if (unlikely(mask == 0))
goto pop;
/* select next child and push other children */
BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
}
/* this is a leaf node */
assert(cur != BVH::emptyNode);
STAT3(normal.trav_leaves, 1, 1, 1);
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
size_t lazy_node = 0;
PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);
tray1.tfar = ray.tfar[k];
if (unlikely(lazy_node)) {
stackPtr->ptr = lazy_node;
stackPtr->dist = neg_inf;
stackPtr++;
}
}
}
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
Accel::Intersectors* __restrict__ This,
RayHitK<K>& __restrict__ ray,
IntersectContext* __restrict__ context)
{
BVH* __restrict__ bvh = (BVH*)This->ptr;
/* we may traverse an empty BVH in case all geometry was invalid */
if (bvh->root == BVH::emptyNode)
return;
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
assert(context);
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
{
intersectCoherent(valid_i, This, ray, context);
return;
}
#endif
/* filter out invalid rays */
vbool<K> valid = *valid_i == -1;
#if defined(EMBREE_IGNORE_INVALID_RAYS)
valid &= ray.valid();
#endif
/* return if there are no valid rays */
size_t valid_bits = movemask(valid);
#if defined(__AVX__)
STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
#endif
if (unlikely(valid_bits == 0)) return;
/* verify correct input */
assert(all(valid, ray.valid()));
assert(all(valid, ray.tnear() >= 0.0f));
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
Precalculations pre(valid, ray);
/* load ray */
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
if (single)
{
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
for (; valid_bits!=0; ) {
const size_t i = bscf(valid_bits);
intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
}
return;
}
/* determine switch threshold based on flags */
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
vint<K> octant = ray.octant();
octant = select(valid, octant, vint<K>(0xffffffff));
/* test whether we have ray with opposing direction signs in the packet */
bool split = false;
{
size_t bits = valid_bits;
vbool<K> vsplit( false );
do
{
const size_t valid_index = bsf(bits);
vbool<K> octant_valid = octant[valid_index] == octant;
bits &= ~(size_t)movemask(octant_valid);
vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
} while (bits);
if (any(vsplit)) split = true;
}
do
{
const size_t valid_index = bsf(valid_bits);
const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
const vint<K> count_diff_octant = \
((diff_octant >> 2) & 1) +
((diff_octant >> 1) & 1) +
((diff_octant >> 0) & 1);
vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down
octant = select(octant_valid,vint<K>(0xffffffff),octant);
valid_bits &= ~(size_t)movemask(octant_valid);
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
/* allocate stack and push root node */
vfloat<K> stack_near[stackSizeChunk];
NodeRef stack_node[stackSizeChunk];
stack_node[0] = BVH::invalidNode;
stack_near[0] = inf;
stack_node[1] = bvh->root;
stack_near[1] = tray.tnear;
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
NodeRef* __restrict__ sptr_node = stack_node + 2;
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
while (1) pop:
{
/* pop next node from stack */
assert(sptr_node > stack_node);
sptr_node--;
sptr_near--;
NodeRef cur = *sptr_node;
if (unlikely(cur == BVH::invalidNode)) {
assert(sptr_node == stack_node);
break;
}
/* cull node if behind closest hit point */
vfloat<K> curDist = *sptr_near;
const vbool<K> active = curDist < tray.tfar;
if (unlikely(none(active)))
continue;
/* switch to single ray traversal */
#if (!defined(__WIN32__) || defined(__X86_64__)) && defined(__SSE4_2__)
#if FORCE_SINGLE_MODE == 0
if (single)
#endif
{
size_t bits = movemask(active);
#if FORCE_SINGLE_MODE == 0
if (unlikely(popcnt(bits) <= switchThreshold))
#endif
{
for (; bits!=0; ) {
const size_t i = bscf(bits);
intersect1(This, bvh, cur, i, pre, ray, tray, context);
}
tray.tfar = min(tray.tfar, ray.tfar);
continue;
}
}
#endif
while (likely(!cur.isLeaf()))
{
/* process nodes */
const vbool<K> valid_node = tray.tfar > curDist;
STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
const NodeRef nodeRef = cur;
const BaseNode* __restrict__ const node = nodeRef.baseNode();
/* set cur to invalid */
cur = BVH::emptyNode;
curDist = pos_inf;
size_t num_child_hits = 0;
for (unsigned i = 0; i < N; i++)
{
const NodeRef child = node->children[i];
if (unlikely(child == BVH::emptyNode)) break;
vfloat<K> lnearP;
vbool<K> lhit = valid_node;
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
/* if we hit the child we choose to continue with that child if it
is closer than the current next child, or we push it onto the stack */
if (likely(any(lhit)))
{
assert(sptr_node < stackEnd);
assert(child != BVH::emptyNode);
const vfloat<K> childDist = select(lhit, lnearP, inf);
/* push cur node onto stack and continue with hit child */
if (any(childDist < curDist))
{
if (likely(cur != BVH::emptyNode)) {
num_child_hits++;
*sptr_node = cur; sptr_node++;
*sptr_near = curDist; sptr_near++;
}
curDist = childDist;
cur = child;
}
/* push hit child onto stack */
else {
num_child_hits++;
*sptr_node = child; sptr_node++;
*sptr_near = childDist; sptr_near++;
}
}
}
#if defined(__AVX__)
//STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
#endif
if (unlikely(cur == BVH::emptyNode))
goto pop;
/* improved distance sorting for 3 or more hits */
if (unlikely(num_child_hits >= 2))
{
if (any(sptr_near[-2] < sptr_near[-1]))
{
std::swap(sptr_near[-2],sptr_near[-1]);
std::swap(sptr_node[-2],sptr_node[-1]);
}
if (unlikely(num_child_hits >= 3))
{
if (any(sptr_near[-3] < sptr_near[-1]))
{
std::swap(sptr_near[-3],sptr_near[-1]);
std::swap(sptr_node[-3],sptr_node[-1]);
}
if (any(sptr_near[-3] < sptr_near[-2]))
{
std::swap(sptr_near[-3],sptr_near[-2]);
std::swap(sptr_node[-3],sptr_node[-2]);
}
}
}
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
if (single)
{
// seems to be the best place for testing utilization
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
{
*sptr_node++ = cur;
*sptr_near++ = curDist;
goto pop;
}
}
#endif
}
/* return if stack is empty */
if (unlikely(cur == BVH::invalidNode)) {
assert(sptr_node == stack_node);
break;
}
/* intersect leaf */
assert(cur != BVH::emptyNode);
const vbool<K> valid_leaf = tray.tfar > curDist;
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
if (unlikely(none(valid_leaf))) continue;
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
size_t lazy_node = 0;
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
if (unlikely(lazy_node)) {
*sptr_node = lazy_node; sptr_node++;
*sptr_near = neg_inf; sptr_near++;
}
}
} while(valid_bits);
}
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
Accel::Intersectors* __restrict__ This,
RayHitK<K>& __restrict__ ray,
IntersectContext* context)
{
BVH* __restrict__ bvh = (BVH*)This->ptr;
/* filter out invalid rays */
vbool<K> valid = *valid_i == -1;
#if defined(EMBREE_IGNORE_INVALID_RAYS)
valid &= ray.valid();
#endif
/* return if there are no valid rays */
size_t valid_bits = movemask(valid);
if (unlikely(valid_bits == 0)) return;
/* verify correct input */
assert(all(valid, ray.valid()));
assert(all(valid, ray.tnear() >= 0.0f));
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
Precalculations pre(valid, ray);
/* load ray */
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
vint<K> octant = ray.octant();
octant = select(valid, octant, vint<K>(0xffffffff));
do
{
const size_t valid_index = bsf(valid_bits);
const vbool<K> octant_valid = octant[valid_index] == octant;
valid_bits &= ~(size_t)movemask(octant_valid);
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
Frustum<robust> frustum;
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
stack[0].ptr = bvh->root;
stack[0].dist = neg_inf;
while (1) pop:
{
/* pop next node from stack */
if (unlikely(stackPtr == stack)) break;
stackPtr--;
NodeRef cur = NodeRef(stackPtr->ptr);
/* cull node if behind closest hit point */
vfloat<K> curDist = *(float*)&stackPtr->dist;
const vbool<K> active = curDist < tray.tfar;
if (unlikely(none(active))) continue;
while (likely(!cur.isLeaf()))
{
/* process nodes */
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
const NodeRef nodeRef = cur;
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
vfloat<N> fmin;
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
if (unlikely(!m_frustum_node)) goto pop;
cur = BVH::emptyNode;
curDist = pos_inf;
#if defined(__AVX__)
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
#endif
size_t num_child_hits = 0;
do {
const size_t i = bscf(m_frustum_node);
vfloat<K> lnearP;
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
STAT3(normal.trav_nodes, 1, 1, 1);
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
if (likely(any(lhit)))
{
const vfloat<K> childDist = fmin[i];
const NodeRef child = node->child(i);
BVHN<N>::prefetch(child);
if (any(childDist < curDist))
{
if (likely(cur != BVH::emptyNode)) {
num_child_hits++;
stackPtr->ptr = cur;
*(float*)&stackPtr->dist = toScalar(curDist);
stackPtr++;
}
curDist = childDist;
cur = child;
}
/* push hit child onto stack */
else {
num_child_hits++;
stackPtr->ptr = child;
*(float*)&stackPtr->dist = toScalar(childDist);
stackPtr++;
}
}
} while(m_frustum_node);
if (unlikely(cur == BVH::emptyNode)) goto pop;
/* improved distance sorting for 3 or more hits */
if (unlikely(num_child_hits >= 2))
{
if (stackPtr[-2].dist < stackPtr[-1].dist)
std::swap(stackPtr[-2],stackPtr[-1]);
if (unlikely(num_child_hits >= 3))
{
if (stackPtr[-3].dist < stackPtr[-1].dist)
std::swap(stackPtr[-3],stackPtr[-1]);
if (stackPtr[-3].dist < stackPtr[-2].dist)
std::swap(stackPtr[-3],stackPtr[-2]);
}
}
}
/* intersect leaf */
assert(cur != BVH::invalidNode);
assert(cur != BVH::emptyNode);
const vbool<K> valid_leaf = tray.tfar > curDist;
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
if (unlikely(none(valid_leaf))) continue;
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
size_t lazy_node = 0;
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
/* reduce max distance interval on successful intersection */
if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
{
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
frustum.template updateMaxDist<K>(tray.tfar);
}
if (unlikely(lazy_node)) {
stackPtr->ptr = lazy_node;
stackPtr->dist = neg_inf;
stackPtr++;
}
}
} while(valid_bits);
}
// ===================================================================================================================================================================
// ===================================================================================================================================================================
// ===================================================================================================================================================================
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
const BVH* bvh,
NodeRef root,
size_t k,
Precalculations& pre,
RayK<K>& ray,
const TravRayK<K, robust>& tray,
IntersectContext* context)
{
/* stack state */
NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed
NodeRef* stackPtr = stack+1; // current stack pointer
NodeRef* stackEnd = stack+stackSizeSingle;
stack[0] = root;
/* load the ray into SIMD registers */
TravRay<N,robust> tray1;
tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
/* pop loop */
while (true) pop:
{
/* pop next node */
if (unlikely(stackPtr == stack)) break;
stackPtr--;
NodeRef cur = (NodeRef)*stackPtr;
/* downtraversal loop */
while (true)
{
/* intersect node */
size_t mask; vfloat<N> tNear;
STAT3(shadow.trav_nodes, 1, 1, 1);
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
/* if no child is hit, pop next node */
if (unlikely(mask == 0))
goto pop;
/* select next child and push other children */
BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
}
/* this is a leaf node */
assert(cur != BVH::emptyNode);
STAT3(shadow.trav_leaves, 1, 1, 1);
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
size_t lazy_node = 0;
if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
ray.tfar[k] = neg_inf;
return true;
}
if (unlikely(lazy_node)) {
*stackPtr = lazy_node;
stackPtr++;
}
}
return false;
}
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
Accel::Intersectors* __restrict__ This,
RayK<K>& __restrict__ ray,
IntersectContext* context)
{
BVH* __restrict__ bvh = (BVH*)This->ptr;
/* we may traverse an empty BVH in case all geometry was invalid */
if (bvh->root == BVH::emptyNode)
return;
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
assert(context);
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
{
occludedCoherent(valid_i, This, ray, context);
return;
}
#endif
/* filter out already occluded and invalid rays */
vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
#if defined(EMBREE_IGNORE_INVALID_RAYS)
valid &= ray.valid();
#endif
/* return if there are no valid rays */
const size_t valid_bits = movemask(valid);
if (unlikely(valid_bits == 0)) return;
/* verify correct input */
assert(all(valid, ray.valid()));
assert(all(valid, ray.tnear() >= 0.0f));
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
Precalculations pre(valid, ray);
/* load ray */
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
vbool<K> terminated = !valid;
const vfloat<K> inf = vfloat<K>(pos_inf);
/* determine switch threshold based on flags */
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
/* allocate stack and push root node */
vfloat<K> stack_near[stackSizeChunk];
NodeRef stack_node[stackSizeChunk];
stack_node[0] = BVH::invalidNode;
stack_near[0] = inf;
stack_node[1] = bvh->root;
stack_near[1] = tray.tnear;
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
NodeRef* __restrict__ sptr_node = stack_node + 2;
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
while (1) pop:
{
/* pop next node from stack */
assert(sptr_node > stack_node);
sptr_node--;
sptr_near--;
NodeRef cur = *sptr_node;
if (unlikely(cur == BVH::invalidNode)) {
assert(sptr_node == stack_node);
break;
}
/* cull node if behind closest hit point */
vfloat<K> curDist = *sptr_near;
const vbool<K> active = curDist < tray.tfar;
if (unlikely(none(active)))
continue;
/* switch to single ray traversal */
#if (!defined(__WIN32__) || defined(__X86_64__)) && defined(__SSE4_2__)
#if FORCE_SINGLE_MODE == 0
if (single)
#endif
{
size_t bits = movemask(active);
#if FORCE_SINGLE_MODE == 0
if (unlikely(popcnt(bits) <= switchThreshold))
#endif
{
for (; bits!=0; ) {
const size_t i = bscf(bits);
if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
set(terminated, i);
}
if (all(terminated)) break;
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
continue;
}
}
#endif
while (likely(!cur.isLeaf()))
{
/* process nodes */
const vbool<K> valid_node = tray.tfar > curDist;
STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
const NodeRef nodeRef = cur;
const BaseNode* __restrict__ const node = nodeRef.baseNode();
/* set cur to invalid */
cur = BVH::emptyNode;
curDist = pos_inf;
for (unsigned i = 0; i < N; i++)
{
const NodeRef child = node->children[i];
if (unlikely(child == BVH::emptyNode)) break;
vfloat<K> lnearP;
vbool<K> lhit = valid_node;
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
/* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
if (likely(any(lhit)))
{
assert(sptr_node < stackEnd);
assert(child != BVH::emptyNode);
const vfloat<K> childDist = select(lhit, lnearP, inf);
/* push 'cur' node onto stack and continue with hit child */
if (likely(cur != BVH::emptyNode)) {
*sptr_node = cur; sptr_node++;
*sptr_near = curDist; sptr_near++;
}
curDist = childDist;
cur = child;
}
}
if (unlikely(cur == BVH::emptyNode))
goto pop;
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
if (single)
{
// seems to be the best place for testing utilization
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
{
*sptr_node++ = cur;
*sptr_near++ = curDist;
goto pop;
}
}
#endif
}
/* return if stack is empty */
if (unlikely(cur == BVH::invalidNode)) {
assert(sptr_node == stack_node);
break;
}
/* intersect leaf */
assert(cur != BVH::emptyNode);
const vbool<K> valid_leaf = tray.tfar > curDist;
STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
if (unlikely(none(valid_leaf))) continue;
size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
size_t lazy_node = 0;
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
if (all(terminated)) break;
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
if (unlikely(lazy_node)) {
*sptr_node = lazy_node; sptr_node++;
*sptr_near = neg_inf; sptr_near++;
}
}
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
}
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
Accel::Intersectors* __restrict__ This,
RayK<K>& __restrict__ ray,
IntersectContext* context)
{
BVH* __restrict__ bvh = (BVH*)This->ptr;
/* filter out invalid rays */
vbool<K> valid = *valid_i == -1;
#if defined(EMBREE_IGNORE_INVALID_RAYS)
valid &= ray.valid();
#endif
/* return if there are no valid rays */
size_t valid_bits = movemask(valid);
if (unlikely(valid_bits == 0)) return;
/* verify correct input */
assert(all(valid, ray.valid()));
assert(all(valid, ray.tnear() >= 0.0f));
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
Precalculations pre(valid,ray);
/* load ray */
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
vbool<K> terminated = !valid;
vint<K> octant = ray.octant();
octant = select(valid, octant, vint<K>(0xffffffff));
do
{
const size_t valid_index = bsf(valid_bits);
vbool<K> octant_valid = octant[valid_index] == octant;
valid_bits &= ~(size_t)movemask(octant_valid);
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));
Frustum<robust> frustum;
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
stack[0].ptr = bvh->root;
stack[0].mask = movemask(octant_valid);
while (1) pop:
{
/* pop next node from stack */
if (unlikely(stackPtr == stack)) break;
stackPtr--;
NodeRef cur = NodeRef(stackPtr->ptr);
/* cull node of active rays have already been terminated */
size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
if (unlikely(m_active == 0)) continue;
while (likely(!cur.isLeaf()))
{
/* process nodes */
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
const NodeRef nodeRef = cur;
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
vfloat<N> fmin;
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
if (unlikely(!m_frustum_node)) goto pop;
cur = BVH::emptyNode;
m_active = 0;
#if defined(__AVX__)
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
#endif
size_t num_child_hits = 0;
do {
const size_t i = bscf(m_frustum_node);
vfloat<K> lnearP;
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
STAT3(normal.trav_nodes, 1, 1, 1);
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
if (likely(any(lhit)))
{
const NodeRef child = node->child(i);
assert(child != BVH::emptyNode);
BVHN<N>::prefetch(child);
if (likely(cur != BVH::emptyNode)) {
num_child_hits++;
stackPtr->ptr = cur;
stackPtr->mask = m_active;
stackPtr++;
}
cur = child;
m_active = movemask(lhit);
}
} while(m_frustum_node);
if (unlikely(cur == BVH::emptyNode)) goto pop;
}
/* intersect leaf */
assert(cur != BVH::invalidNode);
assert(cur != BVH::emptyNode);
#if defined(__AVX__)
STAT3(normal.trav_leaves, 1, popcnt(m_active), K);
#endif
if (unlikely(!m_active)) continue;
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
size_t lazy_node = 0;
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
octant_valid &= !terminated;
if (unlikely(none(octant_valid))) break;
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
if (unlikely(lazy_node)) {
stackPtr->ptr = lazy_node;
stackPtr->mask = movemask(octant_valid);
stackPtr++;
}
}
} while(valid_bits);
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
}
}
}