GRL, or Graphics Library for Ray-tracing, is a library we share with the Windows drivers for doing BVH builds on the GPU. It consists of a few headers shared between CL and C code, a bunch of CL kernels, and some GRL meta-kernels in their own format. Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16970>
430 lines
13 KiB
C
430 lines
13 KiB
C
//
|
|
// Copyright (C) 2009-2021 Intel Corporation
|
|
//
|
|
// SPDX-License-Identifier: MIT
|
|
//
|
|
//
|
|
|
|
#pragma once
|
|
|
|
#include "shared.h"
|
|
#include "intrinsics.h"
|
|
#include "AABB.h"
|
|
#include "AABB3f.h"
|
|
#include "qbvh6.h"
|
|
|
|
/* ====== BVH_BUILDER config ====== */
|
|
|
|
__constant const float cfg_intCost = 4.0f;
|
|
__constant const float cfg_travCost = 1.0f;
|
|
__constant const uint cfg_minLeafSize = BVH_LEAF_N_MIN;
|
|
__constant const uint cfg_maxLeafSize = BVH_LEAF_N_MAX;
|
|
__constant const uint cfg_maxDepth = BUILDRECORD_STACK_SIZE;
|
|
|
|
/* Conversion checks are compiled out by default. */
#define ENABLE_CONVERSION_CHECKS 0

/*
 * GRL_ANNOTATE_BIG_REG_REQ expands to the "num-thread-per-eu 4" annotation
 * when ENABLE_BIG_REG_ANNOTATION is defined, and to nothing otherwise.
 */
#if defined(ENABLE_BIG_REG_ANNOTATION)
#define GRL_ANNOTATE_BIG_REG_REQ __attribute__((annotate("num-thread-per-eu 4")))
#else
#define GRL_ANNOTATE_BIG_REG_REQ
#endif

/*
 * GRL_ANNOTATE_IGC_DO_NOT_SPILL expands to the "igc-do-not-spill" annotation
 * when ENABLE_IGC_DO_NOT_SPILL is defined, and to nothing otherwise.
 */
#if defined(ENABLE_IGC_DO_NOT_SPILL)
#define GRL_ANNOTATE_IGC_DO_NOT_SPILL __attribute__((annotate("igc-do-not-spill")))
#else
#define GRL_ANNOTATE_IGC_DO_NOT_SPILL
#endif

/* ERROR() expands to nothing. */
#define ERROR()
|
|
|
|
/* =================================================================================================================================================== */
|
|
/* =================================================================================================================================================== */
|
|
/* =================================================================================================================================================== */
|
|
/* =================================================================================================================================================== */
|
|
|
|
/*
 * Decode the primitive count from an encoded leaf offset.
 *
 * The count is stored in the low three bits of `offset`, biased by 3.
 * The subtraction is unsigned, so a field value below 3 wraps around.
 */
GRL_INLINE unsigned int getNumLeafPrims(unsigned int offset)
{
    const unsigned int encodedCount = offset & 0x7;
    return encodedCount - 3;
}
|
|
|
|
/*
 * Strip the low three bits from an encoded leaf offset and return the rest.
 */
GRL_INLINE unsigned int getLeafOffset(unsigned int offset)
{
    const unsigned int lowBitsCleared = ~0x7u;
    return offset & lowBitsCleared;
}
|
|
|
|
/*
 * Un-normalized normal of triangle (v0, v1, v2): the cross product of the
 * two edges that leave v0.
 */
GRL_INLINE float4 triangleNormal(const float4 v0, const float4 v1, const float4 v2)
{
    return cross(v1 - v0, v2 - v0);
}
|
|
|
|
GRL_INLINE float areaTriangle(const float4 v0, const float4 v1, const float4 v2)
|
|
{
|
|
const float4 normal = triangleNormal(v0, v1, v2);
|
|
return length((float3)(normal.x, normal.y, normal.z)) * 0.5f;
|
|
}
|
|
|
|
GRL_INLINE float det2(const float2 a, const float2 b)
|
|
{
|
|
return a.x * b.y - a.y * b.x;
|
|
}
|
|
|
|
GRL_INLINE float areaProjectedTriangle(const float4 v0, const float4 v1, const float4 v2)
|
|
{
|
|
const float xy = 0.5f * fabs(det2(v1.xy - v0.xy, v2.xy - v0.xy));
|
|
const float yz = 0.5f * fabs(det2(v1.yz - v0.yz, v2.yz - v0.yz));
|
|
const float zx = 0.5f * fabs(det2(v1.zx - v0.zx, v2.zx - v0.zx));
|
|
return xy + yz + zx;
|
|
}
|
|
|
|
/* Opaque 64-byte block; the typedef requests 64-byte alignment. */
typedef struct Block64B
{
    char data[64];
} Block64B __attribute__((aligned(64)));
|
|
|
|
/* A char type that requests 64-byte alignment. */
typedef char byte_align64B __attribute__((aligned(64)));
|
|
|
|
/* ====================================================================== */
|
|
/* ============================== GLOBALS =============================== */
|
|
/* ====================================================================== */
|
|
|
|
/*
 * Detect the last work-group to reach this point.
 *
 * The work-item with local id 0 in each group atomically increments
 * globals->sync. The one whose increment brings the counter up to
 * get_num_groups(0) resets the counter to 0 and returns true. Every other
 * work-item returns false.
 */
GRL_INLINE bool Globals_OnFinish(global struct Globals *globals)
{
    /* Only one work-item per work-group takes part in the count. */
    if (get_local_id(0) != 0)
        return false;

    const uint arrivedBefore = atomic_add(&globals->sync, 1);
    if (arrivedBefore + 1 != get_num_groups(0))
        return false;

    /* Last group in: re-arm the counter. */
    globals->sync = 0;
    return true;
}
|
|
|
|
/*
 * Amount handed out by the allocator so far: the distance between its
 * current cursor and its start (p->cur - p->start).
 */
GRL_INLINE uint BlockAllocator_BytesUsed(struct BlockAllocator *p)
{
    return p->cur - p->start;
}
|
|
|
|
/*
 * Reserve `size` units from the allocator by atomically advancing p->cur.
 *
 * Returns the cursor value from before the advance. No bounds check is
 * made here.
 */
GRL_INLINE uint BlockAllocator_Alloc(__global struct BlockAllocator *p, const uint size)
{
    const uint previousCursor = atomic_add(&p->cur, size);
    return previousCursor;
}
|
|
|
|
/*
 * Make one allocation for the whole sub-group.
 *
 * Only the work-item with sub-group local id 0 advances p->cur by `size`.
 * The cursor value it got back is then broadcast to every work-item in
 * the sub-group.
 */
GRL_INLINE uint BlockAllocator_Alloc_Single(__global struct BlockAllocator *p, const uint size)
{
    const bool isFirstLane = (get_sub_group_local_id() == 0);
    const uint offset = isFirstLane ? atomic_add(&p->cur, size) : 0;
    return sub_group_broadcast(offset, 0);
}
|
|
|
|
// node allocation returns an offset from beginning of BVH to allocated node
|
|
// in multiples of 64B
|
|
GRL_INLINE uint allocate_inner_nodes(global struct BVHBase* base, uint num_nodes )
|
|
{
|
|
return atomic_add_global( &base->nodeDataCur, num_nodes );
|
|
}
|
|
/*
 * Reserve `num_nodes` procedural leaves by atomically adding to
 * base->proceduralDataCur. Returns whatever atomic_add_global() returns.
 * NOTE(review): presumably the pre-increment cursor -- confirm.
 */
GRL_INLINE uint allocate_procedural_leaves(global struct BVHBase* base, uint num_nodes)
{
    const uint firstLeaf = atomic_add_global(&base->proceduralDataCur, num_nodes);
    return firstLeaf;
}
|
|
|
|
/*
 * Reserve `num_nodes` quad leaves by atomically adding to base->quadLeafCur.
 * Returns whatever atomic_add_global() returns.
 * NOTE(review): presumably the pre-increment cursor -- confirm.
 */
GRL_INLINE uint allocate_quad_leaves(global struct BVHBase* base, uint num_nodes)
{
    const uint firstLeaf = atomic_add_global(&base->quadLeafCur, num_nodes);
    return firstLeaf;
}
|
|
|
|
#if 0
/*
 * Disabled allocation helpers. Each one rounds `size` up to a multiple of
 * 64 bytes and allocates from one of the BlockAllocators stored in Globals.
 */

GRL_INLINE uint alloc_node_mem(global struct Globals *globals, const uint size)
{
    const uint num_blocks = (size + 63) / 64; /* allocate in 64 bytes blocks */
    const uint aligned_size = num_blocks * 64;
    return BlockAllocator_Alloc(&globals->node_mem_allocator, aligned_size);
}

/* Same as alloc_node_mem, but makes one allocation per sub-group. */
GRL_INLINE uint alloc_single_node_mem(global struct Globals *globals, const uint size)
{
    const uint num_blocks = (size + 63) / 64; /* allocate in 64 bytes blocks */
    const uint aligned_size = num_blocks * 64;
    return BlockAllocator_Alloc_Single(&globals->node_mem_allocator, aligned_size);
}

GRL_INLINE uint alloc_quad_leaf_mem(global struct Globals *globals, const uint size)
{
    const uint num_blocks = (size + 63) / 64; /* allocate in 64 bytes blocks */
    const uint aligned_size = num_blocks * 64;
    return BlockAllocator_Alloc(&globals->quad_mem_allocator, aligned_size);
}

GRL_INLINE uint alloc_procedural_leaf_mem(global struct Globals *globals, const uint size)
{
    const uint num_blocks = (size + 63) / 64; /* allocate in 64 bytes blocks */
    const uint aligned_size = num_blocks * 64;
    return BlockAllocator_Alloc(&globals->procedural_mem_allocator, aligned_size);
}
#endif
|
|
|
|
GRL_INLINE global struct BuildRecord *getBuildRecords(char *bvh_mem, struct Globals *globals)
|
|
{
|
|
return (global struct BuildRecord *)(bvh_mem + globals->build_record_start);
|
|
}
|
|
|
|
/* ======================================================================= */
|
|
/* ============================== TRIANGLE =============================== */
|
|
/* ======================================================================= */
|
|
|
|
/*GRL_INLINE void printTriangle(struct Triangle *t)
|
|
{
|
|
printf("vtx[0] %d vtx[1] %d vtx[2] %d primID %d geomID %d \n",t->vtx[0],t->vtx[1],t->vtx[2],t->primID,t->geomID);
|
|
}*/
|
|
|
|
/* ==================================================================== */
|
|
/* ============================== SPLIT =============================== */
|
|
/* ==================================================================== */
|
|
|
|
GRL_INLINE void printSplit(struct Split *split)
|
|
{
|
|
printf("split sah %f dim %d pos %d \n", split->sah, split->dim, split->pos);
|
|
}
|
|
|
|
/* ========================================================================== */
|
|
/* ============================== BUILDRECORD =============================== */
|
|
/* ========================================================================== */
|
|
|
|
/*
 * Set up a build record for the primitive range [start, end] and reset its
 * centroid bounds with AABB_init().
 */
GRL_INLINE void initBuildRecord(struct BuildRecord *buildRecord, uint start, uint end)
{
    buildRecord->start = start;
    buildRecord->end = end;
    AABB_init(&buildRecord->centroidBounds);
}
|
|
|
|
GRL_INLINE void extendBuildRecord(struct BuildRecord *buildRecord, struct AABB *primref)
|
|
{
|
|
AABB_extend_point(&buildRecord->centroidBounds, AABB_centroid2(primref));
|
|
}
|
|
|
|
/*
 * Read the recursion depth, which is stored as raw bits in the w component
 * of centroidBounds.upper (written by setBuildRecursionDepth).
 */
GRL_INLINE uint getBuildRecursionDepth(struct BuildRecord *buildRecord)
{
    const float depthBits = buildRecord->centroidBounds.upper.w;
    return as_uint(depthBits);
}
|
|
|
|
/*
 * Store the recursion depth as raw bits in the w component of
 * centroidBounds.upper. The bits are reinterpreted, not converted.
 */
GRL_INLINE void setBuildRecursionDepth(struct BuildRecord *buildRecord, uint depth)
{
    const float depthBits = as_float(depth);
    buildRecord->centroidBounds.upper.w = depthBits;
}
|
|
|
|
/* Primitive count of a build record, computed as end - start. */
GRL_INLINE uint getNumPrimsBuildRecord(struct BuildRecord *buildRecord)
{
    const uint numPrims = buildRecord->end - buildRecord->start;
    return numPrims;
}
|
|
|
|
/* ========================================================================== */
|
|
/* =================== BinaryMortonCodeHierarchy ============================= */
|
|
/* ========================================================================== */
|
|
|
|
/*
 * Initialize a hierarchy record: both child links are set to -1 and the
 * range is set to [start, end]. The `flag` member is left untouched.
 */
GRL_INLINE void BinaryMortonCodeHierarchy_init(struct BinaryMortonCodeHierarchy *record, uint start, uint end)
{
    record->leftChild = -1;
    record->rightChild = -1;

    record->range.start = start;
    record->range.end = end;
    // record->flag = 0;
}
|
|
|
|
/*
 * Number of primitives referenced by `nodeID`.
 *
 * A node ID with bit 31 set is a leaf and counts as one primitive.
 * Otherwise `nodeID` indexes `nodes`, and the count is the size of that
 * node's inclusive range (end - start + 1).
 */
GRL_INLINE uint BinaryMortonCodeHierarchy_getNumPrimitives(global struct BinaryMortonCodeHierarchy *nodes, uint nodeID)
{
    /* Unsigned literal: shifting a signed 1 into bit 31 is undefined in ISO C. */
    const uint leafBit = 1u << 31;

    /* leaf case */
    if (nodeID & leafBit)
        return 1;

    /* inner node case */
    return nodes[nodeID].range.end - nodes[nodeID].range.start + 1;
}
|
|
|
|
/*
 * Fetch the hierarchy entry for `nodeID` by value.
 *
 * A node ID with bit 31 set is a leaf: the entry is built on the fly with
 * a one-element range [id, id], where id is `nodeID` with bit 31 cleared.
 * Otherwise the entry is copied from nodes[nodeID].
 */
GRL_INLINE struct BinaryMortonCodeHierarchy BinaryMortonCodeHierarchy_getEntry(global struct BinaryMortonCodeHierarchy* nodes, uint nodeID)
{
    /* Unsigned literal: shifting a signed 1 into bit 31 is undefined in ISO C. */
    const uint leafBit = 1u << 31;
    struct BinaryMortonCodeHierarchy entry;

    if (nodeID & leafBit) {
        /* leaf case: the bit is known to be set, so XOR clears it */
        const uint rangeStart = nodeID ^ leafBit;
        BinaryMortonCodeHierarchy_init(&entry, rangeStart, rangeStart);
    }
    else {
        /* inner node case */
        entry = nodes[nodeID];
    }

    return entry;
}
|
|
|
|
/*
 * First index covered by `nodeID`.
 *
 * A node ID with bit 31 set is a leaf: the result is `nodeID` with bit 31
 * cleared. Otherwise the result is nodes[nodeID].range.start.
 */
GRL_INLINE uint BinaryMortonCodeHierarchy_getRangeStart(global struct BinaryMortonCodeHierarchy *nodes, uint nodeID)
{
    /* Unsigned literal: shifting a signed 1 into bit 31 is undefined in ISO C. */
    const uint leafBit = 1u << 31;

    /* leaf case: the bit is known to be set, so XOR clears it */
    if (nodeID & leafBit)
        return nodeID ^ leafBit;

    /* inner node case */
    return nodes[nodeID].range.start;
}
|
|
|
|
/* ==================================================================== */
|
|
/* ============================== RANGE =============================== */
|
|
/* ==================================================================== */
|
|
|
|
GRL_INLINE void printRange(struct Range *range)
|
|
{
|
|
printf("start %d end %d \n", range->start, range->end);
|
|
}
|
|
|
|
/* True when both ranges have the same start and the same end. */
GRL_INLINE bool equalRange(struct Range *range0, struct Range *range1)
{
    return (range0->start == range1->start) &&
           (range0->end == range1->end);
}
|
|
|
|
/* Size of a range, computed as end - start. */
GRL_INLINE uint getSizeRange(struct Range *range)
{
    const uint size = range->end - range->start;
    return size;
}
|
|
|
|
/* ==================================================================== */
|
|
/* ========================= ProceduralLeaf =========================== */
|
|
/* ==================================================================== */
|
|
|
|
#if 0
/*
 * Disabled layout sketch. The accessors that follow read
 * leafDesc.geomIndex_flags and _primIndex[], so this is not the definition
 * they are compiled against.
 */
struct ProceduralLeaf
{
    uint shaderIndex_geomMask;
    uint geomIndex_flags;
    uint N_last;
    uint primIndex[13];
};
#endif
|
|
|
|
/*
 * Geometry index of a procedural leaf: the low 29 bits of
 * leafDesc.geomIndex_flags.
 */
GRL_INLINE uint ProceduralLeaf_geomIndex(global struct ProceduralLeaf *This)
{
    const uint geomIndexMask = 0x1FFFFFFF;
    return This->leafDesc.geomIndex_flags & geomIndexMask;
}
|
|
|
|
/*
 * Entry `i` of the leaf's _primIndex array.
 * `i` is not range-checked; the caller must pass a valid index.
 */
GRL_INLINE uint ProceduralLeaf_primIndex(global struct ProceduralLeaf *This, uint i)
{
    //assert(i < N);
    const uint primIndex = This->_primIndex[i];
    return primIndex;
}
|
|
|
|
/* ==================================================================== */
|
|
/* =========================== TrianglePair =========================== */
|
|
/* ==================================================================== */
|
|
|
|
struct TrianglePair
|
|
{
|
|
uint4 a; // indices of the 4 verts to store in the quad
|
|
uint3 lb; // index of the second triangle's verts in 'a'
|
|
};
|
|
|
|
/*
 * Pack two indexed triangles into a TrianglePair.
 *
 * q.a.xyz receives tri0's vertex indices. Each component of q.lb says where
 * the matching vertex of tri1 sits in q.a:
 *   - 0, 1 or 2 if it equals tri0.x, tri0.y or tri0.z. When several match,
 *     the highest position wins.
 *   - 3 if it matches none of them, meaning it lives in q.a.w.
 * q.a.w defaults to tri0.z and is replaced by a tri1 vertex whose lb entry
 * is 3. If several qualify, z beats y beats x.
 *
 * When primID0 == primID1 all of q.lb is forced to 0, so q.a.w stays tri0.z.
 */
GRL_INLINE struct TrianglePair TrianglePair_Constructor(uint3 tri0, uint primID0, uint3 tri1, uint primID1)
{
    const bool samePrim = (primID0 == primID1);

    struct TrianglePair q;
    q.a = (uint4)(tri0.x, tri0.y, tri0.z, tri0.z);

    /* Later comparisons take priority, so test tri0.z first, then y, then x. */
    q.lb.x = samePrim ? 0
           : (tri1.x == tri0.z) ? 2
           : (tri1.x == tri0.y) ? 1
           : (tri1.x == tri0.x) ? 0
           : 3;
    q.lb.y = samePrim ? 0
           : (tri1.y == tri0.z) ? 2
           : (tri1.y == tri0.y) ? 1
           : (tri1.y == tri0.x) ? 0
           : 3;
    q.lb.z = samePrim ? 0
           : (tri1.z == tri0.z) ? 2
           : (tri1.z == tri0.y) ? 1
           : (tri1.z == tri0.x) ? 0
           : 3;

    /* Fourth quad vertex: the unshared tri1 vertex, with z over y over x. */
    q.a.w = (q.lb.z == 3) ? tri1.z
          : (q.lb.y == 3) ? tri1.y
          : (q.lb.x == 3) ? tri1.x
          : tri0.z;

    return q;
}
|
|
|
|
GRL_INLINE float InstanceDesc_get_transform(const InstanceDesc *d, const uint32_t row, const uint32_t column)
|
|
{
|
|
return d->Transform[row][column];
|
|
}
|
|
|
|
/* Instance ID: the low 24 bits of InstanceIDAndMask. */
GRL_INLINE uint32_t InstanceDesc_get_instanceID(const InstanceDesc *d)
{
    const uint32_t instanceID = d->InstanceIDAndMask & (0x00FFFFFF);
    return instanceID;
}
|
|
|
|
/* Instance mask: the bits of InstanceIDAndMask above bit 23. */
GRL_INLINE uint32_t InstanceDesc_get_InstanceMask(const InstanceDesc *d)
{
    const uint32_t instanceMask = d->InstanceIDAndMask >> 24;
    return instanceMask;
}
|
|
|
|
/*
 * Hit-group index contribution: the low 24 bits of
 * InstanceContributionToHitGroupIndexAndFlags.
 */
GRL_INLINE uint32_t InstanceDesc_get_InstanceContributionToHitGroupIndex(const InstanceDesc *d)
{
    const uint32_t contribution =
        d->InstanceContributionToHitGroupIndexAndFlags & ((1 << 24) - 1);
    return contribution;
}
|
|
|
|
/*
 * Instance flags: the bits of InstanceContributionToHitGroupIndexAndFlags
 * above bit 23.
 */
GRL_INLINE uint32_t InstanceDesc_get_InstanceFlags(const InstanceDesc *d)
{
    const uint32_t flags = d->InstanceContributionToHitGroupIndexAndFlags >> 24;
    return flags;
}
|
|
|
|
/* The acceleration-structure GPU virtual address stored in the descriptor. */
GRL_INLINE gpuva_t InstanceDesc_get_AccelerationStructure(const InstanceDesc *d)
{
    const gpuva_t address = d->AccelerationStructureGPUVA;
    return address;
}
|
|
|
|
/* Write `value` to element [row][column] of the instance transform. Indices are not range-checked. */
GRL_INLINE void InstanceDesc_set_transform(InstanceDesc *d, const uint32_t row, const uint32_t column, float value)
{
    d->Transform[row][column] = value;
}
|
|
|
|
/*
 * Replace the instance ID (low 24 bits of InstanceIDAndMask) with the low
 * 24 bits of `id`. The top 8 bits of the field are preserved.
 */
GRL_INLINE void InstanceDesc_set_instanceID(InstanceDesc *d, const uint32_t id)
{
    /* Unsigned mask: the previous `255 << 24` overflowed a signed int. */
    const uint32_t idMask = 0x00FFFFFFu;

    d->InstanceIDAndMask &= ~idMask;
    d->InstanceIDAndMask |= id & idMask;
}
|
|
|
|
/*
 * Replace the instance mask (bits 24 and up of InstanceIDAndMask) with
 * `mask` shifted left by 24. The low 24 bits of the field are preserved.
 */
GRL_INLINE void InstanceDesc_set_InstanceMask(InstanceDesc *d, const uint32_t mask)
{
    d->InstanceIDAndMask =
        (d->InstanceIDAndMask & ((1 << 24) - 1)) | (mask << 24);
}
|
|
|
|
/*
 * Replace the hit-group index contribution (low 24 bits of
 * InstanceContributionToHitGroupIndexAndFlags) with the low 24 bits of
 * `contribution`. The top 8 bits of the field are preserved.
 */
GRL_INLINE void InstanceDesc_set_InstanceContributionToHitGroupIndex(InstanceDesc *d, const uint32_t contribution)
{
    /* Unsigned mask: the previous `255 << 24` overflowed a signed int. */
    const uint32_t contributionMask = 0x00FFFFFFu;

    d->InstanceContributionToHitGroupIndexAndFlags &= ~contributionMask;
    d->InstanceContributionToHitGroupIndexAndFlags |= contribution & contributionMask;
}
|
|
|
|
/*
 * Replace the instance flags (bits 24 and up of
 * InstanceContributionToHitGroupIndexAndFlags) with `flags` shifted left
 * by 24. The low 24 bits of the field are preserved.
 */
GRL_INLINE void InstanceDesc_set_InstanceFlags(InstanceDesc *d, const uint32_t flags)
{
    d->InstanceContributionToHitGroupIndexAndFlags =
        (d->InstanceContributionToHitGroupIndexAndFlags & ((1 << 24) - 1)) | (flags << 24);
}
|
|
|
|
/* Store `address` as the descriptor's acceleration-structure GPU virtual address. */
GRL_INLINE void InstanceDesc_set_AccelerationStructure(InstanceDesc *d, gpuva_t address)
{
    d->AccelerationStructureGPUVA = address;
}
|