Files
mesa/src/intel/vulkan/grl/gpu/common.h
Jason Ekstrand 5f948503e4 anv: Import GRL
GRL, or Graphics Library for Ray-tracing, is a library we share with the
Windows drivers for doing BVH builds on the GPU.  It consists of a few
headers shared between CL and C code, a bunch of CL kernels, and some
GRL meta-kernels in their own format.

Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16970>
2022-09-28 05:38:37 +00:00

430 lines
13 KiB
C

//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//
#pragma once
#include "shared.h"
#include "intrinsics.h"
#include "AABB.h"
#include "AABB3f.h"
#include "qbvh6.h"
/* ====== BVH_BUILDER config ====== */
// Builder tuning constants. NOTE(review): the names suggest SAH-style intersection and
// traversal cost weights -- confirm against the code that consumes them.
__constant const float cfg_intCost = 4.0f;
__constant const float cfg_travCost = 1.0f;
// Leaf size limits, taken from BVH_LEAF_N_MIN / BVH_LEAF_N_MAX (defined outside this file).
__constant const uint cfg_minLeafSize = BVH_LEAF_N_MIN;
__constant const uint cfg_maxLeafSize = BVH_LEAF_N_MAX;
// Depth limit, set to BUILDRECORD_STACK_SIZE (defined outside this file).
__constant const uint cfg_maxDepth = BUILDRECORD_STACK_SIZE;
// Compile-time switch, defined as 0 here.
#define ENABLE_CONVERSION_CHECKS 0
// GRL_ANNOTATE_BIG_REG_REQ expands to an annotate("num-thread-per-eu 4") attribute when
// ENABLE_BIG_REG_ANNOTATION is defined, and to nothing otherwise.
#ifdef ENABLE_BIG_REG_ANNOTATION
#define GRL_ANNOTATE_BIG_REG_REQ __attribute__((annotate("num-thread-per-eu 4")))
#else
#define GRL_ANNOTATE_BIG_REG_REQ
#endif
// GRL_ANNOTATE_IGC_DO_NOT_SPILL expands to an annotate("igc-do-not-spill") attribute when
// ENABLE_IGC_DO_NOT_SPILL is defined, and to nothing otherwise.
#ifdef ENABLE_IGC_DO_NOT_SPILL
#define GRL_ANNOTATE_IGC_DO_NOT_SPILL __attribute__((annotate("igc-do-not-spill")))
#else
#define GRL_ANNOTATE_IGC_DO_NOT_SPILL
#endif
// ERROR() expands to nothing, so any use of it is a no-op.
#define ERROR()
/* =================================================================================================================================================== */
/* =================================================================================================================================================== */
/* =================================================================================================================================================== */
/* =================================================================================================================================================== */
/* Decodes the value stored in the low three bits of 'offset': the 3-bit field minus 3.
 * The arithmetic is unsigned, so field values below 3 wrap around. */
GRL_INLINE unsigned int getNumLeafPrims(unsigned int offset)
{
    const unsigned int lowBits = offset & 0x7;
    return lowBits - 3;
}
/* Returns 'offset' with its low three bits cleared. */
GRL_INLINE unsigned int getLeafOffset(unsigned int offset)
{
    const unsigned int lowMask = 0x7;
    return offset & ~lowMask;
}
/* Returns the (unnormalized) cross product of the edges v1-v0 and v2-v0. */
GRL_INLINE float4 triangleNormal(const float4 v0, const float4 v1, const float4 v2)
{
    return cross(v1 - v0, v2 - v0);
}
GRL_INLINE float areaTriangle(const float4 v0, const float4 v1, const float4 v2)
{
const float4 normal = triangleNormal(v0, v1, v2);
return length((float3)(normal.x, normal.y, normal.z)) * 0.5f;
}
/* Determinant of the 2x2 matrix whose rows are 'a' and 'b'. */
GRL_INLINE float det2(const float2 a, const float2 b)
{
    return (a.x * b.y) - (a.y * b.x);
}
/* Sum of the areas of the triangle's projections onto the xy, yz and zx planes.
 * Each projected area is half the absolute 2x2 determinant of the projected edges. */
GRL_INLINE float areaProjectedTriangle(const float4 v0, const float4 v1, const float4 v2)
{
    const float4 e1 = v1 - v0;
    const float4 e2 = v2 - v0;

    const float xy = 0.5f * fabs(det2(e1.xy, e2.xy));
    const float yz = 0.5f * fabs(det2(e1.yz, e2.yz));
    const float zx = 0.5f * fabs(det2(e1.zx, e2.zx));

    return xy + yz + zx;
}
/* Opaque 64-byte block; the typedef name Block64B carries an aligned(64) attribute. */
typedef struct Block64B {
char data[64];
} Block64B __attribute__((aligned(64)));
/* char typedef carrying an aligned(64) attribute. */
typedef char byte_align64B __attribute__((aligned(64)));
/* ====================================================================== */
/* ============================== GLOBALS =============================== */
/* ====================================================================== */
/* Work-group completion counter.
 * Only the work-item with local id 0 takes part: it atomically increments globals->sync.
 * If that increment brings the count to get_num_groups(0), the counter is reset to 0 and
 * true is returned. Every other caller gets false. */
GRL_INLINE bool Globals_OnFinish(global struct Globals *globals)
{
    if (get_local_id(0) != 0)
        return false;

    /* atomic_add yields the value before the increment */
    const uint previous = atomic_add(&globals->sync, 1);
    if (previous + 1 != get_num_groups(0))
        return false;

    /* last group to arrive: rearm the counter */
    globals->sync = 0;
    return true;
}
/* Returns the distance between the allocator's current cursor and its start (cur - start). */
GRL_INLINE uint BlockAllocator_BytesUsed(struct BlockAllocator *p)
{
    return p->cur - p->start;
}
/* Bumps the allocator cursor by 'size' with an atomic add and returns the result of that
 * atomic_add call (the cursor value before the bump). No bounds check is performed here. */
GRL_INLINE uint BlockAllocator_Alloc(__global struct BlockAllocator *p, const uint size)
{
    const uint allocated = atomic_add(&p->cur, size);
    return allocated;
}
/* Sub-group variant of BlockAllocator_Alloc: only the work-item with sub-group local id 0
 * performs the atomic add; its result is then broadcast so every work-item in the
 * sub-group returns the same offset. */
GRL_INLINE uint BlockAllocator_Alloc_Single(__global struct BlockAllocator *p, const uint size)
{
    const bool isLeader = (get_sub_group_local_id() == 0);

    uint leaderOffset = 0;
    if (isLeader)
    {
        leaderOffset = atomic_add(&p->cur, size);
    }

    /* every work-item reaches the broadcast, not just the leader */
    return sub_group_broadcast(leaderOffset, 0);
}
// Node allocation: the returned value is an offset from the beginning of the BVH
// to the allocated node, expressed in multiples of 64B.
// Implemented as atomic_add_global on base->nodeDataCur with 'num_nodes' as the increment.
GRL_INLINE uint allocate_inner_nodes(global struct BVHBase* base, uint num_nodes )
{
    const uint firstNode = atomic_add_global(&base->nodeDataCur, num_nodes);
    return firstNode;
}
// Reserves 'num_nodes' procedural leaves: atomic_add_global on base->proceduralDataCur,
// returning whatever that call yields.
GRL_INLINE uint allocate_procedural_leaves(global struct BVHBase* base, uint num_nodes)
{
    const uint firstLeaf = atomic_add_global(&base->proceduralDataCur, num_nodes);
    return firstLeaf;
}
// Reserves 'num_nodes' quad leaves: atomic_add_global on base->quadLeafCur,
// returning whatever that call yields.
GRL_INLINE uint allocate_quad_leaves(global struct BVHBase* base, uint num_nodes)
{
    const uint firstLeaf = atomic_add_global(&base->quadLeafCur, num_nodes);
    return firstLeaf;
}
#if 0
/* Disabled (compiled out) allocator helpers. Each one rounds 'size' up to the next
 * multiple of 64 bytes and forwards to a BlockAllocator inside 'globals'. */
GRL_INLINE uint alloc_node_mem(global struct Globals *globals, const uint size)
{
const uint aligned_size = ((size + 63) / 64) * 64; /* allocate in 64 bytes blocks */
return BlockAllocator_Alloc(&globals->node_mem_allocator, aligned_size);
}
/* Same as alloc_node_mem, but goes through BlockAllocator_Alloc_Single. */
GRL_INLINE uint alloc_single_node_mem(global struct Globals *globals, const uint size)
{
const uint aligned_size = ((size + 63) / 64) * 64; /* allocate in 64 bytes blocks */
return BlockAllocator_Alloc_Single(&globals->node_mem_allocator, aligned_size);
}
/* Allocates from globals->quad_mem_allocator. */
GRL_INLINE uint alloc_quad_leaf_mem(global struct Globals *globals, const uint size)
{
const uint aligned_size = ((size + 63) / 64) * 64; /* allocate in 64 bytes blocks */
return BlockAllocator_Alloc(&globals->quad_mem_allocator, aligned_size);
}
/* Allocates from globals->procedural_mem_allocator. */
GRL_INLINE uint alloc_procedural_leaf_mem(global struct Globals *globals, const uint size)
{
const uint aligned_size = ((size + 63) / 64) * 64; /* allocate in 64 bytes blocks */
return BlockAllocator_Alloc(&globals->procedural_mem_allocator, aligned_size);
}
#endif
GRL_INLINE global struct BuildRecord *getBuildRecords(char *bvh_mem, struct Globals *globals)
{
return (global struct BuildRecord *)(bvh_mem + globals->build_record_start);
}
/* ======================================================================= */
/* ============================== TRIANGLE =============================== */
/* ======================================================================= */
/*GRL_INLINE void printTriangle(struct Triangle *t)
{
printf("vtx[0] %d vtx[1] %d vtx[2] %d primID %d geomID %d \n",t->vtx[0],t->vtx[1],t->vtx[2],t->primID,t->geomID);
}*/
/* ==================================================================== */
/* ============================== SPLIT =============================== */
/* ==================================================================== */
/* Debug helper: prints the sah, dim and pos fields of 'split' on one line.
 * NOTE(review): dim and pos are printed with %d; their declared types are not visible
 * in this file -- confirm the specifiers match. */
GRL_INLINE void printSplit(struct Split *split)
{
printf("split sah %f dim %d pos %d \n", split->sah, split->dim, split->pos);
}
/* ========================================================================== */
/* ============================== BUILDRECORD =============================== */
/* ========================================================================== */
/* Initializes a build record: resets its centroid bounds through AABB_init and stores
 * the [start, end) primitive range given by the caller. */
GRL_INLINE void initBuildRecord(struct BuildRecord *buildRecord, uint start, uint end)
{
    AABB_init(&buildRecord->centroidBounds);

    buildRecord->end   = end;
    buildRecord->start = start;
}
/* Grows the record's centroid bounds by the value AABB_centroid2(primref) returns,
 * using AABB_extend_point. Both helpers are defined outside this file.
 * NOTE(review): the "2" suffix presumably means an unhalved centroid (lower + upper) -- confirm. */
GRL_INLINE void extendBuildRecord(struct BuildRecord *buildRecord, struct AABB *primref)
{
AABB_extend_point(&buildRecord->centroidBounds, AABB_centroid2(primref));
}
/* Reads the recursion depth stored in the w lane of centroidBounds.upper.
 * The lane holds raw integer bits, so it is reinterpreted (as_uint), not converted. */
GRL_INLINE uint getBuildRecursionDepth(struct BuildRecord *buildRecord)
{
    const float packedDepth = buildRecord->centroidBounds.upper.w;
    return as_uint(packedDepth);
}
/* Stores 'depth' in the w lane of centroidBounds.upper as raw bits (as_float reinterprets
 * the integer; it does not convert its value). */
GRL_INLINE void setBuildRecursionDepth(struct BuildRecord *buildRecord, uint depth)
{
    const float packedDepth = as_float(depth);
    buildRecord->centroidBounds.upper.w = packedDepth;
}
/* Number of primitives covered by the record: end - start. */
GRL_INLINE uint getNumPrimsBuildRecord(struct BuildRecord *buildRecord)
{
    const uint first = buildRecord->start;
    const uint last  = buildRecord->end;
    return last - first;
}
/* ========================================================================== */
/* =================== BinaryMortonCodeHierarchy ============================= */
/* ========================================================================== */
/* Initializes a hierarchy entry with the given range and marks both children
 * as unset by writing -1 into them. */
GRL_INLINE void BinaryMortonCodeHierarchy_init(struct BinaryMortonCodeHierarchy *record, uint start, uint end)
{
    record->leftChild  = -1;
    record->rightChild = -1;

    record->range.start = start;
    record->range.end   = end;
    // record->flag = 0;
}
/* Returns the number of primitives under 'nodeID'.
 * A nodeID with bit 31 set denotes a leaf and counts as exactly one primitive.
 * Otherwise 'nodeID' indexes 'nodes' and the count is end - start + 1 of that entry's range.
 * The leaf bit is built from an unsigned constant so the shift never touches the sign bit
 * of a signed int. */
GRL_INLINE uint BinaryMortonCodeHierarchy_getNumPrimitives(global struct BinaryMortonCodeHierarchy *nodes, uint nodeID)
{
    const uint leafBit = 1u << 31;

    /* leaf case */
    if (nodeID & leafBit)
        return 1;

    /* inner node case */
    return nodes[nodeID].range.end - nodes[nodeID].range.start + 1;
}
/* Returns the hierarchy entry for 'nodeID' by value.
 * A nodeID with bit 31 set denotes a leaf: the entry is synthesized with
 * BinaryMortonCodeHierarchy_init, using nodeID without bit 31 as both range start and end.
 * Otherwise the entry is copied from nodes[nodeID].
 * The leaf bit is built from an unsigned constant so the shift never touches the sign bit
 * of a signed int. */
GRL_INLINE struct BinaryMortonCodeHierarchy BinaryMortonCodeHierarchy_getEntry(global struct BinaryMortonCodeHierarchy* nodes, uint nodeID)
{
    const uint leafBit = 1u << 31;
    struct BinaryMortonCodeHierarchy entry;

    if (nodeID & leafBit) {
        /* leaf case: strip the tag bit to recover the primitive index */
        const uint rangeStart = nodeID ^ leafBit;
        BinaryMortonCodeHierarchy_init(&entry, rangeStart, rangeStart);
    }
    else {
        /* inner node case */
        entry = nodes[nodeID];
    }
    return entry;
}
/* Returns the first index of the range covered by 'nodeID'.
 * A nodeID with bit 31 set denotes a leaf: the result is nodeID with bit 31 cleared.
 * Otherwise the result is nodes[nodeID].range.start.
 * The leaf bit is built from an unsigned constant so the shift never touches the sign bit
 * of a signed int. */
GRL_INLINE uint BinaryMortonCodeHierarchy_getRangeStart(global struct BinaryMortonCodeHierarchy *nodes, uint nodeID)
{
    const uint leafBit = 1u << 31;

    /* leaf case */
    if (nodeID & leafBit)
        return nodeID ^ leafBit;

    /* inner node case */
    return nodes[nodeID].range.start;
}
/* ==================================================================== */
/* ============================== RANGE =============================== */
/* ==================================================================== */
/* Debug helper: prints the start and end fields of 'range' on one line.
 * NOTE(review): both fields are printed with %d; getSizeRange treats their difference as
 * uint, so %u may be the better specifier -- confirm against the struct Range declaration. */
GRL_INLINE void printRange(struct Range *range)
{
printf("start %d end %d \n", range->start, range->end);
}
/* True when both ranges have identical start and identical end. */
GRL_INLINE bool equalRange(struct Range *range0, struct Range *range1)
{
    const bool sameStart = (range0->start == range1->start);
    return sameStart && (range0->end == range1->end);
}
/* Size of the range, computed as end - start. */
GRL_INLINE uint getSizeRange(struct Range *range)
{
    const uint first = range->start;
    const uint last  = range->end;
    return last - first;
}
/* ==================================================================== */
/* ========================= ProceduralLeaf =========================== */
/* ==================================================================== */
#if 0
/* Disabled sketch of a procedural leaf layout. The accessors in this file read
 * leafDesc.geomIndex_flags and _primIndex, which do not match these member names,
 * so the definition actually in use must come from another header. */
struct ProceduralLeaf
{
uint shaderIndex_geomMask;
uint geomIndex_flags;
uint N_last;
uint primIndex[13];
};
#endif
/* Extracts the geometry index: the low 29 bits of leafDesc.geomIndex_flags. */
GRL_INLINE uint ProceduralLeaf_geomIndex(global struct ProceduralLeaf *This)
{
    const uint geomIndexMask = 0x1FFFFFFF;
    return This->leafDesc.geomIndex_flags & geomIndexMask;
}
/* Returns entry 'i' of the leaf's _primIndex array. No bounds check is performed;
 * the caller must pass a valid index. */
GRL_INLINE uint ProceduralLeaf_primIndex(global struct ProceduralLeaf *This, uint i)
{
    //assert(i < N);
    const uint prim = This->_primIndex[i];
    return prim;
}
/* ==================================================================== */
/* =========================== TrianglePair =========================== */
/* ==================================================================== */
/* Two triangles packed as one quad; built by TrianglePair_Constructor. */
struct TrianglePair
{
uint4 a; // indices of the 4 verts to store in the quad
uint3 lb; // index of the second triangle's verts in 'a'
};
/* Builds a TrianglePair from two indexed triangles.
 *
 * a.xyz receives the vertex indices of tri0. For each vertex of tri1, lb holds the slot
 * of 'a' (0, 1 or 2) containing the same index; when several slots match, the highest
 * slot wins. A tri1 vertex matching none of them gets slot 3 and its index is stored in a.w
 * (if several are unmatched, the z component takes precedence over y, and y over x).
 *
 * When primID0 == primID1, lb is forced to all zeros and a.w stays equal to tri0.z. */
GRL_INLINE struct TrianglePair TrianglePair_Constructor(uint3 tri0, uint primID0, uint3 tri1, uint primID1)
{
    const uint NO_MATCH = 3;
    const bool samePrimitive = (primID0 == primID1);

    /* slot lookup for each tri1 vertex; later slots take priority over earlier ones */
    uint slotX = (tri1.x == tri0.z) ? 2 : (tri1.x == tri0.y) ? 1 : (tri1.x == tri0.x) ? 0 : NO_MATCH;
    uint slotY = (tri1.y == tri0.z) ? 2 : (tri1.y == tri0.y) ? 1 : (tri1.y == tri0.x) ? 0 : NO_MATCH;
    uint slotZ = (tri1.z == tri0.z) ? 2 : (tri1.z == tri0.y) ? 1 : (tri1.z == tri0.x) ? 0 : NO_MATCH;

    if (samePrimitive)
    {
        slotX = 0;
        slotY = 0;
        slotZ = 0;
    }

    /* fourth quad vertex: defaults to tri0.z, replaced by an unmatched tri1 vertex */
    uint fourth = tri0.z;
    if (slotX == NO_MATCH)
        fourth = tri1.x;
    if (slotY == NO_MATCH)
        fourth = tri1.y;
    if (slotZ == NO_MATCH)
        fourth = tri1.z;

    struct TrianglePair q;
    q.a = (uint4)(tri0.x, tri0.y, tri0.z, fourth);
    q.lb = (uint3)(slotX, slotY, slotZ);
    return q;
}
/* Returns element [row][column] of the instance transform. Indices are not range-checked. */
GRL_INLINE float InstanceDesc_get_transform(const InstanceDesc *d, const uint32_t row, const uint32_t column)
{
    const float element = d->Transform[row][column];
    return element;
}
/* Returns the instance ID: the low 24 bits of InstanceIDAndMask. */
GRL_INLINE uint32_t InstanceDesc_get_instanceID(const InstanceDesc *d)
{
    const uint32_t packed = d->InstanceIDAndMask;
    return packed & 0x00FFFFFF;
}
/* Returns the instance mask: InstanceIDAndMask shifted right by 24 bits. */
GRL_INLINE uint32_t InstanceDesc_get_InstanceMask(const InstanceDesc *d)
{
    const uint32_t maskShift = 24;
    return d->InstanceIDAndMask >> maskShift;
}
/* Returns the hit-group index contribution: the low 24 bits of
 * InstanceContributionToHitGroupIndexAndFlags. */
GRL_INLINE uint32_t InstanceDesc_get_InstanceContributionToHitGroupIndex(const InstanceDesc *d)
{
    const uint32_t packed = d->InstanceContributionToHitGroupIndexAndFlags;
    return packed & ((1 << 24) - 1);
}
/* Returns the instance flags: InstanceContributionToHitGroupIndexAndFlags shifted right
 * by 24 bits. */
GRL_INLINE uint32_t InstanceDesc_get_InstanceFlags(const InstanceDesc *d)
{
    const uint32_t flagsShift = 24;
    return d->InstanceContributionToHitGroupIndexAndFlags >> flagsShift;
}
/* Returns the AccelerationStructureGPUVA field of the instance descriptor. */
GRL_INLINE gpuva_t InstanceDesc_get_AccelerationStructure(const InstanceDesc *d)
{
    const gpuva_t address = d->AccelerationStructureGPUVA;
    return address;
}
/* Writes 'value' into element [row][column] of the instance transform.
 * Indices are not range-checked. */
GRL_INLINE void InstanceDesc_set_transform(InstanceDesc *d, const uint32_t row, const uint32_t column, float value)
{
d->Transform[row][column] = value;
}
/* Replaces the instance ID (low 24 bits of InstanceIDAndMask) with the low 24 bits of 'id',
 * leaving the top 8 bits untouched. Bits of 'id' above bit 23 are discarded.
 * The masks are unsigned literals: the former '255 << 24' overflowed a signed int. */
GRL_INLINE void InstanceDesc_set_instanceID(InstanceDesc *d, const uint32_t id)
{
    const uint32_t keptMaskBits = d->InstanceIDAndMask & 0xFF000000u;
    d->InstanceIDAndMask = keptMaskBits | (id & 0x00FFFFFFu);
}
/* Replaces the top 8 bits of InstanceIDAndMask with 'mask' shifted left by 24,
 * keeping the low 24 bits. */
GRL_INLINE void InstanceDesc_set_InstanceMask(InstanceDesc *d, const uint32_t mask)
{
    const uint32_t keptIdBits = d->InstanceIDAndMask & ((1 << 24) - 1);
    d->InstanceIDAndMask = keptIdBits | (mask << 24);
}
/* Replaces the low 24 bits of InstanceContributionToHitGroupIndexAndFlags with the low
 * 24 bits of 'contribution', leaving the top 8 bits untouched. Bits of 'contribution'
 * above bit 23 are discarded.
 * The masks are unsigned literals: the former '255 << 24' overflowed a signed int. */
GRL_INLINE void InstanceDesc_set_InstanceContributionToHitGroupIndex(InstanceDesc *d, const uint32_t contribution)
{
    const uint32_t keptFlagBits = d->InstanceContributionToHitGroupIndexAndFlags & 0xFF000000u;
    d->InstanceContributionToHitGroupIndexAndFlags = keptFlagBits | (contribution & 0x00FFFFFFu);
}
/* Replaces the top 8 bits of InstanceContributionToHitGroupIndexAndFlags with 'flags'
 * shifted left by 24, keeping the low 24 bits. */
GRL_INLINE void InstanceDesc_set_InstanceFlags(InstanceDesc *d, const uint32_t flags)
{
    const uint32_t keptContribution = d->InstanceContributionToHitGroupIndexAndFlags & ((1 << 24) - 1);
    d->InstanceContributionToHitGroupIndexAndFlags = keptContribution | (flags << 24);
}
/* Stores 'address' in the AccelerationStructureGPUVA field of the instance descriptor. */
GRL_INLINE void InstanceDesc_set_AccelerationStructure(InstanceDesc *d, gpuva_t address)
{
d->AccelerationStructureGPUVA = address;
}