#ifndef B3_GPU_PARALLEL_LINEAR_BVH_H #define B3_GPU_PARALLEL_LINEAR_BVH_H /* This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. */ //Initial Author Jackson Lee, 2014 //#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" #include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" #include "Bullet3Common/shared/b3Int2.h" #include "Bullet3Common/shared/b3Int4.h" #include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" #include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" #include "Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h" #define b3Int64 cl_long ///@brief GPU Parallel Linearized Bounding Volume Heirarchy(LBVH) that is reconstructed every frame ///@remarks ///See presentation in docs/b3GpuParallelLinearBvh.pdf for algorithm details. ///@par ///Related papers: \n ///"Fast BVH Construction on GPUs" [Lauterbach et al. 2009] \n ///"Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d trees" [Karras 2012] \n ///@par ///The basic algorithm for building the BVH as presented in [Lauterbach et al. 2009] consists of 4 stages: /// - [fully parallel] Assign morton codes for each AABB using its center (after quantizing the AABB centers into a virtual grid) /// - [fully parallel] Sort morton codes /// - [somewhat parallel] Build binary radix tree (assign parent/child pointers for internal nodes of the BVH) /// - [somewhat parallel] Set internal node AABBs ///@par ///[Karras 2012] improves on the algorithm by introducing fully parallel methods for the last 2 stages. ///The BVH implementation here shares many concepts with [Karras 2012], but a different method is used for constructing the tree. ///Instead of searching for the child nodes of each internal node, we search for the parent node of each node. ///Additionally, a non-atomic traversal that starts from the leaf nodes and moves towards the root node is used to set the AABBs. class b3GpuParallelLinearBvh { cl_command_queue m_queue; cl_program m_parallelLinearBvhProgram; cl_kernel m_separateAabbsKernel; cl_kernel m_findAllNodesMergedAabbKernel; cl_kernel m_assignMortonCodesAndAabbIndiciesKernel; //Binary radix tree construction kernels cl_kernel m_computeAdjacentPairCommonPrefixKernel; cl_kernel m_buildBinaryRadixTreeLeafNodesKernel; cl_kernel m_buildBinaryRadixTreeInternalNodesKernel; cl_kernel m_findDistanceFromRootKernel; cl_kernel m_buildBinaryRadixTreeAabbsRecursiveKernel; cl_kernel m_findLeafIndexRangesKernel; //Traversal kernels cl_kernel m_plbvhCalculateOverlappingPairsKernel; cl_kernel m_plbvhRayTraverseKernel; cl_kernel m_plbvhLargeAabbAabbTestKernel; cl_kernel m_plbvhLargeAabbRayTestKernel; b3RadixSort32CL m_radixSorter; //1 element b3OpenCLArray m_rootNodeIndex; //Most significant bit(0x80000000) is set to indicate internal node b3OpenCLArray m_maxDistanceFromRoot; //Max number of internal nodes between an internal node and the root node b3OpenCLArray m_temp; //Used to hold the number of pairs in calculateOverlappingPairs() //1 element per internal node (number_of_internal_nodes == number_of_leaves - 1) b3OpenCLArray m_internalNodeAabbs; b3OpenCLArray m_internalNodeLeafIndexRanges; //x == min leaf index, y == max leaf index b3OpenCLArray m_internalNodeChildNodes; //x == left child, y == right child; msb(0x80000000) is set to indicate internal node b3OpenCLArray m_internalNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal //1 element per internal node; for binary radix tree construction b3OpenCLArray m_commonPrefixes; b3OpenCLArray m_commonPrefixLengths; b3OpenCLArray m_distanceFromRoot; //Number of internal nodes between this node and the root //1 element per leaf node (leaf nodes only include small AABBs) b3OpenCLArray m_leafNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal b3OpenCLArray m_mortonCodesAndAabbIndicies; //m_key == morton code, m_value == aabb index in m_leafNodeAabbs b3OpenCLArray m_mergedAabb; //m_mergedAabb[0] contains the merged AABB of all leaf nodes b3OpenCLArray m_leafNodeAabbs; //Contains only small AABBs //1 element per large AABB, which is not stored in the BVH b3OpenCLArray m_largeAabbs; public: b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue); virtual ~b3GpuParallelLinearBvh(); ///Must be called before any other function void build(const b3OpenCLArray& worldSpaceAabbs, const b3OpenCLArray& smallAabbIndices, const b3OpenCLArray& largeAabbIndices); ///calculateOverlappingPairs() uses the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs. ///@param out_overlappingPairs The size() of this array is used to determine the max number of pairs. ///If the number of overlapping pairs is < out_overlappingPairs.size(), out_overlappingPairs is resized. void calculateOverlappingPairs(b3OpenCLArray& out_overlappingPairs); ///@param out_numRigidRayPairs Array of length 1; contains the number of detected ray-rigid AABB intersections; ///this value may be greater than out_rayRigidPairs.size() if out_rayRigidPairs is not large enough. ///@param out_rayRigidPairs Contains an array of rays intersecting rigid AABBs; x == ray index, y == rigid body index. ///If the size of this array is insufficient to hold all ray-rigid AABB intersections, additional intersections are discarded. void testRaysAgainstBvhAabbs(const b3OpenCLArray& rays, b3OpenCLArray& out_numRayRigidPairs, b3OpenCLArray& out_rayRigidPairs); private: void constructBinaryRadixTree(); }; #endif