godot/thirdparty/embree/kernels/common/ray.h

// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "default.h"
#include "instance_stack.h"

// FIXME: if ray gets separated into ray* and hit, uload4 needs to be adjusted

namespace embree
{
  /* Ray structure for K rays.
     Primary template, parameterized on the packet width K. A dedicated
     specialization for K == 1 is declared separately, and the member
     functions get/set/copy of this template are defined out-of-line
     further down in this file.
     NOTE(review): the member list is not visible in this view (the body
     is empty here) -- confirm against the full header. */
  template<int K>
  struct RayK
  {};

  /* Ray+hit structure for K rays.
     Publicly derives from RayK<K> (struct default), so a RayHitK<K> can be
     used wherever a RayK<K> is expected.
     NOTE(review): the hit members are not visible in this view (the body
     is empty here) -- confirm against the full header. */
  template<int K>
  struct RayHitK : RayK<K>
  {};

  /* Specialization for a single ray.
     Full specialization of RayK for K == 1; it replaces the generic
     packet definition entirely for that width.
     NOTE(review): the member list is not visible in this view (the body
     is empty here) -- confirm against the full header. */
  template<>
  struct RayK<1>
  {};

  /* Specialization of the ray+hit structure for a single ray.
     Derives from the single-ray specialization RayK<1>.
     NOTE(review): the hit members are not visible in this view (the body
     is empty here) -- confirm against the full header. */
  template<>
  struct RayHitK<1> : RayK<1>
  {};

  /* Converts ray packet to single rays.
     ray: destination for the single rays; presumably an array with room
          for K entries -- TODO confirm against the full definition.
     Declared const: the packet is only read.
     NOTE(review): the definition body is empty in this view. */
  template<int K>
  __forceinline void RayK<K>::get(RayK<1>* ray) const
  {}

  /* Converts a ray+hit packet to single ray+hit structures.
     ray: destination for the single ray+hit structures; presumably an
          array with room for K entries -- TODO confirm.
     Declared const: the packet is only read.
     NOTE(review): the definition body is empty in this view. */
  template<int K>
  __forceinline void RayHitK<K>::get(RayHitK<1>* ray) const
  {}

  /* Extracts a single ray out of a ray packet.
     i:   index of the packet element to extract; presumably must be
          smaller than K -- TODO confirm.
     ray: receives the extracted single ray.
     Declared const: the packet is only read.
     NOTE(review): the definition body is empty in this view. */
  template<int K>
  __forceinline void RayK<K>::get(size_t i, RayK<1>& ray) const
  {}

  /* Extracts a single ray+hit out of a ray+hit packet.
     i:   index of the packet element to extract; presumably must be
          smaller than K -- TODO confirm.
     ray: receives the extracted single ray+hit.
     Declared const: the packet is only read.
     NOTE(review): the definition body is empty in this view. */
  template<int K>
  __forceinline void RayHitK<K>::get(size_t i, RayHitK<1>& ray) const
  {}

  /* Converts single rays to ray packet.
     ray: source single rays (read-only); presumably an array of K
          entries -- TODO confirm against the full definition.
     NOTE(review): the definition body is empty in this view. */
  template<int K>
  __forceinline void RayK<K>::set(const RayK<1>* ray)
  {}

  /* Converts single ray+hit structures to a ray+hit packet.
     ray: source single ray+hit structures (read-only); presumably an
          array of K entries -- TODO confirm.
     NOTE(review): the definition body is empty in this view. */
  template<int K>
  __forceinline void RayHitK<K>::set(const RayHitK<1>* ray)
  {}

  /* Inserts a single ray into a ray packet element.
     i:   index of the packet element to overwrite; presumably must be
          smaller than K -- TODO confirm.
     ray: single ray to store (read-only).
     NOTE(review): the definition body is empty in this view. */
  template<int K>
  __forceinline void RayK<K>::set(size_t i, const RayK<1>& ray)
  {}

  /* Inserts a single ray+hit into a ray+hit packet element.
     i:   index of the packet element to overwrite; presumably must be
          smaller than K -- TODO confirm.
     ray: single ray+hit to store (read-only).
     NOTE(review): the definition body is empty in this view. */
  template<int K>
  __forceinline void RayHitK<K>::set(size_t i, const RayHitK<1>& ray)
  {}

  /* Copies a ray packet element into another element of the same packet.
     dest:   index of the element that is overwritten.
     source: index of the element that is read.
     NOTE(review): the definition body is empty in this view. */
  template<int K>
  __forceinline void RayK<K>::copy(size_t dest, size_t source)
  {}

  /* Copies a ray+hit packet element into another element of the same
     packet.
     dest:   index of the element that is overwritten.
     source: index of the element that is read.
     NOTE(review): the definition body is empty in this view. */
  template<int K>
  __forceinline void RayHitK<K>::copy(size_t dest, size_t source)
  {}

  /* Shortcuts */
  /* NOTE(review): the entries below read like alias declarations whose
     aliased types are not visible in this view. Later code in this file
     uses `Ray` as the single-ray type and `Ray4` / `Ray8` / `Ray16` as
     the return types of the 4-, 8- and 16-wide gathers -- confirm the
     exact definitions against the full header. */
  Ray;
  Ray4;
  Ray8;
  Ray16;
  Rayx;
  struct RayN;

  /* Ray+hit counterparts of the shortcuts above (same caveat applies). */
  RayHit;
  RayHit4;
  RayHit8;
  RayHit16;
  RayHitx;
  struct RayHitN;

  /* Helper template keyed on the packet width K and an `intersect` flag.
     Only the forward declaration of the primary template is visible here.
     NOTE(review): presumably selects between the ray+hit type
     (intersect == true) and the plain ray type (intersect == false) --
     the partial specializations and the RayType / RayTypeK aliases below
     are truncated in this view, confirm against the full header. */
  template<int K, bool intersect>
  struct RayTypeHelper;

  RayTypeHelper<K, true>;

  RayTypeHelper<K, false>;

  RayType;

  RayTypeK;

  /* Outputs ray to stream.
     cout: stream to write to.
     ray:  ray packet to print (read-only).
     Returns an embree_ostream so that calls can be chained.
     NOTE(review): the definition body is empty in this view although the
     function has a non-void return type; the full definition must return
     the stream -- confirm against the full header. */
  template<int K>
  __forceinline embree_ostream operator <<(embree_ostream cout, const RayK<K>& ray)
  {}

  /* Outputs ray+hit to stream.
     cout: stream to write to.
     ray:  ray+hit packet to print (read-only).
     Returns an embree_ostream so that calls can be chained.
     NOTE(review): the definition body is empty in this view although the
     function has a non-void return type; the full definition must return
     the stream -- confirm against the full header. */
  template<int K>
  __forceinline embree_ostream operator <<(embree_ostream cout, const RayHitK<K>& ray)
  {}

  /* Ray stream accessor; serves as the base class of StackRayStreamSOA.
     NOTE(review): presumably wraps rays stored in structure-of-arrays
     layout (going by the name) -- members are not visible in this view. */
  struct RayStreamSOA
  {};

  /* RayStreamSOA variant parameterized on a compile-time bound MAX_K.
     NOTE(review): presumably owns inline storage sized by MAX_K (going by
     the name) -- members are not visible in this view. */
  template<size_t MAX_K>
  struct StackRayStreamSOA : public RayStreamSOA
  {};


  /* Ray stream accessor.
     NOTE(review): presumably wraps rays stored as a structure of pointers
     (going by the name) -- members are not visible in this view. */
  struct RayStreamSOP
  {};


  /* Ray stream accessor for rays addressed by byte offset.
     The getRayByOffset<K> specializations defined after this struct add a
     per-lane byte offset to the member `ptr` and read a Ray at the
     resulting address.
     NOTE(review): members are not visible in this view (the body is empty
     here) -- confirm against the full header. */
  struct RayStreamAOS
  {};
  
  /* Gathers four single rays from the stream into one 4-wide packet.
     offset: per-lane byte offsets; lane i is read from the Ray located
             offset[i] bytes past `ptr`.
     Returns the assembled Ray4.
     Follows the 8- and 16-wide specializations below, but restricts itself
     to 4-wide loads because this specialization is not guarded by an AVX
     macro. The 12 consecutive values of each ray are therefore fetched as
     three groups of four and transposed group by group. */
  template<>
  __forceinline Ray4 RayStreamAOS::getRayByOffset<4>(const vint4& offset)
  {
    Ray4 ray;

    /* load and transpose: org.x, org.y, org.z, tnear */
    const vfloat4 a0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->org);
    const vfloat4 a1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->org);
    const vfloat4 a2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->org);
    const vfloat4 a3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->org);

    transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());

    /* load and transpose: dir.x, dir.y, dir.z, time */
    const vfloat4 b0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->dir);
    const vfloat4 b1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->dir);
    const vfloat4 b2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->dir);
    const vfloat4 b3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->dir);

    transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());

    /* load and transpose: tfar, mask, id, flags */
    const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->tfar);
    const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->tfar);
    const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->tfar);
    const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->tfar);

    /* mask, id and flags are carried through the float transpose and
       turned back into integer vectors with asInt afterwards */
    vfloat4 maskf, idf, flagsf;
    transpose(c0,c1,c2,c3, ray.tfar, maskf, idf, flagsf);
    ray.mask  = asInt(maskf);
    ray.id    = asInt(idf);
    ray.flags = asInt(flagsf);

    return ray;
  }
  
#if defined(__AVX__)
  /* Gathers eight single rays from the stream into one 8-wide packet.
     offset: per-lane byte offsets; lane i is read from the Ray located
             offset[i] bytes past `ptr`.
     Returns the assembled Ray8. */
  template<>
  __forceinline Ray8 RayStreamAOS::getRayByOffset<8>(const vint8& offset)
  {
    Ray8 packet;

    /* resolve the source ray of every lane once */
    Ray* const src0 = (Ray*)((char*)ptr + offset[0]);
    Ray* const src1 = (Ray*)((char*)ptr + offset[1]);
    Ray* const src2 = (Ray*)((char*)ptr + offset[2]);
    Ray* const src3 = (Ray*)((char*)ptr + offset[3]);
    Ray* const src4 = (Ray*)((char*)ptr + offset[4]);
    Ray* const src5 = (Ray*)((char*)ptr + offset[5]);
    Ray* const src6 = (Ray*)((char*)ptr + offset[6]);
    Ray* const src7 = (Ray*)((char*)ptr + offset[7]);

    /* first eight values of each ray, starting at org:
       org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
    const vfloat8 head0 = vfloat8::loadu(&src0->org);
    const vfloat8 head1 = vfloat8::loadu(&src1->org);
    const vfloat8 head2 = vfloat8::loadu(&src2->org);
    const vfloat8 head3 = vfloat8::loadu(&src3->org);
    const vfloat8 head4 = vfloat8::loadu(&src4->org);
    const vfloat8 head5 = vfloat8::loadu(&src5->org);
    const vfloat8 head6 = vfloat8::loadu(&src6->org);
    const vfloat8 head7 = vfloat8::loadu(&src7->org);

    /* one row per ray in, one vector per component out */
    transpose(head0,head1,head2,head3,head4,head5,head6,head7,
              packet.org.x, packet.org.y, packet.org.z, packet.tnear(),
              packet.dir.x, packet.dir.y, packet.dir.z, packet.time());

    /* next four values of each ray, starting at tfar:
       tfar, mask, id, flags */
    const vfloat4 tail0 = vfloat4::loadu(&src0->tfar);
    const vfloat4 tail1 = vfloat4::loadu(&src1->tfar);
    const vfloat4 tail2 = vfloat4::loadu(&src2->tfar);
    const vfloat4 tail3 = vfloat4::loadu(&src3->tfar);
    const vfloat4 tail4 = vfloat4::loadu(&src4->tfar);
    const vfloat4 tail5 = vfloat4::loadu(&src5->tfar);
    const vfloat4 tail6 = vfloat4::loadu(&src6->tfar);
    const vfloat4 tail7 = vfloat4::loadu(&src7->tfar);

    /* mask, id and flags are carried through the float transpose and
       turned back into integer vectors with asInt afterwards */
    vfloat8 maskLanes, idLanes, flagsLanes;
    transpose(tail0,tail1,tail2,tail3,tail4,tail5,tail6,tail7,
              packet.tfar, maskLanes, idLanes, flagsLanes);
    packet.mask  = asInt(maskLanes);
    packet.id    = asInt(idLanes);
    packet.flags = asInt(flagsLanes);

    return packet;
  }
#endif

#if defined(__AVX512F__)
  /* Gathers sixteen single rays from the stream into one 16-wide packet.
     offset: per-lane byte offsets; lane i is read from the Ray located
             offset[i] bytes past `ptr`.
     Returns the assembled Ray16. */
  template<>
  __forceinline Ray16 RayStreamAOS::getRayByOffset<16>(const vint16& offset)
  {
    Ray16 packet;

    /* resolve the source ray of every lane once */
    Ray* const src0  = (Ray*)((char*)ptr + offset[ 0]);
    Ray* const src1  = (Ray*)((char*)ptr + offset[ 1]);
    Ray* const src2  = (Ray*)((char*)ptr + offset[ 2]);
    Ray* const src3  = (Ray*)((char*)ptr + offset[ 3]);
    Ray* const src4  = (Ray*)((char*)ptr + offset[ 4]);
    Ray* const src5  = (Ray*)((char*)ptr + offset[ 5]);
    Ray* const src6  = (Ray*)((char*)ptr + offset[ 6]);
    Ray* const src7  = (Ray*)((char*)ptr + offset[ 7]);
    Ray* const src8  = (Ray*)((char*)ptr + offset[ 8]);
    Ray* const src9  = (Ray*)((char*)ptr + offset[ 9]);
    Ray* const src10 = (Ray*)((char*)ptr + offset[10]);
    Ray* const src11 = (Ray*)((char*)ptr + offset[11]);
    Ray* const src12 = (Ray*)((char*)ptr + offset[12]);
    Ray* const src13 = (Ray*)((char*)ptr + offset[13]);
    Ray* const src14 = (Ray*)((char*)ptr + offset[14]);
    Ray* const src15 = (Ray*)((char*)ptr + offset[15]);

    /* first eight values of each ray, starting at org:
       org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
    const vfloat8 head0  = vfloat8::loadu(&src0->org);
    const vfloat8 head1  = vfloat8::loadu(&src1->org);
    const vfloat8 head2  = vfloat8::loadu(&src2->org);
    const vfloat8 head3  = vfloat8::loadu(&src3->org);
    const vfloat8 head4  = vfloat8::loadu(&src4->org);
    const vfloat8 head5  = vfloat8::loadu(&src5->org);
    const vfloat8 head6  = vfloat8::loadu(&src6->org);
    const vfloat8 head7  = vfloat8::loadu(&src7->org);
    const vfloat8 head8  = vfloat8::loadu(&src8->org);
    const vfloat8 head9  = vfloat8::loadu(&src9->org);
    const vfloat8 head10 = vfloat8::loadu(&src10->org);
    const vfloat8 head11 = vfloat8::loadu(&src11->org);
    const vfloat8 head12 = vfloat8::loadu(&src12->org);
    const vfloat8 head13 = vfloat8::loadu(&src13->org);
    const vfloat8 head14 = vfloat8::loadu(&src14->org);
    const vfloat8 head15 = vfloat8::loadu(&src15->org);

    /* sixteen 8-wide rows in, eight 16-wide component vectors out */
    transpose(head0,head1,head2,head3,head4,head5,head6,head7,
              head8,head9,head10,head11,head12,head13,head14,head15,
              packet.org.x, packet.org.y, packet.org.z, packet.tnear(),
              packet.dir.x, packet.dir.y, packet.dir.z, packet.time());

    /* next four values of each ray, starting at tfar:
       tfar, mask, id, flags */
    const vfloat4 tail0  = vfloat4::loadu(&src0->tfar);
    const vfloat4 tail1  = vfloat4::loadu(&src1->tfar);
    const vfloat4 tail2  = vfloat4::loadu(&src2->tfar);
    const vfloat4 tail3  = vfloat4::loadu(&src3->tfar);
    const vfloat4 tail4  = vfloat4::loadu(&src4->tfar);
    const vfloat4 tail5  = vfloat4::loadu(&src5->tfar);
    const vfloat4 tail6  = vfloat4::loadu(&src6->tfar);
    const vfloat4 tail7  = vfloat4::loadu(&src7->tfar);
    const vfloat4 tail8  = vfloat4::loadu(&src8->tfar);
    const vfloat4 tail9  = vfloat4::loadu(&src9->tfar);
    const vfloat4 tail10 = vfloat4::loadu(&src10->tfar);
    const vfloat4 tail11 = vfloat4::loadu(&src11->tfar);
    const vfloat4 tail12 = vfloat4::loadu(&src12->tfar);
    const vfloat4 tail13 = vfloat4::loadu(&src13->tfar);
    const vfloat4 tail14 = vfloat4::loadu(&src14->tfar);
    const vfloat4 tail15 = vfloat4::loadu(&src15->tfar);

    /* mask, id and flags are carried through the float transpose and
       turned back into integer vectors with asInt afterwards */
    vfloat16 maskLanes, idLanes, flagsLanes;
    transpose(tail0,tail1,tail2,tail3,tail4,tail5,tail6,tail7,
              tail8,tail9,tail10,tail11,tail12,tail13,tail14,tail15,
              packet.tfar, maskLanes, idLanes, flagsLanes);
    packet.mask  = asInt(maskLanes);
    packet.id    = asInt(idLanes);
    packet.flags = asInt(flagsLanes);

    return packet;
  }
#endif


  /* Ray stream accessor for rays addressed through an array of pointers.
     The getRayByIndex<K> specializations defined after this struct index
     the member `ptr` per lane and dereference the selected entry to read
     a ray.
     NOTE(review): members are not visible in this view (the body is empty
     here) -- confirm against the full header. */
  struct RayStreamAOP
  {};
  
  /* Gathers four single rays from the stream into one 4-wide packet.
     index: per-lane indices into `ptr`; lane i is read from the ray that
            ptr[index[i]] points to.
     Returns the assembled Ray4.
     Follows the 8- and 16-wide specializations below, but restricts itself
     to 4-wide loads because this specialization is not guarded by an AVX
     macro. The 12 consecutive values of each ray are therefore fetched as
     three groups of four and transposed group by group. */
  template<>
  __forceinline Ray4 RayStreamAOP::getRayByIndex<4>(const vint4& index)
  {
    Ray4 ray;

    /* load and transpose: org.x, org.y, org.z, tnear */
    const vfloat4 a0 = vfloat4::loadu(&ptr[index[0]]->org);
    const vfloat4 a1 = vfloat4::loadu(&ptr[index[1]]->org);
    const vfloat4 a2 = vfloat4::loadu(&ptr[index[2]]->org);
    const vfloat4 a3 = vfloat4::loadu(&ptr[index[3]]->org);

    transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());

    /* load and transpose: dir.x, dir.y, dir.z, time */
    const vfloat4 b0 = vfloat4::loadu(&ptr[index[0]]->dir);
    const vfloat4 b1 = vfloat4::loadu(&ptr[index[1]]->dir);
    const vfloat4 b2 = vfloat4::loadu(&ptr[index[2]]->dir);
    const vfloat4 b3 = vfloat4::loadu(&ptr[index[3]]->dir);

    transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());

    /* load and transpose: tfar, mask, id, flags */
    const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
    const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
    const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
    const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);

    /* mask, id and flags are carried through the float transpose and
       turned back into integer vectors with asInt afterwards */
    vfloat4 maskf, idf, flagsf;
    transpose(c0,c1,c2,c3, ray.tfar, maskf, idf, flagsf);
    ray.mask  = asInt(maskf);
    ray.id    = asInt(idf);
    ray.flags = asInt(flagsf);

    return ray;
  }
  
#if defined(__AVX__)
  /* Gathers eight single rays from the stream into one 8-wide packet.
     index: per-lane indices into `ptr`; lane i is read from the ray that
            ptr[index[i]] points to.
     Returns the assembled Ray8. */
  template<>
  __forceinline Ray8 RayStreamAOP::getRayByIndex<8>(const vint8& index)
  {
    Ray8 packet;

    /* look up the source ray of every lane once */
    const auto src0 = ptr[index[0]];
    const auto src1 = ptr[index[1]];
    const auto src2 = ptr[index[2]];
    const auto src3 = ptr[index[3]];
    const auto src4 = ptr[index[4]];
    const auto src5 = ptr[index[5]];
    const auto src6 = ptr[index[6]];
    const auto src7 = ptr[index[7]];

    /* first eight values of each ray, starting at org:
       org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
    const vfloat8 head0 = vfloat8::loadu(&src0->org);
    const vfloat8 head1 = vfloat8::loadu(&src1->org);
    const vfloat8 head2 = vfloat8::loadu(&src2->org);
    const vfloat8 head3 = vfloat8::loadu(&src3->org);
    const vfloat8 head4 = vfloat8::loadu(&src4->org);
    const vfloat8 head5 = vfloat8::loadu(&src5->org);
    const vfloat8 head6 = vfloat8::loadu(&src6->org);
    const vfloat8 head7 = vfloat8::loadu(&src7->org);

    /* one row per ray in, one vector per component out */
    transpose(head0,head1,head2,head3,head4,head5,head6,head7,
              packet.org.x, packet.org.y, packet.org.z, packet.tnear(),
              packet.dir.x, packet.dir.y, packet.dir.z, packet.time());

    /* next four values of each ray, starting at tfar:
       tfar, mask, id, flags */
    const vfloat4 tail0 = vfloat4::loadu(&src0->tfar);
    const vfloat4 tail1 = vfloat4::loadu(&src1->tfar);
    const vfloat4 tail2 = vfloat4::loadu(&src2->tfar);
    const vfloat4 tail3 = vfloat4::loadu(&src3->tfar);
    const vfloat4 tail4 = vfloat4::loadu(&src4->tfar);
    const vfloat4 tail5 = vfloat4::loadu(&src5->tfar);
    const vfloat4 tail6 = vfloat4::loadu(&src6->tfar);
    const vfloat4 tail7 = vfloat4::loadu(&src7->tfar);

    /* mask, id and flags are carried through the float transpose and
       turned back into integer vectors with asInt afterwards */
    vfloat8 maskLanes, idLanes, flagsLanes;
    transpose(tail0,tail1,tail2,tail3,tail4,tail5,tail6,tail7,
              packet.tfar, maskLanes, idLanes, flagsLanes);
    packet.mask  = asInt(maskLanes);
    packet.id    = asInt(idLanes);
    packet.flags = asInt(flagsLanes);

    return packet;
  }
#endif

#if defined(__AVX512F__)
  /* Gathers sixteen single rays from the stream into one 16-wide packet.
     index: per-lane indices into `ptr`; lane i is read from the ray that
            ptr[index[i]] points to.
     Returns the assembled Ray16. */
  template<>
  __forceinline Ray16 RayStreamAOP::getRayByIndex<16>(const vint16& index)
  {
    Ray16 packet;

    /* look up the source ray of every lane once */
    const auto src0  = ptr[index[0]];
    const auto src1  = ptr[index[1]];
    const auto src2  = ptr[index[2]];
    const auto src3  = ptr[index[3]];
    const auto src4  = ptr[index[4]];
    const auto src5  = ptr[index[5]];
    const auto src6  = ptr[index[6]];
    const auto src7  = ptr[index[7]];
    const auto src8  = ptr[index[8]];
    const auto src9  = ptr[index[9]];
    const auto src10 = ptr[index[10]];
    const auto src11 = ptr[index[11]];
    const auto src12 = ptr[index[12]];
    const auto src13 = ptr[index[13]];
    const auto src14 = ptr[index[14]];
    const auto src15 = ptr[index[15]];

    /* first eight values of each ray, starting at org:
       org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
    const vfloat8 head0  = vfloat8::loadu(&src0->org);
    const vfloat8 head1  = vfloat8::loadu(&src1->org);
    const vfloat8 head2  = vfloat8::loadu(&src2->org);
    const vfloat8 head3  = vfloat8::loadu(&src3->org);
    const vfloat8 head4  = vfloat8::loadu(&src4->org);
    const vfloat8 head5  = vfloat8::loadu(&src5->org);
    const vfloat8 head6  = vfloat8::loadu(&src6->org);
    const vfloat8 head7  = vfloat8::loadu(&src7->org);
    const vfloat8 head8  = vfloat8::loadu(&src8->org);
    const vfloat8 head9  = vfloat8::loadu(&src9->org);
    const vfloat8 head10 = vfloat8::loadu(&src10->org);
    const vfloat8 head11 = vfloat8::loadu(&src11->org);
    const vfloat8 head12 = vfloat8::loadu(&src12->org);
    const vfloat8 head13 = vfloat8::loadu(&src13->org);
    const vfloat8 head14 = vfloat8::loadu(&src14->org);
    const vfloat8 head15 = vfloat8::loadu(&src15->org);

    /* sixteen 8-wide rows in, eight 16-wide component vectors out */
    transpose(head0,head1,head2,head3,head4,head5,head6,head7,
              head8,head9,head10,head11,head12,head13,head14,head15,
              packet.org.x, packet.org.y, packet.org.z, packet.tnear(),
              packet.dir.x, packet.dir.y, packet.dir.z, packet.time());

    /* next four values of each ray, starting at tfar:
       tfar, mask, id, flags */
    const vfloat4 tail0  = vfloat4::loadu(&src0->tfar);
    const vfloat4 tail1  = vfloat4::loadu(&src1->tfar);
    const vfloat4 tail2  = vfloat4::loadu(&src2->tfar);
    const vfloat4 tail3  = vfloat4::loadu(&src3->tfar);
    const vfloat4 tail4  = vfloat4::loadu(&src4->tfar);
    const vfloat4 tail5  = vfloat4::loadu(&src5->tfar);
    const vfloat4 tail6  = vfloat4::loadu(&src6->tfar);
    const vfloat4 tail7  = vfloat4::loadu(&src7->tfar);
    const vfloat4 tail8  = vfloat4::loadu(&src8->tfar);
    const vfloat4 tail9  = vfloat4::loadu(&src9->tfar);
    const vfloat4 tail10 = vfloat4::loadu(&src10->tfar);
    const vfloat4 tail11 = vfloat4::loadu(&src11->tfar);
    const vfloat4 tail12 = vfloat4::loadu(&src12->tfar);
    const vfloat4 tail13 = vfloat4::loadu(&src13->tfar);
    const vfloat4 tail14 = vfloat4::loadu(&src14->tfar);
    const vfloat4 tail15 = vfloat4::loadu(&src15->tfar);

    /* mask, id and flags are carried through the float transpose and
       turned back into integer vectors with asInt afterwards */
    vfloat16 maskLanes, idLanes, flagsLanes;
    transpose(tail0,tail1,tail2,tail3,tail4,tail5,tail6,tail7,
              tail8,tail9,tail10,tail11,tail12,tail13,tail14,tail15,
              packet.tfar, maskLanes, idLanes, flagsLanes);
    packet.mask  = asInt(maskLanes);
    packet.id    = asInt(idLanes);
    packet.flags = asInt(flagsLanes);

    return packet;
  }
#endif
}