godot/thirdparty/embree/kernels/geometry/linei.h

// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "primitive.h"

namespace embree
{
  // Class template parameterised on the width M. The gather()/gatheri()
  // member specializations in this file are written for M = 4 and M = 8.
  // NOTE(review): no members are declared in this definition, although those
  // specializations read v0, leftExists and rightExists - confirm the full
  // declaration.
  template <int M>
  struct LineMi
  {
  };

  // LineMi<4> specialization of gather(p0, p1, geom).
  // NOTE(review): the body is empty, so p0 and p1 are returned unmodified -
  // confirm whether the implementation is expected here.
  template<>
  __forceinline void LineMi<4>::gather(Vec4vf4& p0, Vec4vf4& p1, const LineSegments* geom) const
  {
  }

  // LineMi<4> specialization of gatheri(p0, p1, geom, itime).
  // NOTE(review): the body is empty, so p0 and p1 are returned unmodified -
  // confirm whether the implementation is expected here.
  template<>
  __forceinline void LineMi<4>::gatheri(Vec4vf4& p0, Vec4vf4& p1, const LineSegments* geom, const int itime) const
  {
  }

  // LineMi<4> specialization of gather(p0, p1, geom, time).
  // NOTE(review): the body is empty, so p0 and p1 are returned unmodified -
  // confirm whether the implementation is expected here.
  template<>
  __forceinline void LineMi<4>::gather(Vec4vf4& p0, Vec4vf4& p1, const LineSegments* geom, float time) const
  {
  }

  // LineMi<4> specialization of gather(p0, p1, cL, cR, geom).
  // NOTE(review): the body is empty, so none of p0, p1, cL, cR is written -
  // confirm whether the implementation is expected here.
  template<>
  __forceinline void LineMi<4>::gather(Vec4vf4& p0, Vec4vf4& p1,
                                       vbool4& cL, vbool4& cR,
                                       const LineSegments* geom) const
  {
  }

  // LineMi<4> specialization of gatheri(p0, p1, cL, cR, geom, itime).
  // NOTE(review): the body is empty, so none of p0, p1, cL, cR is written -
  // confirm whether the implementation is expected here.
  template<>
  __forceinline void LineMi<4>::gatheri(Vec4vf4& p0, Vec4vf4& p1,
                                        vbool4& cL, vbool4& cR,
                                        const LineSegments* geom, const int itime) const
  {
  }

  // LineMi<4> specialization of gather(p0, p1, cL, cR, geom, time).
  // NOTE(review): the body is empty, so none of p0, p1, cL, cR is written -
  // confirm whether the implementation is expected here.
  template<>
  __forceinline void LineMi<4>::gather(Vec4vf4& p0, Vec4vf4& p1,
                                       vbool4& cL, vbool4& cR,
                                       const LineSegments* geom, float time) const
  {
  }

  // LineMi<4> specialization of gather(p0, p1, pL, pR, geom).
  // NOTE(review): the body is empty, so none of p0, p1, pL, pR is written -
  // confirm whether the implementation is expected here.
  template<>
  __forceinline void LineMi<4>::gather(Vec4vf4& p0, Vec4vf4& p1,
                                       Vec4vf4& pL, Vec4vf4& pR,
                                       const LineSegments* geom) const
  {
  }
  
  // LineMi<4> specialization of gatheri(p0, p1, pL, pR, geom, itime).
  // NOTE(review): the body is empty, so none of p0, p1, pL, pR is written -
  // confirm whether the implementation is expected here.
  template<>
  __forceinline void LineMi<4>::gatheri(Vec4vf4& p0, Vec4vf4& p1,
                                        Vec4vf4& pL, Vec4vf4& pR,
                                        const LineSegments* geom, const int itime) const
  {
  }
  
  // LineMi<4> specialization of gather(p0, p1, pL, pR, geom, time).
  // NOTE(review): the body is empty, so none of p0, p1, pL, pR is written -
  // confirm whether the implementation is expected here.
  template<>
  __forceinline void LineMi<4>::gather(Vec4vf4& p0, Vec4vf4& p1,
                                       Vec4vf4& pL, Vec4vf4& pR,
                                       const LineSegments* geom, float time) const
  {
  }

#if defined(__AVX__)

  // LineMi<8>: for each of the 8 lanes, loads the vertex at index v0[lane]
  // (for p0) and the vertex at index v0[lane]+1 (for p1) from geom. Each set
  // of eight 4-float rows is handed to transpose(), which fills the
  // .x/.y/.z/.w members of the corresponding output.
  template<>
  __forceinline void LineMi<8>::gather(Vec4vf8& p0, Vec4vf8& p1, const LineSegments* geom) const
  {
    // unaligned 4-float loads of the two vertices referenced by one lane
    const auto head = [this, geom](const int lane) { return vfloat4::loadu(geom->vertexPtr(v0[lane])); };
    const auto tail = [this, geom](const int lane) { return vfloat4::loadu(geom->vertexPtr(v0[lane]+1)); };

    transpose(head(0),head(1),head(2),head(3),head(4),head(5),head(6),head(7),
              p0.x,p0.y,p0.z,p0.w);
    transpose(tail(0),tail(1),tail(2),tail(3),tail(4),tail(5),tail(6),tail(7),
              p1.x,p1.y,p1.z,p1.w);
  }

  // LineMi<8>: same gather as the (p0, p1, geom) overload, but every vertex
  // is fetched through geom->vertexPtr(index, itime), i.e. for the given
  // integer time step.
  template<>
  __forceinline void LineMi<8>::gatheri(Vec4vf8& p0, Vec4vf8& p1, const LineSegments* geom, const int itime) const
  {
    // unaligned 4-float loads of the two vertices referenced by one lane
    const auto head = [this, geom, itime](const int lane) { return vfloat4::loadu(geom->vertexPtr(v0[lane],itime)); };
    const auto tail = [this, geom, itime](const int lane) { return vfloat4::loadu(geom->vertexPtr(v0[lane]+1,itime)); };

    transpose(head(0),head(1),head(2),head(3),head(4),head(5),head(6),head(7),
              p0.x,p0.y,p0.z,p0.w);
    transpose(tail(0),tail(1),tail(2),tail(3),tail(4),tail(5),tail(6),tail(7),
              p1.x,p1.y,p1.z,p1.w);
  }

  // LineMi<8>: gathers p0/p1 at a fractional time. geom->timeSegment()
  // returns the integer time step and is handed ftime by reference
  // (presumably to receive the blend weight); the outputs are lerp()ed
  // between the gatheri() results at itime and itime+1.
  template<>
  __forceinline void LineMi<8>::gather(Vec4vf8& p0, Vec4vf8& p1, const LineSegments* geom, float time) const
  {
    float ftime;
    const int itime = geom->timeSegment(time, ftime);
    const vfloat8 weight(ftime);

    Vec4vf8 lo0, lo1;
    gatheri(lo0, lo1, geom, itime);

    Vec4vf8 hi0, hi1;
    gatheri(hi0, hi1, geom, itime+1);

    p0 = lerp(lo0, hi0, weight);
    p1 = lerp(lo1, hi1, weight);
  }
  
  // LineMi<8>: gathers four vertex sets for the 8 lanes:
  //   p0 <- vertex v0[lane]        p1 <- vertex v0[lane]+1
  //   pL <- vertex v0[lane]-1      pR <- vertex v0[lane]+2
  // The pL / pR row of a lane is only loaded when bit `lane` of leftExists /
  // rightExists is set; otherwise that row is vfloat4(inf). Each set of eight
  // rows is passed through transpose() into the .x/.y/.z/.w members.
  template<>
  __forceinline void LineMi<8>::gather(Vec4vf8& p0, Vec4vf8& p1,
                                       Vec4vf8& pL, Vec4vf8& pR,
                                       const LineSegments* geom) const
  {
    const auto head = [this, geom](const int lane) { return vfloat4::loadu(geom->vertexPtr(v0[lane])); };
    const auto tail = [this, geom](const int lane) { return vfloat4::loadu(geom->vertexPtr(v0[lane]+1)); };

    // neighbour rows: guarded by the per-lane existence bit, inf when absent
    const auto left = [this, geom](const int lane) {
      return (leftExists & (1<<lane)) ? vfloat4::loadu(geom->vertexPtr(v0[lane]-1)) : vfloat4(inf);
    };
    const auto right = [this, geom](const int lane) {
      return (rightExists & (1<<lane)) ? vfloat4::loadu(geom->vertexPtr(v0[lane]+2)) : vfloat4(inf);
    };

    transpose(head(0),head(1),head(2),head(3),head(4),head(5),head(6),head(7),
              p0.x,p0.y,p0.z,p0.w);
    transpose(tail(0),tail(1),tail(2),tail(3),tail(4),tail(5),tail(6),tail(7),
              p1.x,p1.y,p1.z,p1.w);
    transpose(left(0),left(1),left(2),left(3),left(4),left(5),left(6),left(7),
              pL.x,pL.y,pL.z,pL.w);
    transpose(right(0),right(1),right(2),right(3),right(4),right(5),right(6),right(7),
              pR.x,pR.y,pR.z,pR.w);
  }
  
  // LineMi<8>: gathers p0, p1 and the neighbour sets pL, pR for the integer
  // time step itime. Vertices are fetched via geom->vertexPtr(index, itime)
  // at indices v0[lane], v0[lane]+1, v0[lane]-1 and v0[lane]+2 respectively.
  // A pL / pR row is only loaded when bit `lane` of leftExists / rightExists
  // is set; otherwise it is vfloat4(inf).
  template<>
  __forceinline void LineMi<8>::gatheri(Vec4vf8& p0, Vec4vf8& p1,
                                        Vec4vf8& pL, Vec4vf8& pR,
                                        const LineSegments* geom, const int itime) const
  {
    const auto head = [this, geom, itime](const int lane) { return vfloat4::loadu(geom->vertexPtr(v0[lane],itime)); };
    const auto tail = [this, geom, itime](const int lane) { return vfloat4::loadu(geom->vertexPtr(v0[lane]+1,itime)); };

    // neighbour rows: guarded by the per-lane existence bit, inf when absent
    const auto left = [this, geom, itime](const int lane) {
      return (leftExists & (1<<lane)) ? vfloat4::loadu(geom->vertexPtr(v0[lane]-1,itime)) : vfloat4(inf);
    };
    const auto right = [this, geom, itime](const int lane) {
      return (rightExists & (1<<lane)) ? vfloat4::loadu(geom->vertexPtr(v0[lane]+2,itime)) : vfloat4(inf);
    };

    transpose(head(0),head(1),head(2),head(3),head(4),head(5),head(6),head(7),
              p0.x,p0.y,p0.z,p0.w);
    transpose(tail(0),tail(1),tail(2),tail(3),tail(4),tail(5),tail(6),tail(7),
              p1.x,p1.y,p1.z,p1.w);
    transpose(left(0),left(1),left(2),left(3),left(4),left(5),left(6),left(7),
              pL.x,pL.y,pL.z,pL.w);
    transpose(right(0),right(1),right(2),right(3),right(4),right(5),right(6),right(7),
              pR.x,pR.y,pR.z,pR.w);
  }
  
  // LineMi<8>: gathers p0, p1 and the neighbour sets pL, pR at a fractional
  // time.
  //
  // geom->timeSegment(time, ftime) returns the integer time step itime and is
  // handed ftime by reference (presumably to receive the blend weight). All
  // four outputs are lerp()ed between the gatheri() results at itime and
  // itime+1.
  //
  // After blending, pL / pR are passed through select() against a mask built
  // from leftExists / rightExists, with inf as the alternative value, so that
  // lanes without a neighbour carry inf again (presumably because blending
  // two inf rows is not guaranteed to yield inf).
  //
  // The mask has to be 8 lanes wide: leftExists / rightExists hold one bit
  // per lane (bits 0..7 are tested by the non-interpolating overload), and
  // the cL/cR overloads of this specialization convert them with vbool8(...).
  // Building a 4-lane mask from the 8-bit flags would lose lanes 4..7.
  template<>
    __forceinline void LineMi<8>::gather(Vec4vf8& p0,
                                              Vec4vf8& p1,
                                              Vec4vf8& pL,
                                              Vec4vf8& pR,
                                              const LineSegments* geom,
                                              float time) const
  {
    float ftime;
    const int itime = geom->timeSegment(time, ftime);
    
    Vec4vf8 a0,a1,aL,aR;
    gatheri(a0,a1,aL,aR,geom,itime);
    Vec4vf8 b0,b1,bL,bR;
    gatheri(b0,b1,bL,bR,geom,itime+1);
    p0 = lerp(a0,b0,vfloat8(ftime));
    p1 = lerp(a1,b1,vfloat8(ftime));
    pL = lerp(aL,bL,vfloat8(ftime));
    pR = lerp(aR,bR,vfloat8(ftime));
    
    // 8-lane masks, one lane per segment of this LineMi<8>
    pL = select(vbool8(leftExists), pL, Vec4vf8(inf));
    pR = select(vbool8(rightExists), pR, Vec4vf8(inf));
  }

  // LineMi<8>: gathers p0/p1 through the (p0, p1, geom) overload and reports
  // in cL / cR the negation of the masks built from leftExists / rightExists.
  template<>
  __forceinline void LineMi<8>::gather(Vec4vf8& p0, Vec4vf8& p1,
                                       vbool8& cL, vbool8& cR,
                                       const LineSegments* geom) const
  {
    const vbool8 hasLeft(leftExists);
    const vbool8 hasRight(rightExists);

    gather(p0, p1, geom);

    cL = !hasLeft;
    cR = !hasRight;
  }
  
  // LineMi<8>: gathers p0/p1 for the integer time step itime through the
  // (p0, p1, geom, itime) overload and reports in cL / cR the negation of the
  // masks built from leftExists / rightExists.
  template<>
  __forceinline void LineMi<8>::gatheri(Vec4vf8& p0, Vec4vf8& p1,
                                        vbool8& cL, vbool8& cR,
                                        const LineSegments* geom, const int itime) const
  {
    const vbool8 hasLeft(leftExists);
    const vbool8 hasRight(rightExists);

    gatheri(p0, p1, geom, itime);

    cL = !hasLeft;
    cR = !hasRight;
  }
  
  // LineMi<8>: gathers p0/p1 at a fractional time and reports in cL / cR the
  // negation of the masks built from leftExists / rightExists.
  template<>
  __forceinline void LineMi<8>::gather(Vec4vf8& p0, Vec4vf8& p1,
                                       vbool8& cL, vbool8& cR,
                                       const LineSegments* geom, float time) const
  {
    // The (p0, p1, geom, time) overload performs the same timeSegment()
    // lookup, the two gatheri() calls at itime / itime+1 and the lerp.
    gather(p0, p1, geom, time);

    cL = !vbool8(leftExists);
    cR = !vbool8(rightExists);
  }
  
#endif
  
  // Out-of-class definition of the static data member `type` (of type
  // LineMi<M>::Type), provided generically for every width M.
  template<int M>
  typename LineMi<M>::Type LineMi<M>::type;

  // NOTE(review): as written, these two statements only name Line4i and
  // Line8i and declare nothing. They look like alias declarations whose type
  // part is missing (presumably for LineMi<4> and LineMi<8>) - confirm
  // against the upstream header.
  Line4i;
  Line8i;
}