// dot (real/real overload): device inline function taking references to two
// 3-element float arrays (v1, v2) and a float reference (out).
// NOTE(review): only the signature and the header of a loop over n = 0..2 are
// visible in this listing; the loop body and any initialisation of `out` are
// not shown. Presumably accumulates v1[n]*v2[n] into `out`, mirroring the
// complex overloads of dot in this file - confirm against the full source.
21 __device__ __inline__
void dot(
float (&v1)[3],
float (&v2)[3],
float &out)
25 for(
int n=0; n<3; n++)
/**
 * Bilinear dot product of two complex 3-vectors (no conjugation is applied).
 *
 * @param cv1 First complex vector.
 * @param cv2 Second complex vector.
 * @param out Receives cv1[0]*cv2[0] + cv1[1]*cv2[1] + cv1[2]*cv2[2].
 */
__device__ __inline__ void dot(cuFloatComplex (&cv1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex &out)
{
    // Start from zero; `out` itself is the running sum.
    out = make_cuFloatComplex(0, 0);

    // Add the component products in index order 0, 1, 2.
    int i = 0;
    while (i < 3)
    {
        const cuFloatComplex term = cuCmulf(cv1[i], cv2[i]);
        out = cuCaddf(term, out);
        ++i;
    }
}
/**
 * Dot product of a complex 3-vector with a real 3-vector.
 *
 * Each real component is promoted to a complex number with zero imaginary
 * part and multiplied with the matching complex component.
 *
 * @param cv1 Complex vector.
 * @param v2  Real vector.
 * @param out Receives the complex sum of the three component products.
 */
__device__ __inline__ void dot(cuFloatComplex (&cv1)[3], float (&v2)[3], cuFloatComplex &out)
{
    out = make_cuFloatComplex(0, 0);

    for (int i = 0; i < 3; ++i)
    {
        const cuFloatComplex promoted = make_cuFloatComplex(v2[i], 0);
        out = cuCaddf(cuCmulf(cv1[i], promoted), out);
    }
}
/**
 * Dot product of a real 3-vector with a complex 3-vector.
 *
 * Each real component is promoted to a complex number with zero imaginary
 * part and multiplied with the matching complex component.
 *
 * @param v1  Real vector.
 * @param cv2 Complex vector.
 * @param out Receives the complex sum of the three component products.
 */
__device__ __inline__ void dot(float (&v1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex &out)
{
    out = make_cuFloatComplex(0, 0);

    for (int i = 0; i < 3; ++i)
    {
        const cuFloatComplex promoted = make_cuFloatComplex(v1[i], 0);
        out = cuCaddf(cuCmulf(promoted, cv2[i]), out);
    }
}
/**
 * Cross (exterior) product of two real 3-vectors.
 *
 * @param v1  Left operand.
 * @param v2  Right operand.
 * @param out Receives v1 x v2; components are written in order 0, 1, 2.
 */
__device__ __inline__ void ext(float (&v1)[3], float (&v2)[3], float (&out)[3])
{
    // Component i uses the cyclic successors j = i+1 and k = i+2 (mod 3).
    #pragma unroll
    for (int i = 0; i < 3; ++i)
    {
        const int j = (i + 1) % 3;
        const int k = (i + 2) % 3;
        out[i] = v1[j]*v2[k] - v1[k]*v2[j];
    }
}
/**
 * Cross (exterior) product of two complex 3-vectors.
 *
 * @param cv1 Left operand.
 * @param cv2 Right operand.
 * @param out Receives cv1 x cv2; components are written in order 0, 1, 2.
 */
__device__ __inline__ void ext(cuFloatComplex (&cv1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex (&out)[3])
{
    // Component i uses the cyclic successors j = i+1 and k = i+2 (mod 3).
    #pragma unroll
    for (int i = 0; i < 3; ++i)
    {
        const int j = (i + 1) % 3;
        const int k = (i + 2) % 3;
        out[i] = cuCsubf(cuCmulf(cv1[j], cv2[k]), cuCmulf(cv1[k], cv2[j]));
    }
}
/**
 * Cross (exterior) product of a complex 3-vector with a real 3-vector.
 *
 * Real components are promoted to complex numbers with zero imaginary part
 * before each multiplication.
 *
 * @param cv1 Complex left operand.
 * @param v2  Real right operand.
 * @param out Receives cv1 x v2; components are written in order 0, 1, 2.
 */
__device__ __inline__ void ext(cuFloatComplex (&cv1)[3], float (&v2)[3], cuFloatComplex (&out)[3])
{
    // Component i uses the cyclic successors j = i+1 and k = i+2 (mod 3).
    #pragma unroll
    for (int i = 0; i < 3; ++i)
    {
        const int j = (i + 1) % 3;
        const int k = (i + 2) % 3;
        out[i] = cuCsubf(cuCmulf(cv1[j], make_cuFloatComplex(v2[k],0)),
                         cuCmulf(cv1[k], make_cuFloatComplex(v2[j],0)));
    }
}
/**
 * Cross (exterior) product of a real 3-vector with a complex 3-vector.
 *
 * Real components are promoted to complex numbers with zero imaginary part
 * before each multiplication.
 *
 * @param v1  Real left operand.
 * @param cv2 Complex right operand.
 * @param out Receives v1 x cv2; components are written in order 0, 1, 2.
 */
__device__ __inline__ void ext(float (&v1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex (&out)[3])
{
    // Component i uses the cyclic successors j = i+1 and k = i+2 (mod 3).
    #pragma unroll
    for (int i = 0; i < 3; ++i)
    {
        const int j = (i + 1) % 3;
        const int k = (i + 2) % 3;
        out[i] = cuCsubf(cuCmulf(make_cuFloatComplex(v1[j],0), cv2[k]),
                         cuCmulf(make_cuFloatComplex(v1[k],0), cv2[j]));
    }
}
/**
 * Component-wise difference of two real 3-vectors.
 *
 * @param v1  Minuend.
 * @param v2  Subtrahend.
 * @param out Receives v1 - v2; components are written in order 0, 1, 2.
 */
__device__ __inline__ void diff(float (&v1)[3], float (&v2)[3], float (&out)[3])
{
    out[0] = v1[0] - v2[0];
    out[1] = v1[1] - v2[1];
    out[2] = v1[2] - v2[2];
}
/**
 * Component-wise difference of two complex 3-vectors.
 *
 * @param cv1 Minuend.
 * @param cv2 Subtrahend.
 * @param out Receives cv1 - cv2; components are written in order 0, 1, 2.
 */
__device__ __inline__ void diff(cuFloatComplex (&cv1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex (&out)[3])
{
    int i = 0;
    while (i < 3)
    {
        out[i] = cuCsubf(cv1[i], cv2[i]);
        ++i;
    }
}
// abs: device inline function taking a reference to a 3-element float array
// (v) and a float reference (out).
// NOTE(review): the body is not visible in this listing; presumably writes
// the Euclidean length of v into `out` - confirm against the full source.
195 __device__ __inline__
void abs(
float (&v)[3],
float &out)
/**
 * Component-wise complex conjugate of a complex 3-vector.
 *
 * @param cv  Input vector.
 * @param out Receives conj(cv[i]) for i = 0, 1, 2, written in that order.
 */
__device__ __inline__ void conja(cuFloatComplex (&cv)[3], cuFloatComplex (&out)[3])
{
    out[0] = cuConjf(cv[0]);
    out[1] = cuConjf(cv[1]);
    out[2] = cuConjf(cv[2]);
}
// normalize: device inline function taking references to two 3-element float
// arrays; the visible loop writes v[n] / norm into out[n] for n = 0..2.
// NOTE(review): the declaration and computation of `norm` are not visible in
// this listing; presumably it is the length of v - confirm. No guard against
// norm == 0 is visible either.
225 __device__ __inline__
void normalize(
float (&v)[3],
float (&out)[3])
236 for(
int n=0; n<3; n++)
238 out[n] = v[n] / norm;
/**
 * Component-wise sum of two real 3-vectors.
 *
 * @param v1  First addend.
 * @param v2  Second addend.
 * @param out Receives v1 + v2; components are written in order 0, 1, 2.
 */
__device__ __inline__ void add(float (&v1)[3], float (&v2)[3], float (&out)[3])
{
    out[0] = v1[0] + v2[0];
    out[1] = v1[1] + v2[1];
    out[2] = v1[2] + v2[2];
}
// s_mult (real vector, real scalar overload): device inline function taking a
// 3-element float array reference (v), a float reference (s) and a 3-element
// float array reference (out).
// NOTE(review): only the header of a loop over n = 0..2 is visible; the loop
// body is not shown. Presumably stores s * v[n] into out[n], mirroring the
// complex overloads of s_mult in this file - confirm against the full source.
268 __device__ __inline__
void s_mult(
float (&v)[3],
float &s,
float (&out)[3])
270 for(
int n=0; n<3; n++)
/**
 * Scales a complex 3-vector by a complex scalar.
 *
 * @param cv  Vector to scale.
 * @param cs  Complex scale factor.
 * @param out Receives cs * cv[i] for i = 0, 1, 2, written in that order.
 */
__device__ __inline__ void s_mult(cuFloatComplex (&cv)[3], cuFloatComplex &cs, cuFloatComplex (&out)[3])
{
    // `cs` is re-read for every component, exactly as a per-iteration loop would.
    out[0] = cuCmulf(cs, cv[0]);
    out[1] = cuCmulf(cs, cv[1]);
    out[2] = cuCmulf(cs, cv[2]);
}
/**
 * Scales a real 3-vector by a complex scalar, producing a complex 3-vector.
 *
 * Each real component is promoted to a complex number with zero imaginary
 * part before the multiplication.
 *
 * @param v   Real vector to scale.
 * @param cs  Complex scale factor.
 * @param out Receives cs * v[i] for i = 0, 1, 2, written in that order.
 */
__device__ __inline__ void s_mult(float (&v)[3], cuFloatComplex &cs, cuFloatComplex (&out)[3])
{
    int i = 0;
    while (i < 3)
    {
        const cuFloatComplex promoted = make_cuFloatComplex(v[i],0);
        out[i] = cuCmulf(cs, promoted);
        ++i;
    }
}
/**
 * Scales a complex 3-vector by a real scalar.
 *
 * The scalar is promoted to a complex number with zero imaginary part and a
 * full complex multiplication is performed for every component.
 *
 * @param cv  Complex vector to scale.
 * @param s   Real scale factor.
 * @param out Receives s * cv[i] for i = 0, 1, 2, written in that order.
 */
__device__ __inline__ void s_mult(cuFloatComplex (&cv)[3], const float &s, cuFloatComplex (&out)[3])
{
    // `s` is re-read on every iteration rather than cached up front.
    for (int i = 0; i < 3; ++i)
    {
        const cuFloatComplex promoted = make_cuFloatComplex(s,0);
        out[i] = cuCmulf(promoted, cv[i]);
    }
}
/**
 * Computes out = cvin - 2 * (cvin . normal) * normal for a complex input
 * vector and a real normal (the mirror-reflection formula when `normal` has
 * unit length).
 *
 * @param cvin   Complex input vector.
 * @param normal Real normal vector.
 * @param out    Receives the resulting complex vector.
 */
__device__ __inline__ void snell(cuFloatComplex (&cvin)[3], float (&normal)[3], cuFloatComplex (&out)[3])
{
    // Twice the projection of cvin onto the normal.
    cuFloatComplex twice_projection;
    dot(cvin, normal, twice_projection);
    twice_projection = cuCmulf(make_cuFloatComplex(2.0f, 0.0f), twice_projection);

    // The normal scaled by that factor is what gets removed from cvin.
    cuFloatComplex scaled_normal[3];
    s_mult(normal, twice_projection, scaled_normal);

    diff(cvin, scaled_normal, out);
}
// snell (real overload): device inline function taking references to three
// 3-element float arrays (vin, normal, out).
// Visible steps: factor = vin . normal, factor is doubled, then normal is
// scaled by factor into rhs.
// NOTE(review): the declarations of `factor` and `rhs` and the statement that
// writes `out` are not visible in this listing; presumably out = vin - rhs,
// as in the complex overload of snell - confirm against the full source.
358 __device__ __inline__
void snell(
float (&vin)[3],
float (&normal)[3],
float (&out)[3])
361 dot(vin, normal, factor);
// NOTE(review): `2.` is a double literal, so if `factor` is a float the product
// is evaluated in double and narrowed on assignment; `2.0f` would avoid that.
363 factor = 2. * factor;
366 s_mult(normal, factor, rhs);
// snell_t: device inline function taking an input vector (vin), a normal, a
// scalar `mu` and an output vector, all vectors being 3-element float arrays.
// Visible steps:
//   in_dot_n = normal . vin
//   factor1  = sqrt(1 - mu^2 * (1 - in_dot_n^2))
//   term1    = factor1 * normal
//   temp1    = in_dot_n * normal
//   temp2    = vin - temp1
//   out      = term1 + term2
// NOTE(review): judging by the name and formula this looks like a refracted
// (transmitted) direction with mu as a ratio of refractive indices - confirm.
381 __device__ __inline__
void snell_t(
float (&vin)[3],
float (&normal)[3],
float mu,
float (&out)[3])
383 float in_dot_n, factor1;
384 float term1[3], term2[3], temp1[3], temp2[3];
386 dot(normal, vin, in_dot_n);
// NOTE(review): no guard for a negative radicand is visible here; sqrt would
// then yield NaN - confirm that callers rule this case out.
388 factor1 = sqrt(1 - mu*mu * (1 - in_dot_n*in_dot_n));
389 s_mult(normal, factor1, term1);
391 s_mult(normal, in_dot_n, temp1);
392 diff(vin, temp1, temp2);
// NOTE(review): term2 is consumed below, but the statement that fills it is
// not visible in this listing (presumably derived from temp2 and mu) - confirm.
395 add(term1, term2, out);
/**
 * Dyadic (outer) product of two real 3-vectors.
 *
 * @param v1  Vector indexing the rows of the result.
 * @param v2  Vector indexing the columns of the result.
 * @param out Receives the 3x3 matrix with out[r][c] = v1[r] * v2[c].
 */
__device__ __inline__ void dyad(float (&v1)[3], float (&v2)[3], float (&out)[3][3])
{
    for (int r = 0; r < 3; ++r)
    {
        for (int c = 0; c < 3; ++c)
        {
            out[r][c] = v1[r] * v2[c];
        }
    }
}
/**
 * Element-wise difference of two real 3x3 matrices.
 *
 * @param m1  Minuend.
 * @param m2  Subtrahend.
 * @param out Receives m1 - m2, filled row by row.
 */
__device__ __inline__ void matDiff(float (&m1)[3][3], float (&m2)[3][3], float (&out)[3][3])
{
    for (int r = 0; r < 3; ++r)
    {
        for (int c = 0; c < 3; ++c)
        {
            out[r][c] = m1[r][c] - m2[r][c];
        }
    }
}
/**
 * Product of a real 3x3 matrix with a real 3-vector.
 *
 * @param m1  Matrix, indexed as m1[row][column].
 * @param v1  Vector to multiply.
 * @param out Receives m1 * v1; rows are processed in order 0, 1, 2.
 */
__device__ __inline__ void matVec(float (&m1)[3][3], float (&v1)[3], float (&out)[3])
{
    int row = 0;
    while (row < 3)
    {
        float (&coeffs)[3] = m1[row];
        out[row] = coeffs[0] * v1[0] + coeffs[1] * v1[1] + coeffs[2] * v1[2];
        ++row;
    }
}
/**
 * Product of a real 3x3 matrix with a complex 3-vector.
 *
 * Matrix entries are promoted to complex numbers with zero imaginary part
 * before each multiplication.
 *
 * @param m1  Real matrix, indexed as m1[row][column].
 * @param cv1 Complex vector to multiply.
 * @param out Receives m1 * cv1; rows are processed in order 0, 1, 2.
 */
__device__ __inline__ void matVec(float (&m1)[3][3], cuFloatComplex (&cv1)[3], cuFloatComplex (&out)[3])
{
    for (int row = 0; row < 3; ++row)
    {
        const cuFloatComplex t0 = cuCmulf(make_cuFloatComplex(m1[row][0],0), cv1[0]);
        const cuFloatComplex t1 = cuCmulf(make_cuFloatComplex(m1[row][1],0), cv1[1]);
        const cuFloatComplex t2 = cuCmulf(make_cuFloatComplex(m1[row][2],0), cv1[2]);

        // Summation is grouped as t0 + (t1 + t2).
        out[row] = cuCaddf(t0, cuCaddf(t1, t2));
    }
}
// matVec4: device inline function applying a 16-element float array `mat` to
// a 3-element float vector. Both visible loops read mat[n*4 + c], i.e. they
// treat `mat` as a 4x4 matrix with four consecutive elements per row n.
//   first loop : out[n] = row n (columns 0..2) . cv1
//   second loop: the same, plus mat[3 + n*4] (the fourth column of row n)
// NOTE(review): the branch that selects between the two loops is not visible
// in this listing; presumably `vec` (default false) chooses the first form,
// which omits the fourth-column term - confirm against the full source.
481 __device__ __inline__
void matVec4(
float (&mat)[16],
float (&cv1)[3],
float (&out)[3],
bool vec =
false)
485 for(
int n=0; n<3; n++)
487 out[n] = mat[n*4] * cv1[0] + mat[1+n*4] * cv1[1] + mat[2+n*4] * cv1[2];
493 for(
int n=0; n<3; n++)
495 out[n] = mat[n*4] * cv1[0] + mat[1+n*4] * cv1[1] + mat[2+n*4] * cv1[2] + mat[3+n*4];
// invmatVec4: device inline function applying a 16-element float array `mat`
// to a 3-element float vector using the elements mat[n], mat[n+4], mat[n+8],
// i.e. stepping through `mat` with a stride of 4 for each output component n.
//   first loop : out[n] = mat[n]*cv1[0] + mat[n+4]*cv1[1] + mat[n+8]*cv1[2]
//   second loop: the same, plus
//                temp = -(mat[n]*mat[3] + mat[n+4]*mat[7] + mat[n+8]*mat[11])
// NOTE(review): the branch selecting between the loops and the declaration of
// `temp` are not visible in this listing; presumably `vec` (default false)
// chooses the first form. The name and index pattern suggest the inverse of a
// rigid transform (transposed 3x3 part, back-rotated negative translation),
// which is only a true inverse if that 3x3 part is orthonormal - confirm.
509 __device__ __inline__
void invmatVec4(
float (&mat)[16],
float (&cv1)[3],
float (&out)[3],
bool vec =
false)
513 for(
int n=0; n<3; n++)
515 out[n] = mat[n] * cv1[0] + mat[n+4] * cv1[1] + mat[n+8] * cv1[2];
522 for(
int n=0; n<3; n++)
524 temp = -mat[n]*mat[3] - mat[n+4]*mat[7] - mat[n+8]*mat[11];
525 out[n] = mat[n] * cv1[0] + mat[n+4] * cv1[1] + mat[n+8] * cv1[2] + temp;
// cuCexpf: device inline function taking a cuFloatComplex `z` and returning a
// cuFloatComplex. The only visible statement obtains the sine and cosine of
// the imaginary part z.y in a single sincosf call (ys = sin, yc = cos).
// NOTE(review): the remaining statements are not visible in this listing;
// presumably the result is exp(z.x) * (yc + i*ys) - confirm.
537 __device__ __inline__ cuFloatComplex
cuCexpf(cuFloatComplex z)
542 sincosf(z.y, &ys, &yc);
// cuCaddSf (complex, real overload): device inline function taking a
// cuFloatComplex `a` and a float `b` and returning a cuFloatComplex.
// NOTE(review): the body is not visible in this listing; from the name it
// presumably returns a + b with b treated as purely real - confirm.
555 __device__ __inline__ cuFloatComplex
cuCaddSf(cuFloatComplex a,
float b)
// cuCaddSf (real, complex overload): device inline function taking a float
// `a` and a cuFloatComplex `b` and returning a cuFloatComplex.
// NOTE(review): the body is not visible in this listing; from the name it
// presumably returns a + b with a treated as purely real - confirm.
569 __device__ __inline__ cuFloatComplex
cuCaddSf(
float a, cuFloatComplex b)
// cuCsubSf (complex, real overload): device inline function taking a
// cuFloatComplex `a` and a float `b` and returning a cuFloatComplex.
// NOTE(review): the body is not visible in this listing; from the name it
// presumably returns a - b with b treated as purely real - confirm.
583 __device__ __inline__ cuFloatComplex
cuCsubSf(cuFloatComplex a,
float b)
// cuCsubSf (real, complex overload): device inline function taking a float
// `a` and a cuFloatComplex `b` and returning a cuFloatComplex.
// NOTE(review): the body is not visible in this listing; from the name it
// presumably returns a - b with a treated as purely real - confirm.
597 __device__ __inline__ cuFloatComplex
cuCsubSf(
float a, cuFloatComplex b)
// cuCmulSf (complex, real overload): device inline function taking a
// cuFloatComplex `a` and a float `b` and returning a cuFloatComplex.
// NOTE(review): the body is not visible in this listing; from the name it
// presumably returns a * b with b treated as purely real - confirm.
611 __device__ __inline__ cuFloatComplex
cuCmulSf(cuFloatComplex a,
float b)
// cuCmulSf (real, complex overload): device inline function taking a float
// `a` and a cuFloatComplex `b` and returning a cuFloatComplex.
// NOTE(review): the body is not visible in this listing; from the name it
// presumably returns a * b with a treated as purely real - confirm.
625 __device__ __inline__ cuFloatComplex
cuCmulSf(
float a, cuFloatComplex b)
// cuCdivSf (complex, real overload): device inline function taking a
// cuFloatComplex `a` and a float `b` and returning a cuFloatComplex.
// NOTE(review): the body is not visible in this listing; from the name it
// presumably returns a / b with b treated as purely real - confirm.
639 __device__ __inline__ cuFloatComplex
cuCdivSf(cuFloatComplex a,
float b)
// cuCdivSf (real, complex overload): device inline function taking a float
// `a` and a cuFloatComplex `b` and returning a cuFloatComplex.
// Visible steps: `a` is promoted to the complex number (a, 0) and divided by
// `b` with cuCdivf, the quotient being stored in `res`.
// NOTE(review): the return statement is not visible in this listing;
// presumably `res` is returned - confirm against the full source.
653 __device__ __inline__ cuFloatComplex
cuCdivSf(
float a, cuFloatComplex b)
655 cuFloatComplex ac, res;
656 ac = make_cuFloatComplex(a, 0.);
658 res = cuCdivf(ac, b);