/**
 * Scalar product of two real 3-vectors, delivered through `out`.
 *
 * NOTE(review): only the signature and the loop header are visible in this
 * listing. The statements that initialise and accumulate `out` are not shown,
 * so the summary above is inferred from the name and from the complex
 * overloads of dot() in this file - confirm against the full source.
 *
 * @param v1 First real vector, 3 elements.
 * @param v2 Second real vector, 3 elements.
 * @param out Scalar that receives the result.
 */
21 __device__ __inline__
void dot(
float (&v1)[3],
float (&v2)[3],
float &out)
// Iterates over the three components (loop body not visible in this listing).
25 for(
int n=0; n<3; n++)
/**
 * Inner product of two complex 3-vectors with the first argument conjugated:
 * out = sum_n conj(cv1[n]) * cv2[n].
 *
 * @param cv1 First complex vector, 3 elements (conjugated in the product).
 * @param cv2 Second complex vector, 3 elements.
 * @param out Complex scalar that receives the result; it is reset to zero first.
 */
__device__ __inline__ void dot(cuFloatComplex (&cv1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex &out)
{
    out = make_cuFloatComplex(0.0f, 0.0f);

    int idx = 0;
    while (idx < 3)
    {
        // One term of the sum: conj(cv1[idx]) * cv2[idx].
        const cuFloatComplex term = cuCmulf(cuConjf(cv1[idx]), cv2[idx]);
        out = cuCaddf(term, out);
        ++idx;
    }
}
/**
 * Inner product of a complex 3-vector with a real 3-vector, with the complex
 * argument conjugated: out = sum_n conj(cv1[n]) * v2[n].
 *
 * @param cv1 Complex vector, 3 elements (conjugated in the product).
 * @param v2 Real vector, 3 elements; each entry is promoted to a complex
 *           number with zero imaginary part.
 * @param out Complex scalar that receives the result; it is reset to zero first.
 */
__device__ __inline__ void dot(cuFloatComplex (&cv1)[3], float (&v2)[3], cuFloatComplex &out)
{
    out = make_cuFloatComplex(0.0f, 0.0f);

    for (int k = 0; k < 3; ++k)
    {
        const cuFloatComplex lhs = cuConjf(cv1[k]);
        const cuFloatComplex rhs = make_cuFloatComplex(v2[k], 0.0f);
        out = cuCaddf(cuCmulf(lhs, rhs), out);
    }
}
/**
 * Inner product of a real 3-vector with a complex 3-vector:
 * out = sum_n v1[n] * cv2[n]. No conjugation is applied.
 *
 * @param v1 Real vector, 3 elements; each entry is promoted to a complex
 *           number with zero imaginary part.
 * @param cv2 Complex vector, 3 elements.
 * @param out Complex scalar that receives the result; it is reset to zero first.
 */
__device__ __inline__ void dot(float (&v1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex &out)
{
    out = make_cuFloatComplex(0.0f, 0.0f);

    for (int k = 0; k < 3; ++k)
    {
        const cuFloatComplex lhs = make_cuFloatComplex(v1[k], 0.0f);
        out = cuCaddf(cuCmulf(lhs, cv2[k]), out);
    }
}
/**
 * Cross (exterior) product of two real 3-vectors: out = v1 x v2.
 *
 * All three components are evaluated before anything is stored, so the call
 * is safe when `out` is the same array as `v1` or `v2`.
 *
 * @param v1 Left operand, 3 elements.
 * @param v2 Right operand, 3 elements.
 * @param out Array of 3 elements that receives the product.
 */
__device__ __inline__ void ext(float (&v1)[3], float (&v2)[3], float (&out)[3])
{
    // Compute into locals first: writing out[0] early would corrupt the
    // inputs of out[1] and out[2] when out aliases v1 or v2.
    const float x = v1[1]*v2[2] - v1[2]*v2[1];
    const float y = v1[2]*v2[0] - v1[0]*v2[2];
    const float z = v1[0]*v2[1] - v1[1]*v2[0];

    out[0] = x;
    out[1] = y;
    out[2] = z;
}
/**
 * Cross (exterior) product of two complex 3-vectors: out = cv1 x cv2.
 * No conjugation is applied to either operand.
 *
 * All three components are evaluated before anything is stored, so the call
 * is safe when `out` is the same array as `cv1` or `cv2`.
 *
 * @param cv1 Left operand, 3 complex elements.
 * @param cv2 Right operand, 3 complex elements.
 * @param out Array of 3 complex elements that receives the product.
 */
__device__ __inline__ void ext(cuFloatComplex (&cv1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex (&out)[3])
{
    // Compute into locals first: writing out[0] early would corrupt the
    // inputs of out[1] and out[2] when out aliases cv1 or cv2.
    const cuFloatComplex x = cuCsubf(cuCmulf(cv1[1], cv2[2]), cuCmulf(cv1[2], cv2[1]));
    const cuFloatComplex y = cuCsubf(cuCmulf(cv1[2], cv2[0]), cuCmulf(cv1[0], cv2[2]));
    const cuFloatComplex z = cuCsubf(cuCmulf(cv1[0], cv2[1]), cuCmulf(cv1[1], cv2[0]));

    out[0] = x;
    out[1] = y;
    out[2] = z;
}
/**
 * Cross (exterior) product of a complex 3-vector with a real 3-vector:
 * out = cv1 x v2. Real entries are promoted to complex numbers with zero
 * imaginary part.
 *
 * All three components are evaluated before anything is stored, so the call
 * is safe when `out` is the same array as `cv1`.
 *
 * @param cv1 Left operand, 3 complex elements.
 * @param v2 Right operand, 3 real elements.
 * @param out Array of 3 complex elements that receives the product.
 */
__device__ __inline__ void ext(cuFloatComplex (&cv1)[3], float (&v2)[3], cuFloatComplex (&out)[3])
{
    // Compute into locals first: writing out[0] early would corrupt the
    // inputs of out[1] and out[2] when out aliases cv1.
    const cuFloatComplex x = cuCsubf(cuCmulf(cv1[1], make_cuFloatComplex(v2[2],0)), cuCmulf(cv1[2], make_cuFloatComplex(v2[1],0)));
    const cuFloatComplex y = cuCsubf(cuCmulf(cv1[2], make_cuFloatComplex(v2[0],0)), cuCmulf(cv1[0], make_cuFloatComplex(v2[2],0)));
    const cuFloatComplex z = cuCsubf(cuCmulf(cv1[0], make_cuFloatComplex(v2[1],0)), cuCmulf(cv1[1], make_cuFloatComplex(v2[0],0)));

    out[0] = x;
    out[1] = y;
    out[2] = z;
}
/**
 * Cross (exterior) product of a real 3-vector with a complex 3-vector:
 * out = v1 x cv2. Real entries are promoted to complex numbers with zero
 * imaginary part.
 *
 * All three components are evaluated before anything is stored, so the call
 * is safe when `out` is the same array as `cv2`.
 *
 * @param v1 Left operand, 3 real elements.
 * @param cv2 Right operand, 3 complex elements.
 * @param out Array of 3 complex elements that receives the product.
 */
__device__ __inline__ void ext(float (&v1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex (&out)[3])
{
    // Compute into locals first: writing out[0] early would corrupt the
    // inputs of out[1] and out[2] when out aliases cv2.
    const cuFloatComplex x = cuCsubf(cuCmulf(make_cuFloatComplex(v1[1],0), cv2[2]), cuCmulf(make_cuFloatComplex(v1[2],0), cv2[1]));
    const cuFloatComplex y = cuCsubf(cuCmulf(make_cuFloatComplex(v1[2],0), cv2[0]), cuCmulf(make_cuFloatComplex(v1[0],0), cv2[2]));
    const cuFloatComplex z = cuCsubf(cuCmulf(make_cuFloatComplex(v1[0],0), cv2[1]), cuCmulf(make_cuFloatComplex(v1[1],0), cv2[0]));

    out[0] = x;
    out[1] = y;
    out[2] = z;
}
/**
 * Component-wise difference of two real 3-vectors: out = v1 - v2.
 *
 * @param v1 Minuend, 3 elements.
 * @param v2 Subtrahend, 3 elements.
 * @param out Array of 3 elements that receives the difference.
 */
__device__ __inline__ void diff(float (&v1)[3], float (&v2)[3], float (&out)[3])
{
    out[0] = v1[0] - v2[0];
    out[1] = v1[1] - v2[1];
    out[2] = v1[2] - v2[2];
}
/**
 * Component-wise difference of two complex 3-vectors: out = cv1 - cv2.
 *
 * @param cv1 Minuend, 3 complex elements.
 * @param cv2 Subtrahend, 3 complex elements.
 * @param out Array of 3 complex elements that receives the difference.
 */
__device__ __inline__ void diff(cuFloatComplex (&cv1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex (&out)[3])
{
    int idx = 0;
    while (idx < 3)
    {
        out[idx] = cuCsubf(cv1[idx], cv2[idx]);
        ++idx;
    }
}
/**
 * Reduces a real 3-vector to a single scalar written to `out`.
 *
 * NOTE(review): the body is not visible in this listing. Judging by the name
 * this presumably computes the Euclidean length of `v` - confirm against the
 * full source.
 *
 * @param v Real vector, 3 elements.
 * @param out Scalar that receives the result.
 */
195 __device__ __inline__
void abs(
float (&v)[3],
float &out)
/**
 * Component-wise complex conjugate of a complex 3-vector: out[n] = conj(cv[n]).
 *
 * @param cv Input vector, 3 complex elements.
 * @param out Array of 3 complex elements that receives the conjugates.
 */
__device__ __inline__ void conja(cuFloatComplex (&cv)[3], cuFloatComplex (&out)[3])
{
    for (int k = 0; k < 3; ++k)
    {
        const cuFloatComplex flipped = cuConjf(cv[k]);
        out[k] = flipped;
    }
}
/**
 * Writes `v` divided component-wise by a scalar `norm` into `out`.
 *
 * NOTE(review): the declaration and computation of `norm` are not visible in
 * this listing. Presumably it is the length of `v`, and there may be a guard
 * for a zero length - confirm against the full source.
 *
 * @param v Real vector, 3 elements.
 * @param out Array of 3 elements that receives the scaled vector.
 */
225 __device__ __inline__
void normalize(
float (&v)[3],
float (&out)[3])
// Divide every component by norm.
236 for(
int n=0; n<3; n++)
238 out[n] = v[n] / norm;
/**
 * Component-wise sum of two real 3-vectors: out = v1 + v2.
 *
 * @param v1 First summand, 3 elements.
 * @param v2 Second summand, 3 elements.
 * @param out Array of 3 elements that receives the sum.
 */
__device__ __inline__ void add(float (&v1)[3], float (&v2)[3], float (&out)[3])
{
    out[0] = v1[0] + v2[0];
    out[1] = v1[1] + v2[1];
    out[2] = v1[2] + v2[2];
}
/**
 * Combines a real 3-vector `v` with a real scalar `s` and writes to `out`.
 *
 * NOTE(review): the loop body is not visible in this listing. Judging by the
 * name and the complex overloads of s_mult() in this file, this presumably
 * stores s * v[n] in out[n] - confirm against the full source.
 *
 * @param v Real vector, 3 elements.
 * @param s Real scalar, taken by non-const reference.
 * @param out Array of 3 elements that receives the result.
 */
268 __device__ __inline__
void s_mult(
float (&v)[3],
float &s,
float (&out)[3])
// Iterates over the three components (loop body not visible in this listing).
270 for(
int n=0; n<3; n++)
/**
 * Scales a complex 3-vector by a complex scalar: out[n] = cs * cv[n].
 *
 * @param cv Complex vector, 3 elements.
 * @param cs Complex scalar.
 * @param out Array of 3 complex elements that receives the scaled vector.
 */
__device__ __inline__ void s_mult(cuFloatComplex (&cv)[3], cuFloatComplex &cs, cuFloatComplex (&out)[3])
{
    int idx = 0;
    while (idx < 3)
    {
        out[idx] = cuCmulf(cs, cv[idx]);
        ++idx;
    }
}
/**
 * Scales a real 3-vector by a complex scalar: out[n] = cs * v[n], where each
 * v[n] is promoted to a complex number with zero imaginary part.
 *
 * @param v Real vector, 3 elements.
 * @param cs Complex scalar.
 * @param out Array of 3 complex elements that receives the scaled vector.
 */
__device__ __inline__ void s_mult(float (&v)[3], cuFloatComplex &cs, cuFloatComplex (&out)[3])
{
    for (int k = 0; k < 3; ++k)
    {
        const cuFloatComplex promoted = make_cuFloatComplex(v[k], 0.0f);
        out[k] = cuCmulf(cs, promoted);
    }
}
/**
 * Scales a complex 3-vector by a real scalar: out[n] = s * cv[n], where `s`
 * is promoted to a complex number with zero imaginary part.
 *
 * @param cv Complex vector, 3 elements.
 * @param s Real scalar.
 * @param out Array of 3 complex elements that receives the scaled vector.
 */
__device__ __inline__ void s_mult(cuFloatComplex (&cv)[3], const float &s, cuFloatComplex (&out)[3])
{
    int idx = 0;
    while (idx < 3)
    {
        out[idx] = cuCmulf(make_cuFloatComplex(s, 0.0f), cv[idx]);
        ++idx;
    }
}
/**
 * Subtracts from a complex 3-vector twice its dot() with `normal`, taken
 * along `normal`: out = cvin - 2 * dot(cvin, normal) * normal.
 *
 * NOTE(review): the dot(complex, real) overload in this file conjugates its
 * first argument, so the coefficient is built from conj(cvin). Confirm that
 * this is intended.
 *
 * @param cvin Incoming complex vector, 3 elements.
 * @param normal Real normal vector, 3 elements.
 * @param out Array of 3 complex elements that receives the result.
 */
__device__ __inline__ void snell(cuFloatComplex (&cvin)[3], float (&normal)[3], cuFloatComplex (&out)[3])
{
    // Coefficient along the normal: 2 * dot(cvin, normal).
    cuFloatComplex twice_proj;
    dot(cvin, normal, twice_proj);
    twice_proj = cuCmulf(make_cuFloatComplex(2.0f, 0.0f), twice_proj);

    // Vector along the normal that is removed from the input.
    cuFloatComplex along_normal[3];
    s_mult(normal, twice_proj, along_normal);

    diff(cvin, along_normal, out);
}
/**
 * Real-valued counterpart of the complex snell() overload in this file.
 *
 * NOTE(review): the declarations of `factor` and `rhs`, and the statement
 * that writes `out`, are not visible in this listing. Presumably the result
 * is out = vin - rhs, as in the complex overload - confirm against the full
 * source.
 *
 * @param vin Incoming real vector, 3 elements.
 * @param normal Real normal vector, 3 elements.
 * @param out Array of 3 elements that receives the result.
 */
358 __device__ __inline__
void snell(
float (&vin)[3],
float (&normal)[3],
float (&out)[3])
// factor receives the result of dot(vin, normal).
361 dot(vin, normal, factor);
// NOTE(review): `2.` is a double literal, so this multiply is done in double
// precision and converted back; `2.0f` would keep it in float.
363 factor = 2. * factor;
// rhs receives the result of s_mult(normal, factor), presumably factor * normal.
366 s_mult(normal, factor, rhs);
/**
 * Builds an output vector from an incoming vector, a normal and the scalar
 * `mu`, as the sum of a term along `normal` (term1) and a second term (term2).
 *
 * NOTE(review): no statement assigning `term2` is visible in this listing,
 * although it is read by the final add(). Presumably it is derived from
 * `temp2` and `mu` - confirm against the full source.
 *
 * @param vin Incoming real vector, 3 elements.
 * @param normal Real normal vector, 3 elements.
 * @param mu Real scalar used in the square-root factor.
 * @param out Array of 3 elements that receives term1 + term2.
 */
381 __device__ __inline__
void snell_t(
float (&vin)[3],
float (&normal)[3],
float mu,
float (&out)[3])
383 float in_dot_n, factor1;
384 float term1[3], term2[3], temp1[3], temp2[3];
// in_dot_n receives the result of dot(normal, vin).
386 dot(normal, vin, in_dot_n);
// factor1 = sqrt(1 - mu^2 * (1 - in_dot_n^2)). No guard is visible here for
// a negative argument to sqrt.
388 factor1 = sqrt(1 - mu*mu * (1 - in_dot_n*in_dot_n));
// term1 receives s_mult(normal, factor1), presumably factor1 * normal.
389 s_mult(normal, factor1, term1);
// temp1 receives s_mult(normal, in_dot_n); temp2 = vin - temp1.
391 s_mult(normal, in_dot_n, temp1);
392 diff(vin, temp1, temp2);
// out = term1 + term2.
395 add(term1, term2, out);
/**
 * Dyadic (outer) product of two real 3-vectors: out[r][c] = v1[r] * v2[c].
 *
 * @param v1 Vector supplying the row factor, 3 elements.
 * @param v2 Vector supplying the column factor, 3 elements.
 * @param out 3x3 array that receives the product.
 */
__device__ __inline__ void dyad(float (&v1)[3], float (&v2)[3], float (&out)[3][3])
{
    for (int row = 0; row < 3; ++row)
    {
        for (int col = 0; col < 3; ++col)
        {
            out[row][col] = v1[row] * v2[col];
        }
    }
}
/**
 * Element-wise difference of two real 3x3 matrices: out = m1 - m2.
 *
 * @param m1 Minuend, 3x3 array.
 * @param m2 Subtrahend, 3x3 array.
 * @param out 3x3 array that receives the difference.
 */
__device__ __inline__ void matDiff(float (&m1)[3][3], float (&m2)[3][3], float (&out)[3][3])
{
    for (int row = 0; row < 3; ++row)
    {
        for (int col = 0; col < 3; ++col)
        {
            out[row][col] = m1[row][col] - m2[row][col];
        }
    }
}
/**
 * Product of a real 3x3 matrix with a real 3-vector: out = m1 * v1.
 *
 * The input vector is copied to locals before any output is stored, so the
 * in-place call matVec(m, v, v) gives the correct result.
 *
 * @param m1 3x3 matrix, indexed as m1[row][column].
 * @param v1 Real vector, 3 elements.
 * @param out Array of 3 elements that receives the product.
 */
__device__ __inline__ void matVec(float (&m1)[3][3], float (&v1)[3], float (&out)[3])
{
    // Snapshot the input: writing out[0] would otherwise change v1[0] before
    // rows 1 and 2 read it when out aliases v1.
    const float x = v1[0];
    const float y = v1[1];
    const float z = v1[2];

    for (int n = 0; n < 3; n++)
    {
        out[n] = m1[n][0] * x + m1[n][1] * y + m1[n][2] * z;
    }
}
/**
 * Product of a real 3x3 matrix with a complex 3-vector: out = m1 * cv1.
 * Matrix entries are promoted to complex numbers with zero imaginary part.
 *
 * The input vector is copied to locals before any output is stored, so the
 * in-place call matVec(m, cv, cv) gives the correct result.
 *
 * @param m1 3x3 real matrix, indexed as m1[row][column].
 * @param cv1 Complex vector, 3 elements.
 * @param out Array of 3 complex elements that receives the product.
 */
__device__ __inline__ void matVec(float (&m1)[3][3], cuFloatComplex (&cv1)[3], cuFloatComplex (&out)[3])
{
    // Snapshot the input: writing out[0] would otherwise change cv1[0] before
    // rows 1 and 2 read it when out aliases cv1.
    const cuFloatComplex c0 = cv1[0];
    const cuFloatComplex c1 = cv1[1];
    const cuFloatComplex c2 = cv1[2];

    for (int n = 0; n < 3; n++)
    {
        out[n] = cuCaddf(cuCmulf(make_cuFloatComplex(m1[n][0],0), c0),
                cuCaddf(cuCmulf(make_cuFloatComplex(m1[n][1],0), c1),
                    cuCmulf(make_cuFloatComplex(m1[n][2],0), c2)));
    }
}
/**
 * Applies the first three rows of a flat 16-element matrix to a real
 * 3-vector. Row n is read from mat[4*n] .. mat[4*n + 3].
 *
 * Two variants are visible: one uses only the first three entries of each
 * row, the other also adds the fourth entry mat[3 + 4*n] as an offset.
 *
 * NOTE(review): the statement that chooses between the two loops is not
 * visible in this listing. Presumably `vec == true` selects the variant
 * without the offset - confirm against the full source.
 *
 * @param mat Flat array of 16 floats.
 * @param cv1 Real vector, 3 elements.
 * @param out Array of 3 elements that receives the result.
 * @param vec Selector between the two variants; defaults to false.
 */
481 __device__ __inline__
void matVec4(
float (&mat)[16],
float (&cv1)[3],
float (&out)[3],
bool vec =
false)
// Variant without offset: 3x3 part of the matrix times cv1.
485 for(
int n=0; n<3; n++)
487 out[n] = mat[n*4] * cv1[0] + mat[1+n*4] * cv1[1] + mat[2+n*4] * cv1[2];
// Variant with offset: as above, plus the fourth entry of row n.
493 for(
int n=0; n<3; n++)
495 out[n] = mat[n*4] * cv1[0] + mat[1+n*4] * cv1[1] + mat[2+n*4] * cv1[2] + mat[3+n*4];
/**
 * Applies the transpose of the 3x3 part of a flat 16-element matrix to a
 * real 3-vector: column n is read from mat[n], mat[n + 4], mat[n + 8].
 *
 * Two variants are visible: one applies only the transposed 3x3 part, the
 * other also adds `temp`, the negated product of that transposed part with
 * (mat[3], mat[7], mat[11]).
 *
 * NOTE(review): the statement that chooses between the two loops and the
 * declaration of `temp` are not visible in this listing. Presumably
 * `vec == true` selects the variant without `temp`. The transpose equals the
 * inverse only if the 3x3 part is orthonormal - confirm both points.
 *
 * @param mat Flat array of 16 floats.
 * @param cv1 Real vector, 3 elements.
 * @param out Array of 3 elements that receives the result.
 * @param vec Selector between the two variants; defaults to false.
 */
509 __device__ __inline__
void invmatVec4(
float (&mat)[16],
float (&cv1)[3],
float (&out)[3],
bool vec =
false)
// Variant without offset: transposed 3x3 part times cv1.
513 for(
int n=0; n<3; n++)
515 out[n] = mat[n] * cv1[0] + mat[n+4] * cv1[1] + mat[n+8] * cv1[2];
// Variant with offset: as above, plus temp.
522 for(
int n=0; n<3; n++)
// temp = -(column n of the 3x3 part) . (mat[3], mat[7], mat[11]).
524 temp = -mat[n]*mat[3] - mat[n+4]*mat[7] - mat[n+8]*mat[11];
525 out[n] = mat[n] * cv1[0] + mat[n+4] * cv1[1] + mat[n+8] * cv1[2] + temp;
/**
 * Takes a complex number and returns a complex number.
 *
 * NOTE(review): only one body line is visible in this listing; the
 * declarations of `ys` and `yc` and the return statement are not shown.
 * Judging by the name this presumably returns
 * exp(z) = exp(z.x) * (cos(z.y) + i sin(z.y)) - confirm against the full
 * source.
 *
 * @param z Complex argument.
 */
537 __device__ __inline__ cuFloatComplex
cuCexpf(cuFloatComplex z)
// Sine and cosine of the imaginary part, stored in ys and yc respectively.
542 sincosf(z.y, &ys, &yc);
/**
 * Takes a complex number and a real scalar and returns a complex number.
 *
 * NOTE(review): the body is not visible in this listing. Judging by the name
 * this presumably returns a + b - confirm against the full source.
 *
 * @param a Complex operand.
 * @param b Real operand.
 */
555 __device__ __inline__ cuFloatComplex
cuCaddSf(cuFloatComplex a,
float b)
/**
 * Takes a real scalar and a complex number and returns a complex number.
 *
 * NOTE(review): the body is not visible in this listing. Judging by the name
 * this presumably returns a + b - confirm against the full source.
 *
 * @param a Real operand.
 * @param b Complex operand.
 */
569 __device__ __inline__ cuFloatComplex
cuCaddSf(
float a, cuFloatComplex b)
/**
 * Takes a complex number and a real scalar and returns a complex number.
 *
 * NOTE(review): the body is not visible in this listing. Judging by the name
 * this presumably returns a - b - confirm against the full source.
 *
 * @param a Complex operand.
 * @param b Real operand.
 */
583 __device__ __inline__ cuFloatComplex
cuCsubSf(cuFloatComplex a,
float b)
/**
 * Takes a real scalar and a complex number and returns a complex number.
 *
 * NOTE(review): the body is not visible in this listing. Judging by the name
 * this presumably returns a - b - confirm against the full source.
 *
 * @param a Real operand.
 * @param b Complex operand.
 */
597 __device__ __inline__ cuFloatComplex
cuCsubSf(
float a, cuFloatComplex b)
/**
 * Takes a complex number and a real scalar and returns a complex number.
 *
 * NOTE(review): the body is not visible in this listing. Judging by the name
 * this presumably returns a * b - confirm against the full source.
 *
 * @param a Complex operand.
 * @param b Real operand.
 */
611 __device__ __inline__ cuFloatComplex
cuCmulSf(cuFloatComplex a,
float b)
/**
 * Takes a real scalar and a complex number and returns a complex number.
 *
 * NOTE(review): the body is not visible in this listing. Judging by the name
 * this presumably returns a * b - confirm against the full source.
 *
 * @param a Real operand.
 * @param b Complex operand.
 */
625 __device__ __inline__ cuFloatComplex
cuCmulSf(
float a, cuFloatComplex b)
/**
 * Takes a complex number and a real scalar and returns a complex number.
 *
 * NOTE(review): the body is not visible in this listing. Judging by the name
 * this presumably returns a / b - confirm against the full source.
 *
 * @param a Complex operand.
 * @param b Real operand.
 */
639 __device__ __inline__ cuFloatComplex
cuCdivSf(cuFloatComplex a,
float b)
/**
 * Divides a real scalar by a complex number.
 *
 * NOTE(review): the return statement is not visible in this listing.
 * Presumably `res` is returned - confirm against the full source.
 *
 * @param a Real numerator.
 * @param b Complex denominator.
 */
653 __device__ __inline__ cuFloatComplex
cuCdivSf(
float a, cuFloatComplex b)
655 cuFloatComplex ac, res;
// Promote the real numerator to a complex number with zero imaginary part.
656 ac = make_cuFloatComplex(a, 0.);
// res = ac / b using the cuComplex division routine.
658 res = cuCdivf(ac, b);
__device__ __inline__ cuFloatComplex cuCsubSf(cuFloatComplex a, float b)
Definition: GUtils.h:583
__device__ __inline__ void conja(cuFloatComplex(&cv)[3], cuFloatComplex(&out)[3])
Definition: GUtils.h:209
__device__ __inline__ cuFloatComplex cuCaddSf(cuFloatComplex a, float b)
Definition: GUtils.h:555
__device__ __inline__ void snell_t(float(&vin)[3], float(&normal)[3], float mu, float(&out)[3])
Definition: GUtils.h:381
__device__ __inline__ void diff(float(&v1)[3], float(&v2)[3], float(&out)[3])
Definition: GUtils.h:162
__device__ __inline__ void invmatVec4(float(&mat)[16], float(&cv1)[3], float(&out)[3], bool vec=false)
Definition: GUtils.h:509
__device__ __inline__ void abs(float(&v)[3], float &out)
Definition: GUtils.h:195
__device__ __inline__ void s_mult(float(&v)[3], float &s, float(&out)[3])
Definition: GUtils.h:268
__device__ __inline__ void matVec4(float(&mat)[16], float(&cv1)[3], float(&out)[3], bool vec=false)
Definition: GUtils.h:481
__device__ __inline__ cuFloatComplex cuCdivSf(cuFloatComplex a, float b)
Definition: GUtils.h:639
__device__ __inline__ void snell(cuFloatComplex(&cvin)[3], float(&normal)[3], cuFloatComplex(&out)[3])
Definition: GUtils.h:336
__device__ __inline__ void add(float(&v1)[3], float(&v2)[3], float(&out)[3])
Definition: GUtils.h:251
__device__ __inline__ void matDiff(float(&m1)[3][3], float(&m2)[3][3], float(&out)[3][3])
Definition: GUtils.h:426
__device__ __inline__ void dot(float(&v1)[3], float(&v2)[3], float &out)
Definition: GUtils.h:21
__device__ __inline__ void dyad(float(&v1)[3], float(&v2)[3], float(&out)[3][3])
Definition: GUtils.h:407
__device__ __inline__ void ext(float(&v1)[3], float(&v2)[3], float(&out)[3])
Definition: GUtils.h:97
__device__ __inline__ void normalize(float(&v)[3], float(&out)[3])
Definition: GUtils.h:225
__device__ __inline__ cuFloatComplex cuCexpf(cuFloatComplex z)
Definition: GUtils.h:537
__device__ __inline__ cuFloatComplex cuCmulSf(cuFloatComplex a, float b)
Definition: GUtils.h:611
__device__ __inline__ void matVec(float(&m1)[3][3], float(&v1)[3], float(&out)[3])
Definition: GUtils.h:445