PyPO User Manual
GUtils.h
Go to the documentation of this file.
1 #include <math.h>
2 #include <cuda.h>
3 #include <cuComplex.h>
4 
5 /*! \file GUtils.h
6  \brief Linear algebra functions for the CUDA version of PyPO.
7 
8  Contains float overloaded functions for doing basic 3D vector operations.
9  For the CUDA complex valued linear algebra, we employ the cuComplex.h library.
10 */
11 
12 /**
13  * Dot product.
14  *
15  * Take the dot (inner) product of two real valued arrays of size 3.
16  *
17  * @param v1 Array of 3 float.
18  * @param v2 Array of 3 float.
19  * @param out Scalar float.
20  */
21 __device__ __inline__ void dot(float (&v1)[3], float (&v2)[3], float &out)
22 {
23  out = 0;
24 
25  for(int n=0; n<3; n++)
26  {
27  out += v1[n] * v2[n];
28  }
29 }
30 
31 /**
32  * Dot product.
33  *
34  * Take the dot (inner) product of two complex valued float arrays of size 3.
35  *
36  * @param cv1 Array of 3 complex float.
37  * @param cv2 Array of 3 complex float.
38  * @param out Scalar complex float.
39  */
40 __device__ __inline__ void dot(cuFloatComplex (&cv1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex &out)
41 {
42  out = make_cuFloatComplex(0, 0);
43 
44  for(int n=0; n<3; n++)
45  {
46  out = cuCaddf(cuCmulf(cuConjf(cv1[n]), cv2[n]), out);
47  }
48 }
49 
50 /**
51  * Dot product.
52  *
53  * Take the dot (inner) product of one complex valued and one real valued float array of size 3.
54  *
55  * @param cv1 Array of 3 complex float.
56  * @param v2 Array of 3 float.
57  * @param out Scalar complex float.
58  */
59 __device__ __inline__ void dot(cuFloatComplex (&cv1)[3], float (&v2)[3], cuFloatComplex &out)
60 {
61  out = make_cuFloatComplex(0, 0);
62 
63  for(int n=0; n<3; n++)
64  {
65  out = cuCaddf(cuCmulf(cuConjf(cv1[n]), make_cuFloatComplex(v2[n], 0)), out);
66  }
67 }
68 
69 /**
70  * Dot product.
71  *
72  * Take the dot (inner) product of one real valued and one complex valued float array of size 3.
73  *
74  * @param v1 Array of 3 float.
75  * @param cv2 Array of 3 complex float.
76  * @param out Scalar complex float.
77  */
78 __device__ __inline__ void dot(float (&v1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex &out)
79 {
80  out = make_cuFloatComplex(0, 0);
81 
82  for(int n=0; n<3; n++)
83  {
84  out = cuCaddf(cuCmulf(make_cuFloatComplex(v1[n], 0), cv2[n]), out);
85  }
86 }
87 
88 /**
89  * Cross product.
90  *
91  * Take the cross (outer) product of two real valued float arrays of size 3.
92  *
93  * @param v1 Array of 3 float.
94  * @param v2 Array of 3 float.
95  * @param out Array of 3 float.
96  */
97 __device__ __inline__ void ext(float (&v1)[3], float (&v2)[3], float (&out)[3])
98 {
99  out[0] = v1[1]*v2[2] - v1[2]*v2[1];
100  out[1] = v1[2]*v2[0] - v1[0]*v2[2];
101  out[2] = v1[0]*v2[1] - v1[1]*v2[0];
102 }
103 
104 
105 /**
106  * Cross product.
107  *
108  * Take the cross (outer) product of two complex valued float arrays of size 3.
109  *
110  * @param cv1 Array of 3 complex float.
111  * @param cv2 Array of 3 complex float.
112  * @param out Array of 3 complex float.
113  */
114 __device__ __inline__ void ext(cuFloatComplex (&cv1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex (&out)[3])
115 {
116  out[0] = cuCsubf(cuCmulf(cv1[1], cv2[2]), cuCmulf(cv1[2], cv2[1]));
117  out[1] = cuCsubf(cuCmulf(cv1[2], cv2[0]), cuCmulf(cv1[0], cv2[2]));
118  out[2] = cuCsubf(cuCmulf(cv1[0], cv2[1]), cuCmulf(cv1[1], cv2[0]));
119 }
120 
121 /**
122  * Cross product.
123  *
124  * Take the cross (outer) product of one complex valued and one real valued float array of size 3.
125  *
126  * @param cv1 Array of 3 complex float.
127  * @param v2 Array of 3 float.
128  * @param out Array of 3 complex float.
129  */
130 __device__ __inline__ void ext(cuFloatComplex (&cv1)[3], float (&v2)[3], cuFloatComplex (&out)[3])
131 {
132  out[0] = cuCsubf(cuCmulf(cv1[1], make_cuFloatComplex(v2[2],0)), cuCmulf(cv1[2], make_cuFloatComplex(v2[1],0)));
133  out[1] = cuCsubf(cuCmulf(cv1[2], make_cuFloatComplex(v2[0],0)), cuCmulf(cv1[0], make_cuFloatComplex(v2[2],0)));
134  out[2] = cuCsubf(cuCmulf(cv1[0], make_cuFloatComplex(v2[1],0)), cuCmulf(cv1[1], make_cuFloatComplex(v2[0],0)));
135 }
136 
137 /**
138  * Cross product.
139  *
140  * Take the cross (outer) product of one real valued and one complex valued float array of size 3.
141  *
142  * @param v1 Array of 3 float.
143  * @param cv2 Array of 3 complex float.
144  * @param out Array of 3 complex float.
145  */
146 __device__ __inline__ void ext(float (&v1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex (&out)[3])
147 {
148  out[0] = cuCsubf(cuCmulf(make_cuFloatComplex(v1[1],0), cv2[2]), cuCmulf(make_cuFloatComplex(v1[2],0), cv2[1]));
149  out[1] = cuCsubf(cuCmulf(make_cuFloatComplex(v1[2],0), cv2[0]), cuCmulf(make_cuFloatComplex(v1[0],0), cv2[2]));
150  out[2] = cuCsubf(cuCmulf(make_cuFloatComplex(v1[0],0), cv2[1]), cuCmulf(make_cuFloatComplex(v1[1],0), cv2[0]));
151 }
152 
153 /**
154  * Component-wise vector difference.
155  *
156  * Subtract two real valued vectors of size 3, element-wise.
157  *
158  * @param v1 Array of 3 float.
159  * @param v2 Array of 3 float.
160  * @param out Array of 3 float.
161  */
162 __device__ __inline__ void diff(float (&v1)[3], float (&v2)[3], float (&out)[3])
163 {
164  for(int n=0; n<3; n++)
165  {
166  out[n] = v1[n] - v2[n];
167  }
168 }
169 
170 /**
171  * Component-wise vector difference.
172  *
173  * Subtract two complex valued vectors of size 3, element-wise.
174  *
175  * @param cv1 Array of 3 complex float.
176  * @param cv2 Array of 3 complex float.
177  * @param out Array of 3 complex float.
178  */
179 __device__ __inline__ void diff(cuFloatComplex (&cv1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex (&out)[3])
180 {
181  for(int n=0; n<3; n++)
182  {
183  out[n] = cuCsubf(cv1[n], cv2[n]);
184  }
185 }
186 
187 /**
188  * Absolute value.
189  *
190  * Calculate absolute value of real valued vector of size 3.
191  *
192  * @param v Array of 3 float.
193  * @param out Scalar float.
194  */
195 __device__ __inline__ void abs(float (&v)[3], float &out)
196 {
197  dot(v, v, out);
198  out = sqrt(out);
199 }
200 
201 /**
202  * Conjugate.
203  *
204  * Conjugate complex valued vector of size 3.
205  *
206  * @param cv Array of 3 complex float.
207  * @param out Array of 3 complex float.
208  */
209 __device__ __inline__ void conja(cuFloatComplex (&cv)[3], cuFloatComplex (&out)[3])
210 {
211  for(int n=0; n<3; n++)
212  {
213  out[n] = cuConjf(cv[n]);
214  }
215 }
216 
217 /**
218  * Normalize vector.
219  *
220  * Normalize real valued vector of size 3.
221  *
222  * @param v Array of 3 float.
223  * @param out Array of 3 float.
224  */
225 __device__ __inline__ void normalize(float (&v)[3], float (&out)[3])
226 {
227  float norm;
228  abs(v, norm);
229 
230 
231  if (norm == 0)
232  {
233  norm = 1;
234  }
235 
236  for( int n=0; n<3; n++)
237  {
238  out[n] = v[n] / norm;
239  }
240 }
241 
242 /**
243  * Vector addition.
244  *
245  * Add two real valued vectors of size 3 element-wise.
246  *
247  * @param v1 Array of 3 float.
248  * @param v2 Array of 3 float.
249  * @param out Array of 3 complex float.
250  */
251 __device__ __inline__ void add(float (&v1)[3], float (&v2)[3], float (&out)[3])
252 {
253  for(int n=0; n<3; n++)
254  {
255  out[n] = v1[n] + v2[n];
256  }
257 }
258 
259 /**
260  * Scalar multiplication.
261  *
262  * Multiply real valued vector of size 3 by real scalar, element-wise.
263  *
264  * @param v Array of 3 float.
265  * @param s Scalar float.
266  * @param out Array of 3 float.
267  */
268 __device__ __inline__ void s_mult(float (&v)[3], float &s, float (&out)[3])
269 {
270  for(int n=0; n<3; n++)
271  {
272  out[n] = s * v[n];
273  }
274 }
275 
276 /**
277  * Scalar multiplication.
278  *
279  * Multiply complex valued vector of size 3 by complex scalar, element-wise.
280  *
281  * @param cv Array of 3 complex float.
282  * @param cs Scalar complex float.
283  * @param out Array of 3 complex float.
284  */
285 __device__ __inline__ void s_mult(cuFloatComplex (&cv)[3], cuFloatComplex &cs, cuFloatComplex (&out)[3])
286 {
287  for(int n=0; n<3; n++)
288  {
289  out[n] = cuCmulf(cs, cv[n]);
290  }
291 }
292 
293 /**
294  * Scalar multiplication.
295  *
296  * Multiply real valued vector of size 3 by complex scalar, element-wise.
297  *
298  * @param v Array of 3 float.
299  * @param cs Scalar complex float.
300  * @param out Array of 3 complex float.
301  */
302 __device__ __inline__ void s_mult(float (&v)[3], cuFloatComplex &cs, cuFloatComplex (&out)[3])
303 {
304  for(int n=0; n<3; n++)
305  {
306  out[n] = cuCmulf(cs, make_cuFloatComplex(v[n],0));
307  }
308 }
309 
310 /**
311  * Scalar multiplication.
312  *
313  * Multiply complex valued vector of size 3 by real scalar, element-wise.
314  *
315  * @param cv Array of 3 complex float.
316  * @param s Scalar float.
317  * @param out Array of 3 complex float.
318  */
319 __device__ __inline__ void s_mult(cuFloatComplex (&cv)[3], const float &s, cuFloatComplex (&out)[3])
320 {
321  for(int n=0; n<3; n++)
322  {
323  out[n] = cuCmulf(make_cuFloatComplex(s,0), cv[n]);
324  }
325 }
326 
327 /**
328  * Snell's law reflection.
329  *
330  * Calculate reflected direction vector from incoming direction and normal vector.
331  *
332  * @param cvin Array of 3 complex float, incoming direction vector.
333  * @param normal Array of 3 float, normal vector of surface.
334  * @param out Array of 3 complex float.
335  */
336 __device__ __inline__ void snell(cuFloatComplex (&cvin)[3], float (&normal)[3], cuFloatComplex (&out)[3])
337 {
338  cuFloatComplex cfactor;
339  dot(cvin, normal, cfactor);
340 
341  cfactor = cuCmulf(make_cuFloatComplex(2.,0), cfactor);
342 
343  cuFloatComplex rhs[3];
344  s_mult(normal, cfactor, rhs);
345 
346  diff(cvin, rhs, out);
347 }
348 
349 /**
350  * Snell's law reflection.
351  *
352  * Calculate reflected direction vector from incoming direction and normal vector.
353  *
354  * @param vin Array of 3 float, incoming direction vector.
355  * @param normal Array of 3 float, normal vector of surface.
356  * @param out Array of 3 float.
357  */
358 __device__ __inline__ void snell(float (&vin)[3], float (&normal)[3], float (&out)[3])
359 {
360  float factor;
361  dot(vin, normal, factor);
362 
363  factor = 2. * factor;
364 
365  float rhs[3];
366  s_mult(normal, factor, rhs);
367 
368  diff(vin, rhs, out);
369 }
370 
371 /**
372  * Snell's law refraction.
373  *
374  * Calculate refracted direction vector from incoming direction and normal vector.
375  *
376  * @param vin Array of 3 double/float, incoming direction vector.
377  * @param normal Array of 3 double/float, normal vector of surface.
378  * @param mu Ratio of n1 to n2.
379  * @param out Array of 3 double/float.
380  */
381 __device__ __inline__ void snell_t(float (&vin)[3], float (&normal)[3], float mu, float (&out)[3])
382 {
383  float in_dot_n, factor1;
384  float term1[3], term2[3], temp1[3], temp2[3];
385 
386  dot(normal, vin, in_dot_n);
387 
388  factor1 = sqrt(1 - mu*mu * (1 - in_dot_n*in_dot_n));
389  s_mult(normal, factor1, term1);
390 
391  s_mult(normal, in_dot_n, temp1);
392  diff(vin, temp1, temp2);
393  s_mult(temp2, mu, term2);
394 
395  add(term1, term2, out);
396 }
397 
398 /**
399  * Dyadic product.
400  *
401  * Calculate dyadic product between two real valued float vectors of size 3.
402  *
403  * @param v1 Array of 3 float.
404  * @param v2 Array of 3 float.
405  * @param out Array of 3 float, nested inside array of size 3.
406  */
407 __device__ __inline__ void dyad(float (&v1)[3], float (&v2)[3], float (&out)[3][3])
408 {
409  for(int n=0; n<3; n++)
410  {
411  out[n][0] = v1[n] * v2[0];
412  out[n][1] = v1[n] * v2[1];
413  out[n][2] = v1[n] * v2[2];
414  }
415 }
416 
417 /**
418  * Matrix difference, element wise.
419  *
420  * Subtract two 3x3 matrices, element wise.
421  *
422  * @param m1 Array of 3 float, nested inside array of size 3.
423  * @param m2 Array of 3 float, nested inside array of size 3.
424  * @param out Array of 3 float, nested inside array of size 3.
425  */
426 __device__ __inline__ void matDiff(float (&m1)[3][3], float (&m2)[3][3], float (&out)[3][3])
427 {
428  for(int n=0; n<3; n++)
429  {
430  out[n][0] = m1[n][0] - m2[n][0];
431  out[n][1] = m1[n][1] - m2[n][1];
432  out[n][2] = m1[n][2] - m2[n][2];
433  }
434 }
435 
436 /**
437  * Matrix-vector product.
438  *
439  * Multiply a real valued 3x3 matrix and a real valued size 3 vector to generate a new real valued size 3 vector.
440  *
441  * @param m1 Array of 3 float, nested inside array of size 3.
442  * @param v1 Array of 3 float.
443  * @param out Array of 3 float.
444  */
445 __device__ __inline__ void matVec(float (&m1)[3][3], float (&v1)[3], float (&out)[3])
446 {
447  for(int n=0; n<3; n++)
448  {
449  out[n] = m1[n][0] * v1[0] + m1[n][1] * v1[1] + m1[n][2] * v1[2];
450  }
451 }
452 
453 /**
454  * Matrix-vector product.
455  *
456  * Multiply a real valued 3x3 matrix and a complex valued size 3 vector to generate a new complex valued size 3 vector.
457  *
458  * @param m1 Array of 3 float, nested inside array of size 3.
459  * @param cv1 Array of 3 complex float.
460  * @param out Array of 3 complex float.
461  */
462 __device__ __inline__ void matVec(float (&m1)[3][3], cuFloatComplex (&cv1)[3], cuFloatComplex (&out)[3])
463 {
464  for(int n=0; n<3; n++)
465  {
466  out[n] = cuCaddf(cuCmulf(make_cuFloatComplex(m1[n][0],0), cv1[0]),
467  cuCaddf(cuCmulf(make_cuFloatComplex(m1[n][1],0), cv1[1]),
468  cuCmulf(make_cuFloatComplex(m1[n][2],0), cv1[2])));
469  }
470 }
471 
472 /**
473  Matrix-vector multiplication.
474 
475  Uses mat from constant memory.
476 
477  @param cv1 Array of 3 float.
478  @param out Array of 3 float.
479  @param vec Whether to rotate as a vector or as a point.
480  */
481 __device__ __inline__ void matVec4(float (&mat)[16], float (&cv1)[3], float (&out)[3], bool vec = false)
482 {
483  if (vec)
484  {
485  for(int n=0; n<3; n++)
486  {
487  out[n] = mat[n*4] * cv1[0] + mat[1+n*4] * cv1[1] + mat[2+n*4] * cv1[2];
488  }
489  }
490 
491  else
492  {
493  for(int n=0; n<3; n++)
494  {
495  out[n] = mat[n*4] * cv1[0] + mat[1+n*4] * cv1[1] + mat[2+n*4] * cv1[2] + mat[3+n*4];
496  }
497  }
498 }
499 
500 /**
501  Matrix-vector multiplication.
502 
503  Multiply a vector by the inverse of a matrix.
504 
505  @param cv1 Array of 3 float.
506  @param out Array of 3 float.
507  @param vec Whether to rotate as a vector or as a point.
508  */
509 __device__ __inline__ void invmatVec4(float (&mat)[16], float (&cv1)[3], float (&out)[3], bool vec = false)
510 {
511  if (vec)
512  {
513  for(int n=0; n<3; n++)
514  {
515  out[n] = mat[n] * cv1[0] + mat[n+4] * cv1[1] + mat[n+8] * cv1[2];
516  }
517  }
518 
519  else
520  {
521  float temp;
522  for(int n=0; n<3; n++)
523  {
524  temp = -mat[n]*mat[3] - mat[n+4]*mat[7] - mat[n+8]*mat[11];
525  out[n] = mat[n] * cv1[0] + mat[n+4] * cv1[1] + mat[n+8] * cv1[2] + temp;
526  }
527  }
528 }
529 
530 /**
531  * Take complex exponential.
532  *
533  * Take complex exponential by decomposing into sine and cosine.
534  *
535  * @return res cuFloatComplex number.
536  */
537 __device__ __inline__ cuFloatComplex cuCexpf(cuFloatComplex z)
538 {
539  cuFloatComplex res;
540  float ys, yc;
541  float t = exp(z.x);
542  sincosf(z.y, &ys, &yc);
543  res.x = t*yc;
544  res.y = t*ys;
545 
546  return res;
547 }
548 
549 
550 /**
551  * Add scalar float to complex number.
552  *
553  * @return res cuFloatComplex number.
554  */
555 __device__ __inline__ cuFloatComplex cuCaddSf(cuFloatComplex a, float b)
556 {
557  cuFloatComplex res;
558  res.x = a.x + b;
559  res.y = a.y;
560 
561  return res;
562 }
563 
564 /**
565  * Add complex number to scalar float.
566  *
567  * @return res cuFloatComplex number.
568  */
569 __device__ __inline__ cuFloatComplex cuCaddSf(float a, cuFloatComplex b)
570 {
571  cuFloatComplex res;
572  res.x = b.x + a;
573  res.y = b.y;
574 
575  return res;
576 }
577 
578 /**
579  * Subtract scalar float from complex number.
580  *
581  * @return res cuFloatComplex number.
582  */
583 __device__ __inline__ cuFloatComplex cuCsubSf(cuFloatComplex a, float b)
584 {
585  cuFloatComplex res;
586  res.x = a.x - b;
587  res.y = a.y;
588 
589  return res;
590 }
591 
592 /**
593  * Add complex number to scalar float.
594  *
595  * @return res cuFloatComplex number.
596  */
597 __device__ __inline__ cuFloatComplex cuCsubSf(float a, cuFloatComplex b)
598 {
599  cuFloatComplex res;
600  res.x = a - b.x;
601  res.y = -b.y;
602 
603  return res;
604 }
605 
606 /**
607  * Multiply complex number by scalar float.
608  *
609  * @return res cuFloatComplex number.
610  */
611 __device__ __inline__ cuFloatComplex cuCmulSf(cuFloatComplex a, float b)
612 {
613  cuFloatComplex res;
614  res.x = a.x*b;
615  res.y = a.y*b;
616 
617  return res;
618 }
619 
620 /**
621  * Multiply complex number by scalar float.
622  *
623  * @return res cuFloatComplex number.
624  */
625 __device__ __inline__ cuFloatComplex cuCmulSf(float a, cuFloatComplex b)
626 {
627  cuFloatComplex res;
628  res.x = a*b.x;
629  res.y = a*b.y;
630 
631  return res;
632 }
633 
634 /**
635  * Divide complex number by scalar float.
636  *
637  * @return res cuFloatComplex number.
638  */
639 __device__ __inline__ cuFloatComplex cuCdivSf(cuFloatComplex a, float b)
640 {
641  cuFloatComplex res;
642  res.x = a.x/b;
643  res.y = a.y/b;
644 
645  return res;
646 }
647 
648 /**
649  * Divide scalar by complex float.
650  *
651  * @return res cuFloatComplex number.
652  */
653 __device__ __inline__ cuFloatComplex cuCdivSf(float a, cuFloatComplex b)
654 {
655  cuFloatComplex ac, res;
656  ac = make_cuFloatComplex(a, 0.);
657 
658  res = cuCdivf(ac, b);
659 
660  return res;
661 }
__device__ __inline__ cuFloatComplex cuCsubSf(cuFloatComplex a, float b)
Definition: GUtils.h:583
__device__ __inline__ void conja(cuFloatComplex(&cv)[3], cuFloatComplex(&out)[3])
Definition: GUtils.h:209
__device__ __inline__ cuFloatComplex cuCaddSf(cuFloatComplex a, float b)
Definition: GUtils.h:555
__device__ __inline__ void snell_t(float(&vin)[3], float(&normal)[3], float mu, float(&out)[3])
Definition: GUtils.h:381
__device__ __inline__ void diff(float(&v1)[3], float(&v2)[3], float(&out)[3])
Definition: GUtils.h:162
__device__ __inline__ void invmatVec4(float(&mat)[16], float(&cv1)[3], float(&out)[3], bool vec=false)
Definition: GUtils.h:509
__device__ __inline__ void abs(float(&v)[3], float &out)
Definition: GUtils.h:195
__device__ __inline__ void s_mult(float(&v)[3], float &s, float(&out)[3])
Definition: GUtils.h:268
__device__ __inline__ void matVec4(float(&mat)[16], float(&cv1)[3], float(&out)[3], bool vec=false)
Definition: GUtils.h:481
__device__ __inline__ cuFloatComplex cuCdivSf(cuFloatComplex a, float b)
Definition: GUtils.h:639
__device__ __inline__ void snell(cuFloatComplex(&cvin)[3], float(&normal)[3], cuFloatComplex(&out)[3])
Definition: GUtils.h:336
__device__ __inline__ void add(float(&v1)[3], float(&v2)[3], float(&out)[3])
Definition: GUtils.h:251
__device__ __inline__ void matDiff(float(&m1)[3][3], float(&m2)[3][3], float(&out)[3][3])
Definition: GUtils.h:426
__device__ __inline__ void dot(float(&v1)[3], float(&v2)[3], float &out)
Definition: GUtils.h:21
__device__ __inline__ void dyad(float(&v1)[3], float(&v2)[3], float(&out)[3][3])
Definition: GUtils.h:407
__device__ __inline__ void ext(float(&v1)[3], float(&v2)[3], float(&out)[3])
Definition: GUtils.h:97
__device__ __inline__ void normalize(float(&v)[3], float(&out)[3])
Definition: GUtils.h:225
__device__ __inline__ cuFloatComplex cuCexpf(cuFloatComplex z)
Definition: GUtils.h:537
__device__ __inline__ cuFloatComplex cuCmulSf(cuFloatComplex a, float b)
Definition: GUtils.h:611
__device__ __inline__ void matVec(float(&m1)[3][3], float(&v1)[3], float(&out)[3])
Definition: GUtils.h:445