PyPO User Manual
 
Loading...
Searching...
No Matches
GUtils.h
Go to the documentation of this file.
1#include <math.h>
2#include <cuda.h>
3#include <cuComplex.h>
4
5/*! \file GUtils.h
6 \brief Linear algebra functions for the CUDA version of PyPO.
7
8 Contains float overloaded functions for doing basic 3D vector operations.
9 For the CUDA complex valued linear algebra, we employ the cuComplex.h library.
10*/
11
12/**
13 * Dot product.
14 *
15 * Take the dot (inner) product of two real valued arrays of size 3.
16 *
17 * @param v1 Array of 3 float.
18 * @param v2 Array of 3 float.
19 * @param out Scalar float.
20 */
21__device__ __inline__ void dot(float (&v1)[3], float (&v2)[3], float &out)
22{
23 out = 0;
24
25 for(int n=0; n<3; n++)
26 {
27 out += v1[n] * v2[n];
28 }
29}
30
31/**
32 * Dot product.
33 *
34 * Take the dot (inner) product of two complex valued float arrays of size 3.
35 *
36 * @param cv1 Array of 3 complex float.
37 * @param cv2 Array of 3 complex float.
38 * @param out Scalar complex float.
39 */
40__device__ __inline__ void dot(cuFloatComplex (&cv1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex &out)
41{
42 out = make_cuFloatComplex(0, 0);
43
44 for(int n=0; n<3; n++)
45 {
46 out = cuCaddf(cuCmulf(cuConjf(cv1[n]), cv2[n]), out);
47 }
48}
49
50/**
51 * Dot product.
52 *
53 * Take the dot (inner) product of one complex valued and one real valued float array of size 3.
54 *
55 * @param cv1 Array of 3 complex float.
56 * @param v2 Array of 3 float.
57 * @param out Scalar complex float.
58 */
59__device__ __inline__ void dot(cuFloatComplex (&cv1)[3], float (&v2)[3], cuFloatComplex &out)
60{
61 out = make_cuFloatComplex(0, 0);
62
63 for(int n=0; n<3; n++)
64 {
65 out = cuCaddf(cuCmulf(cuConjf(cv1[n]), make_cuFloatComplex(v2[n], 0)), out);
66 }
67}
68
69/**
70 * Dot product.
71 *
72 * Take the dot (inner) product of one real valued and one complex valued float array of size 3.
73 *
74 * @param v1 Array of 3 float.
75 * @param cv2 Array of 3 complex float.
76 * @param out Scalar complex float.
77 */
78__device__ __inline__ void dot(float (&v1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex &out)
79{
80 out = make_cuFloatComplex(0, 0);
81
82 for(int n=0; n<3; n++)
83 {
84 out = cuCaddf(cuCmulf(make_cuFloatComplex(v1[n], 0), cv2[n]), out);
85 }
86}
87
88/**
89 * Cross product.
90 *
91 * Take the cross (outer) product of two real valued float arrays of size 3.
92 *
93 * @param v1 Array of 3 float.
94 * @param v2 Array of 3 float.
95 * @param out Array of 3 float.
96 */
97__device__ __inline__ void ext(float (&v1)[3], float (&v2)[3], float (&out)[3])
98{
99 out[0] = v1[1]*v2[2] - v1[2]*v2[1];
100 out[1] = v1[2]*v2[0] - v1[0]*v2[2];
101 out[2] = v1[0]*v2[1] - v1[1]*v2[0];
102}
103
104
105/**
106 * Cross product.
107 *
108 * Take the cross (outer) product of two complex valued float arrays of size 3.
109 *
110 * @param cv1 Array of 3 complex float.
111 * @param cv2 Array of 3 complex float.
112 * @param out Array of 3 complex float.
113 */
114__device__ __inline__ void ext(cuFloatComplex (&cv1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex (&out)[3])
115{
116 out[0] = cuCsubf(cuCmulf(cv1[1], cv2[2]), cuCmulf(cv1[2], cv2[1]));
117 out[1] = cuCsubf(cuCmulf(cv1[2], cv2[0]), cuCmulf(cv1[0], cv2[2]));
118 out[2] = cuCsubf(cuCmulf(cv1[0], cv2[1]), cuCmulf(cv1[1], cv2[0]));
119}
120
121/**
122 * Cross product.
123 *
124 * Take the cross (outer) product of one complex valued and one real valued float array of size 3.
125 *
126 * @param cv1 Array of 3 complex float.
127 * @param v2 Array of 3 float.
128 * @param out Array of 3 complex float.
129 */
130__device__ __inline__ void ext(cuFloatComplex (&cv1)[3], float (&v2)[3], cuFloatComplex (&out)[3])
131{
132 out[0] = cuCsubf(cuCmulf(cv1[1], make_cuFloatComplex(v2[2],0)), cuCmulf(cv1[2], make_cuFloatComplex(v2[1],0)));
133 out[1] = cuCsubf(cuCmulf(cv1[2], make_cuFloatComplex(v2[0],0)), cuCmulf(cv1[0], make_cuFloatComplex(v2[2],0)));
134 out[2] = cuCsubf(cuCmulf(cv1[0], make_cuFloatComplex(v2[1],0)), cuCmulf(cv1[1], make_cuFloatComplex(v2[0],0)));
135}
136
137/**
138 * Cross product.
139 *
140 * Take the cross (outer) product of one real valued and one complex valued float array of size 3.
141 *
142 * @param v1 Array of 3 float.
143 * @param cv2 Array of 3 complex float.
144 * @param out Array of 3 complex float.
145 */
146__device__ __inline__ void ext(float (&v1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex (&out)[3])
147{
148 out[0] = cuCsubf(cuCmulf(make_cuFloatComplex(v1[1],0), cv2[2]), cuCmulf(make_cuFloatComplex(v1[2],0), cv2[1]));
149 out[1] = cuCsubf(cuCmulf(make_cuFloatComplex(v1[2],0), cv2[0]), cuCmulf(make_cuFloatComplex(v1[0],0), cv2[2]));
150 out[2] = cuCsubf(cuCmulf(make_cuFloatComplex(v1[0],0), cv2[1]), cuCmulf(make_cuFloatComplex(v1[1],0), cv2[0]));
151}
152
153/**
154 * Component-wise vector difference.
155 *
156 * Subtract two real valued vectors of size 3, element-wise.
157 *
158 * @param v1 Array of 3 float.
159 * @param v2 Array of 3 float.
160 * @param out Array of 3 float.
161 */
162__device__ __inline__ void diff(float (&v1)[3], float (&v2)[3], float (&out)[3])
163{
164 for(int n=0; n<3; n++)
165 {
166 out[n] = v1[n] - v2[n];
167 }
168}
169
170/**
171 * Component-wise vector difference.
172 *
173 * Subtract two complex valued vectors of size 3, element-wise.
174 *
175 * @param cv1 Array of 3 complex float.
176 * @param cv2 Array of 3 complex float.
177 * @param out Array of 3 complex float.
178 */
179__device__ __inline__ void diff(cuFloatComplex (&cv1)[3], cuFloatComplex (&cv2)[3], cuFloatComplex (&out)[3])
180{
181 for(int n=0; n<3; n++)
182 {
183 out[n] = cuCsubf(cv1[n], cv2[n]);
184 }
185}
186
187/**
188 * Absolute value.
189 *
190 * Calculate absolute value of real valued vector of size 3.
191 *
192 * @param v Array of 3 float.
193 * @param out Scalar float.
194 */
195__device__ __inline__ void abs(float (&v)[3], float &out)
196{
197 dot(v, v, out);
198 out = sqrt(out);
199}
200
201/**
202 * Conjugate.
203 *
204 * Conjugate complex valued vector of size 3.
205 *
206 * @param cv Array of 3 complex float.
207 * @param out Array of 3 complex float.
208 */
209__device__ __inline__ void conja(cuFloatComplex (&cv)[3], cuFloatComplex (&out)[3])
210{
211 for(int n=0; n<3; n++)
212 {
213 out[n] = cuConjf(cv[n]);
214 }
215}
216
217/**
218 * Normalize vector.
219 *
220 * Normalize real valued vector of size 3.
221 *
222 * @param v Array of 3 float.
223 * @param out Array of 3 float.
224 */
225__device__ __inline__ void normalize(float (&v)[3], float (&out)[3])
226{
227 float norm;
228 abs(v, norm);
229
230
231 if (norm == 0)
232 {
233 norm = 1;
234 }
235
236 for( int n=0; n<3; n++)
237 {
238 out[n] = v[n] / norm;
239 }
240}
241
242/**
243 * Vector addition.
244 *
245 * Add two real valued vectors of size 3 element-wise.
246 *
247 * @param v1 Array of 3 float.
248 * @param v2 Array of 3 float.
249 * @param out Array of 3 complex float.
250 */
251__device__ __inline__ void add(float (&v1)[3], float (&v2)[3], float (&out)[3])
252{
253 for(int n=0; n<3; n++)
254 {
255 out[n] = v1[n] + v2[n];
256 }
257}
258
259/**
260 * Scalar multiplication.
261 *
262 * Multiply real valued vector of size 3 by real scalar, element-wise.
263 *
264 * @param v Array of 3 float.
265 * @param s Scalar float.
266 * @param out Array of 3 float.
267 */
268__device__ __inline__ void s_mult(float (&v)[3], float &s, float (&out)[3])
269{
270 for(int n=0; n<3; n++)
271 {
272 out[n] = s * v[n];
273 }
274}
275
276/**
277 * Scalar multiplication.
278 *
279 * Multiply complex valued vector of size 3 by complex scalar, element-wise.
280 *
281 * @param cv Array of 3 complex float.
282 * @param cs Scalar complex float.
283 * @param out Array of 3 complex float.
284 */
285__device__ __inline__ void s_mult(cuFloatComplex (&cv)[3], cuFloatComplex &cs, cuFloatComplex (&out)[3])
286{
287 for(int n=0; n<3; n++)
288 {
289 out[n] = cuCmulf(cs, cv[n]);
290 }
291}
292
293/**
294 * Scalar multiplication.
295 *
296 * Multiply real valued vector of size 3 by complex scalar, element-wise.
297 *
298 * @param v Array of 3 float.
299 * @param cs Scalar complex float.
300 * @param out Array of 3 complex float.
301 */
302__device__ __inline__ void s_mult(float (&v)[3], cuFloatComplex &cs, cuFloatComplex (&out)[3])
303{
304 for(int n=0; n<3; n++)
305 {
306 out[n] = cuCmulf(cs, make_cuFloatComplex(v[n],0));
307 }
308}
309
310/**
311 * Scalar multiplication.
312 *
313 * Multiply complex valued vector of size 3 by real scalar, element-wise.
314 *
315 * @param cv Array of 3 complex float.
316 * @param s Scalar float.
317 * @param out Array of 3 complex float.
318 */
319__device__ __inline__ void s_mult(cuFloatComplex (&cv)[3], const float &s, cuFloatComplex (&out)[3])
320{
321 for(int n=0; n<3; n++)
322 {
323 out[n] = cuCmulf(make_cuFloatComplex(s,0), cv[n]);
324 }
325}
326
327/**
328 * Snell's law reflection.
329 *
330 * Calculate reflected direction vector from incoming direction and normal vector.
331 *
332 * @param cvin Array of 3 complex float, incoming direction vector.
333 * @param normal Array of 3 float, normal vector of surface.
334 * @param out Array of 3 complex float.
335 */
336__device__ __inline__ void snell(cuFloatComplex (&cvin)[3], float (&normal)[3], cuFloatComplex (&out)[3])
337{
338 cuFloatComplex cfactor;
339 dot(cvin, normal, cfactor);
340
341 cfactor = cuCmulf(make_cuFloatComplex(2.,0), cfactor);
342
343 cuFloatComplex rhs[3];
344 s_mult(normal, cfactor, rhs);
345
346 diff(cvin, rhs, out);
347}
348
349/**
350 * Snell's law reflection.
351 *
352 * Calculate reflected direction vector from incoming direction and normal vector.
353 *
354 * @param vin Array of 3 float, incoming direction vector.
355 * @param normal Array of 3 float, normal vector of surface.
356 * @param out Array of 3 float.
357 */
358__device__ __inline__ void snell(float (&vin)[3], float (&normal)[3], float (&out)[3])
359{
360 float factor;
361 dot(vin, normal, factor);
362
363 factor = 2. * factor;
364
365 float rhs[3];
366 s_mult(normal, factor, rhs);
367
368 diff(vin, rhs, out);
369}
370
371/**
372 * Snell's law refraction.
373 *
374 * Calculate refracted direction vector from incoming direction and normal vector.
375 *
376 * @param vin Array of 3 double/float, incoming direction vector.
377 * @param normal Array of 3 double/float, normal vector of surface.
378 * @param mu Ratio of n1 to n2.
379 * @param out Array of 3 double/float.
380 */
381__device__ __inline__ void snell_t(float (&vin)[3], float (&normal)[3], float mu, float (&out)[3])
382{
383 float in_dot_n, factor1;
384 float term1[3], term2[3], temp1[3], temp2[3];
385
386 dot(normal, vin, in_dot_n);
387
388 factor1 = sqrt(1 - mu*mu * (1 - in_dot_n*in_dot_n));
389 s_mult(normal, factor1, term1);
390
391 s_mult(normal, in_dot_n, temp1);
392 diff(vin, temp1, temp2);
393 s_mult(temp2, mu, term2);
394
395 add(term1, term2, out);
396}
397
398/**
399 * Dyadic product.
400 *
401 * Calculate dyadic product between two real valued float vectors of size 3.
402 *
403 * @param v1 Array of 3 float.
404 * @param v2 Array of 3 float.
405 * @param out Array of 3 float, nested inside array of size 3.
406 */
407__device__ __inline__ void dyad(float (&v1)[3], float (&v2)[3], float (&out)[3][3])
408{
409 for(int n=0; n<3; n++)
410 {
411 out[n][0] = v1[n] * v2[0];
412 out[n][1] = v1[n] * v2[1];
413 out[n][2] = v1[n] * v2[2];
414 }
415}
416
417/**
418 * Matrix difference, element wise.
419 *
420 * Subtract two 3x3 matrices, element wise.
421 *
422 * @param m1 Array of 3 float, nested inside array of size 3.
423 * @param m2 Array of 3 float, nested inside array of size 3.
424 * @param out Array of 3 float, nested inside array of size 3.
425 */
426__device__ __inline__ void matDiff(float (&m1)[3][3], float (&m2)[3][3], float (&out)[3][3])
427{
428 for(int n=0; n<3; n++)
429 {
430 out[n][0] = m1[n][0] - m2[n][0];
431 out[n][1] = m1[n][1] - m2[n][1];
432 out[n][2] = m1[n][2] - m2[n][2];
433 }
434}
435
436/**
437 * Matrix-vector product.
438 *
439 * Multiply a real valued 3x3 matrix and a real valued size 3 vector to generate a new real valued size 3 vector.
440 *
441 * @param m1 Array of 3 float, nested inside array of size 3.
442 * @param v1 Array of 3 float.
443 * @param out Array of 3 float.
444 */
445__device__ __inline__ void matVec(float (&m1)[3][3], float (&v1)[3], float (&out)[3])
446{
447 for(int n=0; n<3; n++)
448 {
449 out[n] = m1[n][0] * v1[0] + m1[n][1] * v1[1] + m1[n][2] * v1[2];
450 }
451}
452
453/**
454 * Matrix-vector product.
455 *
456 * Multiply a real valued 3x3 matrix and a complex valued size 3 vector to generate a new complex valued size 3 vector.
457 *
458 * @param m1 Array of 3 float, nested inside array of size 3.
459 * @param cv1 Array of 3 complex float.
460 * @param out Array of 3 complex float.
461 */
462__device__ __inline__ void matVec(float (&m1)[3][3], cuFloatComplex (&cv1)[3], cuFloatComplex (&out)[3])
463{
464 for(int n=0; n<3; n++)
465 {
466 out[n] = cuCaddf(cuCmulf(make_cuFloatComplex(m1[n][0],0), cv1[0]),
467 cuCaddf(cuCmulf(make_cuFloatComplex(m1[n][1],0), cv1[1]),
468 cuCmulf(make_cuFloatComplex(m1[n][2],0), cv1[2])));
469 }
470}
471
472/**
473 Matrix-vector multiplication.
474
475 Uses mat from constant memory.
476
477 @param cv1 Array of 3 float.
478 @param out Array of 3 float.
479 @param vec Whether to rotate as a vector or as a point.
480 */
481__device__ __inline__ void matVec4(float (&mat)[16], float (&cv1)[3], float (&out)[3], bool vec = false)
482{
483 if (vec)
484 {
485 for(int n=0; n<3; n++)
486 {
487 out[n] = mat[n*4] * cv1[0] + mat[1+n*4] * cv1[1] + mat[2+n*4] * cv1[2];
488 }
489 }
490
491 else
492 {
493 for(int n=0; n<3; n++)
494 {
495 out[n] = mat[n*4] * cv1[0] + mat[1+n*4] * cv1[1] + mat[2+n*4] * cv1[2] + mat[3+n*4];
496 }
497 }
498}
499
500/**
501 Matrix-vector multiplication.
502
503 Multiply a vector by the inverse of a matrix.
504
505 @param cv1 Array of 3 float.
506 @param out Array of 3 float.
507 @param vec Whether to rotate as a vector or as a point.
508 */
509__device__ __inline__ void invmatVec4(float (&mat)[16], float (&cv1)[3], float (&out)[3], bool vec = false)
510{
511 if (vec)
512 {
513 for(int n=0; n<3; n++)
514 {
515 out[n] = mat[n] * cv1[0] + mat[n+4] * cv1[1] + mat[n+8] * cv1[2];
516 }
517 }
518
519 else
520 {
521 float temp;
522 for(int n=0; n<3; n++)
523 {
524 temp = -mat[n]*mat[3] - mat[n+4]*mat[7] - mat[n+8]*mat[11];
525 out[n] = mat[n] * cv1[0] + mat[n+4] * cv1[1] + mat[n+8] * cv1[2] + temp;
526 }
527 }
528}
529
530/**
531 * Take complex exponential.
532 *
533 * Take complex exponential by decomposing into sine and cosine.
534 *
535 * @return res cuFloatComplex number.
536 */
537__device__ __inline__ cuFloatComplex expCo(cuFloatComplex z)
538{
539 cuFloatComplex res;
540 float t = exp(z.x);
541 float ys = sin(z.y);
542 float yc = cos(z.y);
543 res.x = t*yc;
544 res.y = t*ys;
545
546 return res;
547}
__device__ __inline__ void conja(cuFloatComplex(&cv)[3], cuFloatComplex(&out)[3])
Definition GUtils.h:209
__device__ __inline__ void snell_t(float(&vin)[3], float(&normal)[3], float mu, float(&out)[3])
Definition GUtils.h:381
__device__ __inline__ cuFloatComplex expCo(cuFloatComplex z)
Definition GUtils.h:537
__device__ __inline__ void diff(float(&v1)[3], float(&v2)[3], float(&out)[3])
Definition GUtils.h:162
__device__ __inline__ void invmatVec4(float(&mat)[16], float(&cv1)[3], float(&out)[3], bool vec=false)
Definition GUtils.h:509
__device__ __inline__ void abs(float(&v)[3], float &out)
Definition GUtils.h:195
__device__ __inline__ void s_mult(float(&v)[3], float &s, float(&out)[3])
Definition GUtils.h:268
__device__ __inline__ void matVec4(float(&mat)[16], float(&cv1)[3], float(&out)[3], bool vec=false)
Definition GUtils.h:481
__device__ __inline__ void snell(cuFloatComplex(&cvin)[3], float(&normal)[3], cuFloatComplex(&out)[3])
Definition GUtils.h:336
__device__ __inline__ void add(float(&v1)[3], float(&v2)[3], float(&out)[3])
Definition GUtils.h:251
__device__ __inline__ void matDiff(float(&m1)[3][3], float(&m2)[3][3], float(&out)[3][3])
Definition GUtils.h:426
__device__ __inline__ void dot(float(&v1)[3], float(&v2)[3], float &out)
Definition GUtils.h:21
__device__ __inline__ void dyad(float(&v1)[3], float(&v2)[3], float(&out)[3][3])
Definition GUtils.h:407
__device__ __inline__ void ext(float(&v1)[3], float(&v2)[3], float(&out)[3])
Definition GUtils.h:97
__device__ __inline__ void normalize(float(&v)[3], float(&out)[3])
Definition GUtils.h:225
__device__ __inline__ void matVec(float(&m1)[3][3], float(&v1)[3], float(&out)[3])
Definition GUtils.h:445