PyPO User Manual
 
Loading...
Searching...
No Matches
MemUtils.h
Go to the documentation of this file.
1#include <iostream>
2#include <vector>
3#include <cuda.h>
4#include <cuComplex.h>
5
6#ifndef __MemUtils_h
7#define __MemUtils_h
8#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
9
10/*! \file MemUtils.h
11 \brief Utility class for CUDA memory allocations.
12
13 Contains often used memory allocation/copy/deallocation operations between host and device.
14*/
15
16
17/**
18 * Check CUDA API error status of call.
19 *
20 * Wrapper for finding errors in CUDA API calls.
21 *
22 * @param code The errorcode returned from failed API call.
23 * @param file The file in which failure occured.
24 * @param line The line in file in which error occured.
25 * @param abort Exit code upon error.
26 */
27inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
28{
29 if (code != cudaSuccess)
30 {
31 fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
32 if (abort) exit(code);
33 }
34}
35
36/**
37 * @class
38 * Utility class for memory allocations/copies between CUDA and host.
39 */
41{
42 public:
43 inline std::vector<float*> cuMallFloat(int &n, int &size);
44 inline std::vector<float*> cuMallFloatStack(int &n, int &size);
45 inline std::vector<cuFloatComplex*> cuMallComplex(int &n, int &size);
46 inline std::vector<cuFloatComplex*> cuMallComplexStack(int &n, int &size);
47
48 inline void cuMemCpFloat(std::vector<float*> vecFloat, std::vector<float*> vecData, int &size, bool H2D = true);
49 inline void cuMemCpComplex(std::vector<cuFloatComplex*> vecFloat, std::vector<cuFloatComplex*> vecData, int &size, bool H2D = true);
50
51 inline void deallocFloatHost(std::vector<float*> vecFloat);
52 inline void deallocComplexHost(std::vector<cuFloatComplex*> vecFloat);
53};
54#endif
55
56/**
57 * Allocate memory for floats on GPU and return pointers.
58 *
59 * @param n Number of pointers to allocate on GPU.
60 * @param size Number of elements to allocate.
61 *
62 * @return out Vector containing GPU-allocated pointers.
63 */
64inline std::vector<float*> MemUtils::cuMallFloat(int &n, int &size)
65{
66 std::vector<float*> out(n, nullptr);
67 for(int i=0; i<n; i++)
68 {
69 float *p;
70 gpuErrchk( cudaMalloc((void**)&p, size * sizeof(float)) );
71 out[i] = p;
72 }
73 return out;
74}
75
76/**
77 * Allocate memory for floats on host stack and return pointers.
78 *
79 * @param n Number of pointers to allocate on stack.
80 * @param size Number of elements to allocate.
81 *
82 * @return out Vector containing stack-allocated pointers.
83 */
84inline std::vector<float*> MemUtils::cuMallFloatStack(int &n, int &size)
85{
86 std::vector<float*> out(n, nullptr);
87 for(int i=0; i<n; i++)
88 {
89 float *p = new float[size];
90 out[i] = p;
91 }
92 return out;
93}
94
95/**
96 * Allocate memory for cuFloatComplex on GPU and return pointers.
97 *
98 * @param n Number of pointers to allocate on GPU.
99 * @param size Number of elements to allocate.
100 *
101 * @return out Vector containing GPU-allocated pointers.
102 */
103inline std::vector<cuFloatComplex*> MemUtils::cuMallComplex(int &n, int &size)
104{
105 std::vector<cuFloatComplex*> out(n, nullptr);
106 for(int i=0; i<n; i++)
107 {
108 cuFloatComplex *p;
109 gpuErrchk( cudaMalloc((void**)&p, size * sizeof(cuFloatComplex)) );
110 out[i] = p;
111 }
112 return out;
113}
114
115/**
116 * Allocate memory for cuFloatComplex on local stack and return pointers.
117 *
118 * @param n Number of pointers to allocate on GPU.
119 * @param size Number of elements to allocate.
120 *
121 * @return out Vector containing stack-allocated pointers.
122 */
123inline std::vector<cuFloatComplex*> MemUtils::cuMallComplexStack(int &n, int &size)
124{
125 std::vector<cuFloatComplex*> out(n, nullptr);
126 for(int i=0; i<n; i++)
127 {
128 cuFloatComplex *p = new cuFloatComplex[size];
129 out[i] = p;
130 }
131 return out;
132}
133
134/**
135 * Copy local arrays of floats to allocated memory on GPU.
136 *
137 * @param vecFloat Vector containing allocated pointers to arrays of floats.
138 * @param vecData Vector containing pointers to local arrays.
139 * @param size Number of elements to copy.
140 * @param H2D Direction of copy. If true (default), copy from host to device. If false, copy from device to host.
141 */
142inline void MemUtils::cuMemCpFloat(std::vector<float*> vecFloat, std::vector<float*> vecData, int &size, bool H2D)
143{
144 int n = vecFloat.size();
145 for(int i=0; i<n; i++)
146 {
147 if(H2D) {gpuErrchk( cudaMemcpy(vecFloat[i], vecData[i], size * sizeof(float), cudaMemcpyHostToDevice) );}
148
149 else {gpuErrchk( cudaMemcpy(vecFloat[i], vecData[i], size * sizeof(float), cudaMemcpyDeviceToHost) );}
150 }
151}
152
153/**
154 * Copy local arrays of cuFloatComplex to allocated memory on GPU.
155 *
156 * @param vecFloat Vector containing allocated pointers to arrays of floats.
157 * @param vecData Vector containing pointers to local arrays.
158 * @param size Number of elements to copy.
159 * @param H2D Direction of copy. If true (default), copy from host to device. If false, copy from device to host.
160 */
161inline void MemUtils::cuMemCpComplex(std::vector<cuFloatComplex*> vecFloat, std::vector<cuFloatComplex*> vecData, int &size, bool H2D)
162{
163 int n = vecFloat.size();
164 for(int i=0; i<n; i++)
165 {
166 if(H2D) {gpuErrchk( cudaMemcpy(vecFloat[i], vecData[i], size * sizeof(cuFloatComplex), cudaMemcpyHostToDevice) );}
167
168 else {gpuErrchk( cudaMemcpy(vecFloat[i], vecData[i], size * sizeof(cuFloatComplex), cudaMemcpyDeviceToHost) );}
169 }
170}
171
172/**
173 * Deallocate local arrays of floats.
174 *
175 * @param vecFloat Vector containing pointers to local arrays to deallocate.
176 */
177inline void MemUtils::deallocComplexHost(std::vector<cuFloatComplex*> vecFloat)
178{
179 int n = vecFloat.size();
180 for(int i=0; i<n; i++)
181 {
182 delete vecFloat[i];
183 }
184}
185
186/**
187 * Deallocate local arrays of cuFloatComplex.
188 *
189 * @param vecFloat Vector containing pointers to local arrays to deallocate.
190 */
191inline void MemUtils::deallocFloatHost(std::vector<float*> vecFloat)
192{
193 int n = vecFloat.size();
194 for(int i=0; i<n; i++)
195 {
196 delete vecFloat[i];
197 }
198}
void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
Definition MemUtils.h:27
Definition MemUtils.h:41
std::vector< float * > cuMallFloat(int &n, int &size)
Definition MemUtils.h:64
void cuMemCpComplex(std::vector< cuFloatComplex * > vecFloat, std::vector< cuFloatComplex * > vecData, int &size, bool H2D=true)
Definition MemUtils.h:161
std::vector< cuFloatComplex * > cuMallComplex(int &n, int &size)
Definition MemUtils.h:103
void deallocComplexHost(std::vector< cuFloatComplex * > vecFloat)
Definition MemUtils.h:177
std::vector< float * > cuMallFloatStack(int &n, int &size)
Definition MemUtils.h:84
void cuMemCpFloat(std::vector< float * > vecFloat, std::vector< float * > vecData, int &size, bool H2D=true)
Definition MemUtils.h:142
std::vector< cuFloatComplex * > cuMallComplexStack(int &n, int &size)
Definition MemUtils.h:123
void deallocFloatHost(std::vector< float * > vecFloat)
Definition MemUtils.h:191