PyPO User Manual
MemUtils.h
Go to the documentation of this file.
1 #include <iostream>
2 #include <vector>
3 #include <cuda.h>
4 #include <cuComplex.h>
5 
6 #ifndef __MemUtils_h
7 #define __MemUtils_h
8 #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
9 
10 /*! \file MemUtils.h
11  \brief Utility class for CUDA memory allocations.
12 
13  Contains often used memory allocation/copy/deallocation operations between host and device.
14 */
15 
16 
17 /**
18  * Check CUDA API error status of call.
19  *
20  * Wrapper for finding errors in CUDA API calls.
21  *
22  * @param code The errorcode returned from failed API call.
23  * @param file The file in which failure occured.
24  * @param line The line in file in which error occured.
25  * @param abort Exit code upon error.
26  */
27 inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
28 {
29  if (code != cudaSuccess)
30  {
31  fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
32  if (abort) exit(code);
33  }
34 }
35 
36 /**
37  * @class
38  * Utility class for memory allocations/copies between CUDA and host.
39  */
40 class MemUtils
41 {
42  public:
43  inline std::vector<float*> cuMallFloat(int &n, int &size);
44  inline std::vector<float*> cuMallFloatStack(int &n, int &size);
45  inline std::vector<cuFloatComplex*> cuMallComplex(int &n, int &size);
46  inline std::vector<cuFloatComplex*> cuMallComplexStack(int &n, int &size);
47 
48  inline void cuMemCpFloat(std::vector<float*> vecFloat, std::vector<float*> vecData, int &size, bool H2D = true);
49  inline void cuMemCpComplex(std::vector<cuFloatComplex*> vecFloat, std::vector<cuFloatComplex*> vecData, int &size, bool H2D = true);
50 
51  inline void deallocFloatHost(std::vector<float*> vecFloat);
52  inline void deallocComplexHost(std::vector<cuFloatComplex*> vecFloat);
53 };
54 #endif
55 
56 /**
57  * Allocate memory for floats on GPU and return pointers.
58  *
59  * @param n Number of pointers to allocate on GPU.
60  * @param size Number of elements to allocate.
61  *
62  * @return out Vector containing GPU-allocated pointers.
63  */
64 inline std::vector<float*> MemUtils::cuMallFloat(int &n, int &size)
65 {
66  std::vector<float*> out(n, nullptr);
67  for(int i=0; i<n; i++)
68  {
69  float *p;
70  gpuErrchk( cudaMalloc((void**)&p, size * sizeof(float)) );
71  out[i] = p;
72  }
73  return out;
74 }
75 
76 /**
77  * Allocate memory for floats on host stack and return pointers.
78  *
79  * @param n Number of pointers to allocate on stack.
80  * @param size Number of elements to allocate.
81  *
82  * @return out Vector containing stack-allocated pointers.
83  */
84 inline std::vector<float*> MemUtils::cuMallFloatStack(int &n, int &size)
85 {
86  std::vector<float*> out(n, nullptr);
87  for(int i=0; i<n; i++)
88  {
89  float *p = new float[size];
90  out[i] = p;
91  }
92  return out;
93 }
94 
95 /**
96  * Allocate memory for cuFloatComplex on GPU and return pointers.
97  *
98  * @param n Number of pointers to allocate on GPU.
99  * @param size Number of elements to allocate.
100  *
101  * @return out Vector containing GPU-allocated pointers.
102  */
103 inline std::vector<cuFloatComplex*> MemUtils::cuMallComplex(int &n, int &size)
104 {
105  std::vector<cuFloatComplex*> out(n, nullptr);
106  for(int i=0; i<n; i++)
107  {
108  cuFloatComplex *p;
109  gpuErrchk( cudaMalloc((void**)&p, size * sizeof(cuFloatComplex)) );
110  out[i] = p;
111  }
112  return out;
113 }
114 
115 /**
116  * Allocate memory for cuFloatComplex on local stack and return pointers.
117  *
118  * @param n Number of pointers to allocate on GPU.
119  * @param size Number of elements to allocate.
120  *
121  * @return out Vector containing stack-allocated pointers.
122  */
123 inline std::vector<cuFloatComplex*> MemUtils::cuMallComplexStack(int &n, int &size)
124 {
125  std::vector<cuFloatComplex*> out(n, nullptr);
126  for(int i=0; i<n; i++)
127  {
128  cuFloatComplex *p = new cuFloatComplex[size];
129  out[i] = p;
130  }
131  return out;
132 }
133 
134 /**
135  * Copy local arrays of floats to allocated memory on GPU.
136  *
137  * @param vecFloat Vector containing allocated pointers to arrays of floats.
138  * @param vecData Vector containing pointers to local arrays.
139  * @param size Number of elements to copy.
140  * @param H2D Direction of copy. If true (default), copy from host to device. If false, copy from device to host.
141  */
142 inline void MemUtils::cuMemCpFloat(std::vector<float*> vecFloat, std::vector<float*> vecData, int &size, bool H2D)
143 {
144  int n = vecFloat.size();
145  for(int i=0; i<n; i++)
146  {
147  if(H2D) {gpuErrchk( cudaMemcpy(vecFloat[i], vecData[i], size * sizeof(float), cudaMemcpyHostToDevice) );}
148 
149  else {gpuErrchk( cudaMemcpy(vecFloat[i], vecData[i], size * sizeof(float), cudaMemcpyDeviceToHost) );}
150  }
151 }
152 
153 /**
154  * Copy local arrays of cuFloatComplex to allocated memory on GPU.
155  *
156  * @param vecFloat Vector containing allocated pointers to arrays of floats.
157  * @param vecData Vector containing pointers to local arrays.
158  * @param size Number of elements to copy.
159  * @param H2D Direction of copy. If true (default), copy from host to device. If false, copy from device to host.
160  */
161 inline void MemUtils::cuMemCpComplex(std::vector<cuFloatComplex*> vecFloat, std::vector<cuFloatComplex*> vecData, int &size, bool H2D)
162 {
163  int n = vecFloat.size();
164  for(int i=0; i<n; i++)
165  {
166  if(H2D) {gpuErrchk( cudaMemcpy(vecFloat[i], vecData[i], size * sizeof(cuFloatComplex), cudaMemcpyHostToDevice) );}
167 
168  else {gpuErrchk( cudaMemcpy(vecFloat[i], vecData[i], size * sizeof(cuFloatComplex), cudaMemcpyDeviceToHost) );}
169  }
170 }
171 
172 /**
173  * Deallocate local arrays of floats.
174  *
175  * @param vecFloat Vector containing pointers to local arrays to deallocate.
176  */
177 inline void MemUtils::deallocComplexHost(std::vector<cuFloatComplex*> vecFloat)
178 {
179  int n = vecFloat.size();
180  for(int i=0; i<n; i++)
181  {
182  delete vecFloat[i];
183  }
184 }
185 
186 /**
187  * Deallocate local arrays of cuFloatComplex.
188  *
189  * @param vecFloat Vector containing pointers to local arrays to deallocate.
190  */
191 inline void MemUtils::deallocFloatHost(std::vector<float*> vecFloat)
192 {
193  int n = vecFloat.size();
194  for(int i=0; i<n; i++)
195  {
196  delete vecFloat[i];
197  }
198 }
void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
Definition: MemUtils.h:27
Definition: MemUtils.h:41
std::vector< float * > cuMallFloat(int &n, int &size)
Definition: MemUtils.h:64
void cuMemCpComplex(std::vector< cuFloatComplex * > vecFloat, std::vector< cuFloatComplex * > vecData, int &size, bool H2D=true)
Definition: MemUtils.h:161
std::vector< cuFloatComplex * > cuMallComplex(int &n, int &size)
Definition: MemUtils.h:103
void deallocComplexHost(std::vector< cuFloatComplex * > vecFloat)
Definition: MemUtils.h:177
std::vector< float * > cuMallFloatStack(int &n, int &size)
Definition: MemUtils.h:84
void cuMemCpFloat(std::vector< float * > vecFloat, std::vector< float * > vecData, int &size, bool H2D=true)
Definition: MemUtils.h:142
std::vector< cuFloatComplex * > cuMallComplexStack(int &n, int &size)
Definition: MemUtils.h:123
void deallocFloatHost(std::vector< float * > vecFloat)
Definition: MemUtils.h:191