// Error-checking wrapper for CUDA runtime calls: evaluates `ans` and hands the
// result to gpuAssert together with the __FILE__ and __LINE__ of the call site.
// NOTE(review): the macro expands to a bare `{ ... }` block, so writing
// `if (c) gpuErrchk(x); else ...` without braces leaves a stray `;` before the
// `else` and does not parse. Always wrap such calls in braces.
8 #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
// Checks the status of a CUDA runtime call and reports a failure.
//   code  - status value to test against cudaSuccess
//   file  - source file name printed in the diagnostic
//   line  - source line number printed in the diagnostic
//   abort - when true (the default) the process terminates via exit(code)
// NOTE(review): the braces of this function are not visible in this excerpt;
// the exit is presumably inside the failure branch - confirm against the file.
27 inline void gpuAssert(cudaError_t code,
const char *file,
int line,
bool abort =
true)
29 if (code != cudaSuccess)
// Diagnostic goes to stderr as "GPUassert: <error string> <file> <line>".
31 fprintf(stderr,
"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
// The CUDA error code is used as the process exit status.
32 if (abort) exit(code);
// Declaration: returns a vector of float pointers. Both arguments are taken by
// non-const reference, so callers must pass lvalues.
// NOTE(review): presumably `n` is the number of buffers and `size` the element
// count per buffer, allocated on the device - confirm against the definition.
43 inline std::vector<float*>
cuMallFloat(
int &n,
int &size);
// Declaration: returns a vector of cuFloatComplex pointers. Both arguments are
// taken by non-const reference, so callers must pass lvalues.
// NOTE(review): presumably `n` is the number of buffers and `size` the element
// count per buffer, allocated on the device - confirm against the definition.
45 inline std::vector<cuFloatComplex*>
cuMallComplex(
int &n,
int &size);
// Declaration: copy routine over two vectors of float pointers.
//   vecFloat, vecData - pointer vectors, passed by value (the vectors are
//                       copied, the pointed-to buffers are not)
//   size              - taken by non-const reference
//   H2D               - direction flag, defaults to true
// NOTE(review): presumably `size` is the element count per buffer and H2D
// selects host-to-device versus device-to-host - confirm against the definition.
48 inline void cuMemCpFloat(std::vector<float*> vecFloat, std::vector<float*> vecData,
int &size,
bool H2D =
true);
// Declaration: copy routine over two vectors of cuFloatComplex pointers.
//   vecFloat, vecData - pointer vectors, passed by value (the vectors are
//                       copied, the pointed-to buffers are not)
//   size              - taken by non-const reference; the definition multiplies
//                       it by sizeof(cuFloatComplex) to get the byte count
//   H2D               - direction flag, defaults to true
// The definition of MemUtils::cuMemCpComplex copies vecData[i] into vecFloat[i]
// for every index of vecFloat.
49 inline void cuMemCpComplex(std::vector<cuFloatComplex*> vecFloat, std::vector<cuFloatComplex*> vecData,
int &size,
bool H2D =
true);
// NOTE(review): excerpt of a function body. The enclosing signature and the
// declaration of `p` are not visible here (presumably the definition of
// cuMallFloat - confirm).
// Result vector with n entries, each starting out as nullptr.
66 std::vector<float*> out(n,
nullptr);
// One iteration per requested buffer.
67 for(
int i=0; i<n; i++)
// Request room for `size` floats via cudaMalloc; gpuErrchk reports a failure
// and, with gpuAssert's default, exits the process.
70 gpuErrchk( cudaMalloc((
void**)&p, size *
sizeof(
float)) );
// NOTE(review): excerpt of a function body; the enclosing signature is not
// visible here, and neither is what happens to `p` after the allocation.
// Result vector with n entries, each starting out as nullptr.
86 std::vector<float*> out(n,
nullptr);
// One iteration per requested buffer.
87 for(
int i=0; i<n; i++)
// Array of `size` floats obtained with new[]; the elements are not initialised.
89 float *p =
new float[size];
// NOTE(review): excerpt of a function body. The enclosing signature and the
// declaration of `p` are not visible here (presumably the definition of
// cuMallComplex - confirm).
// Result vector with n entries, each starting out as nullptr.
105 std::vector<cuFloatComplex*> out(n,
nullptr);
// One iteration per requested buffer.
106 for(
int i=0; i<n; i++)
// Request room for `size` cuFloatComplex values via cudaMalloc; gpuErrchk
// reports a failure and, with gpuAssert's default, exits the process.
109 gpuErrchk( cudaMalloc((
void**)&p, size *
sizeof(cuFloatComplex)) );
// NOTE(review): excerpt of a function body; the enclosing signature is not
// visible here, and neither is what happens to `p` after the allocation.
// Result vector with n entries, each starting out as nullptr.
125 std::vector<cuFloatComplex*> out(n,
nullptr);
// One iteration per requested buffer.
126 for(
int i=0; i<n; i++)
// Array of `size` cuFloatComplex values obtained with new[].
128 cuFloatComplex *p =
new cuFloatComplex[size];
// NOTE(review): excerpt of a function body; the enclosing signature is not
// visible here (presumably the definition of cuMemCpFloat - confirm).
// The pointer count comes from vecFloat alone (size_t narrowed to int).
// NOTE(review): vecData is indexed over the same range with no visible check
// that it holds at least as many entries.
144 int n = vecFloat.size();
145 for(
int i=0; i<n; i++)
// In both branches vecFloat[i] is the destination and vecData[i] the source;
// each copy moves size * sizeof(float) bytes. H2D only selects the cudaMemcpy
// kind: host-to-device when true, device-to-host otherwise.
147 if(H2D) {gpuErrchk( cudaMemcpy(vecFloat[i], vecData[i], size *
sizeof(
float), cudaMemcpyHostToDevice) );}
149 else {gpuErrchk( cudaMemcpy(vecFloat[i], vecData[i], size *
sizeof(
float), cudaMemcpyDeviceToHost) );}
// Copies one buffer per vector entry with cudaMemcpy.
//   vecFloat - destination pointers (one per buffer), passed by value
//   vecData  - source pointers (one per buffer), passed by value
//   size     - number of cuFloatComplex elements copied per buffer
//   H2D      - true: cudaMemcpyHostToDevice; false: cudaMemcpyDeviceToHost
// Every cudaMemcpy is wrapped in gpuErrchk, which reports a failure and, with
// gpuAssert's default, exits the process.
161 inline void MemUtils::cuMemCpComplex(std::vector<cuFloatComplex*> vecFloat, std::vector<cuFloatComplex*> vecData,
int &size,
bool H2D)
// The pointer count comes from vecFloat alone (size_t narrowed to int).
// NOTE(review): vecData is indexed over the same range with no visible check
// that it holds at least as many entries.
163 int n = vecFloat.size();
164 for(
int i=0; i<n; i++)
// In both branches vecFloat[i] is the destination and vecData[i] the source;
// H2D only selects the cudaMemcpy kind.
166 if(H2D) {gpuErrchk( cudaMemcpy(vecFloat[i], vecData[i], size *
sizeof(cuFloatComplex), cudaMemcpyHostToDevice) );}
168 else {gpuErrchk( cudaMemcpy(vecFloat[i], vecData[i], size *
sizeof(cuFloatComplex), cudaMemcpyDeviceToHost) );}
// NOTE(review): excerpt of a function body; neither the enclosing signature nor
// the loop body is visible here, so what is done per entry cannot be stated.
// The iteration count is the number of entries in vecFloat (size_t narrowed to int).
179 int n = vecFloat.size();
180 for(
int i=0; i<n; i++)
193 int n = vecFloat.size();
194 for(
int i=0; i<n; i++)