code format
This commit is contained in:
@@ -1,318 +1,247 @@
|
||||
//---------------------------------------------------------------------------//
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017 StreamComputing
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
#ifndef OPENCL_COMPLEX_MATH
|
||||
#define OPENCL_COMPLEX_MATH
|
||||
|
||||
#define CONCAT(x, y) x##y
|
||||
#define FNAME(name, sufix) c##name##sufix
|
||||
|
||||
// float2
|
||||
#define clrealf(complex) complex.x;
|
||||
#define climagf(complex) complex.y;
|
||||
|
||||
// double2
|
||||
#define clreal(complex) complex.x;
|
||||
#define climag(complex) complex.y;
|
||||
|
||||
#define OPENCL_COMPLEX_MATH_FUNCS(complex_type, real_type, func_sufix, math_consts_sufix) \
|
||||
complex_type CONCAT(complex, func_sufix)(real_type r, real_type i) \
|
||||
{ \
|
||||
return (complex_type)(r, i); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(add, func_sufix)(complex_type x, complex_type y) \
|
||||
{ \
|
||||
return x + y; \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(sub, func_sufix)(complex_type x, complex_type y) \
|
||||
{ \
|
||||
return x - y; \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(add_real, func_sufix)(complex_type z, real_type r) \
|
||||
{ \
|
||||
return (complex_type)(z.x + r, z.y); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(sub_real, func_sufix)(complex_type z, real_type r) \
|
||||
{ \
|
||||
return (complex_type)(z.x - r, z.y); \
|
||||
} \
|
||||
\
|
||||
real_type FNAME(abs, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
return length(z); \
|
||||
} \
|
||||
\
|
||||
real_type FNAME(arg, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
return atan2(z.y, z.x); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(mul, func_sufix)(complex_type z1, complex_type z2) \
|
||||
{ \
|
||||
real_type x1 = z1.x; \
|
||||
real_type y1 = z1.y; \
|
||||
real_type x2 = z2.x; \
|
||||
real_type y2 = z2.y; \
|
||||
return (complex_type)(x1 * x2 - y1 * y2, x1 * y2 + x2 * y1); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(div, func_sufix)(complex_type z1, complex_type z2) \
|
||||
{ \
|
||||
real_type x1 = z1.x; \
|
||||
real_type y1 = z1.y; \
|
||||
real_type x2 = z2.x; \
|
||||
real_type y2 = z2.y; \
|
||||
real_type iabs_z2 = CONCAT(1.0, func_sufix) / FNAME(abs, func_sufix)(z2); \
|
||||
return (complex_type)( \
|
||||
((x1 * x2 * iabs_z2) + (y1 * y2 * iabs_z2)) * iabs_z2, \
|
||||
((y1 * x2 * iabs_z2) - (x1 * y2 * iabs_z2)) * iabs_z2 \
|
||||
); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(mul_real, func_sufix)(complex_type z, real_type r) \
|
||||
{ \
|
||||
return z * r; \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(div_real, func_sufix)(complex_type z, real_type r) \
|
||||
{ \
|
||||
return z / r; \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(conj, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
return (complex_type)(z.x, -z.y); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(proj, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
if(isinf(z.x) || isinf(z.y)) \
|
||||
{ \
|
||||
return (complex_type)(INFINITY, (copysign(CONCAT(0.0, func_sufix), z.y))); \
|
||||
} \
|
||||
return z; \
|
||||
} \
|
||||
\
|
||||
real_type FNAME(norm, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
/* Returns the squared magnitude of the complex number z. */ \
|
||||
/* The norm calculated by this function is also known as */ \
|
||||
/* field norm or absolute square. */ \
|
||||
real_type x = z.x; \
|
||||
real_type y = z.y; \
|
||||
return x * x + y * y; \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(polar, func_sufix)(real_type r, real_type theta) \
|
||||
{ \
|
||||
/* Returns a complex number with magnitude r and phase angle theta. */ \
|
||||
return (complex_type)(r * cos(theta), r * sin(theta)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(exp, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
/* The complex exponential function e^z for z = x+i*y */ \
|
||||
/* equals to e^x * cis(y), */ \
|
||||
/* or, e^x * (cos(y) + i*sin(y)) */ \
|
||||
real_type expx = exp(z.x); \
|
||||
return (complex_type)(expx * cos(z.y), expx * sin(z.y)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(log, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
/* log(z) = log(abs(z)) + i * arg(z) */ \
|
||||
return (complex_type)(log(FNAME(abs, func_sufix)(z)),FNAME(arg, func_sufix)(z)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(log10, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
return FNAME(log, func_sufix)(z) / log(CONCAT(10.0, func_sufix)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(pow, func_sufix)(complex_type z1, complex_type z2) \
|
||||
{ \
|
||||
/* (z1)^(z2) = exp(z2 * log(z1)) = cexp(mul(z2, clog(z1))) */ \
|
||||
return \
|
||||
FNAME(exp, func_sufix)( \
|
||||
FNAME(mul, func_sufix)( \
|
||||
z2, \
|
||||
FNAME(log, func_sufix)(z1) \
|
||||
) \
|
||||
); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(sqrt, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
/* */ \
|
||||
real_type x = z.x; \
|
||||
real_type y = z.y; \
|
||||
if(x == CONCAT(0.0, func_sufix)) \
|
||||
{ \
|
||||
real_type t = sqrt(fabs(y) / 2); \
|
||||
return (complex_type)(t, y < CONCAT(0.0, func_sufix) ? -t : t); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
real_type t = sqrt(2 * FNAME(abs, func_sufix)(z) + fabs(x)); \
|
||||
real_type u = t / 2; \
|
||||
return x > CONCAT(0.0, func_sufix) \
|
||||
? (complex_type)(u, y / t) \
|
||||
: (complex_type)(fabs(y) / t, y < CONCAT(0.0, func_sufix) ? -u : u); \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(sin, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
const real_type x = z.x; \
|
||||
const real_type y = z.y; \
|
||||
return (complex_type)(sin(x) * cosh(y), cos(x) * sinh(y)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(sinh, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
const real_type x = z.x; \
|
||||
const real_type y = z.y; \
|
||||
return (complex_type)(sinh(x) * cos(y), cosh(x) * sin(y)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(cos, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
const real_type x = z.x; \
|
||||
const real_type y = z.y; \
|
||||
return (complex_type)(cos(x) * cosh(y), -sin(x) * sinh(y)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(cosh, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
const real_type x = z.x; \
|
||||
const real_type y = z.y; \
|
||||
return (complex_type)(cosh(x) * cos(y), sinh(x) * sin(y)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(tan, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
return FNAME(div, func_sufix)( \
|
||||
FNAME(sin, func_sufix)(z), \
|
||||
FNAME(cos, func_sufix)(z) \
|
||||
); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(tanh, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
return FNAME(div, func_sufix)( \
|
||||
FNAME(sinh, func_sufix)(z), \
|
||||
FNAME(cosh, func_sufix)(z) \
|
||||
); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(asinh, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
complex_type t = (complex_type)( \
|
||||
(z.x - z.y) * (z.x + z.y) + CONCAT(1.0, func_sufix), \
|
||||
CONCAT(2.0, func_sufix) * z.x * z.y \
|
||||
); \
|
||||
t = FNAME(sqrt, func_sufix)(t) + z; \
|
||||
return FNAME(log, func_sufix)(t); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(asin, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
complex_type t = (complex_type)(-z.y, z.x); \
|
||||
t = FNAME(asinh, func_sufix)(t); \
|
||||
return (complex_type)(t.y, -t.x); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(acosh, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
return \
|
||||
CONCAT(2.0, func_sufix) * FNAME(log, func_sufix)( \
|
||||
FNAME(sqrt, func_sufix)( \
|
||||
CONCAT(0.5, func_sufix) * (z + CONCAT(1.0, func_sufix)) \
|
||||
) \
|
||||
+ FNAME(sqrt, func_sufix)( \
|
||||
CONCAT(0.5, func_sufix) * (z - CONCAT(1.0, func_sufix)) \
|
||||
) \
|
||||
); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(acos, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
complex_type t = FNAME(asin, func_sufix)(z);\
|
||||
return (complex_type)( \
|
||||
CONCAT(M_PI_2, math_consts_sufix) - t.x, -t.y \
|
||||
); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(atanh, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
const real_type zy2 = z.y * z.y; \
|
||||
real_type n = CONCAT(1.0, func_sufix) + z.x; \
|
||||
real_type d = CONCAT(1.0, func_sufix) - z.x; \
|
||||
n = zy2 + n * n; \
|
||||
d = zy2 + d * d; \
|
||||
return (complex_type)( \
|
||||
CONCAT(0.25, func_sufix) * (log(n) - log(d)), \
|
||||
CONCAT(0.5, func_sufix) * atan2( \
|
||||
CONCAT(2.0, func_sufix) * z.y, \
|
||||
CONCAT(1.0, func_sufix) - zy2 - (z.x * z.x) \
|
||||
) \
|
||||
); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(atan, func_sufix)(complex_type z) \
|
||||
{ \
|
||||
const real_type zx2 = z.x * z.x; \
|
||||
real_type n = z.y + CONCAT(1.0, func_sufix); \
|
||||
real_type d = z.y - CONCAT(1.0, func_sufix); \
|
||||
n = zx2 + n * n; \
|
||||
d = zx2 + d * d; \
|
||||
return (complex_type)( \
|
||||
CONCAT(0.5, func_sufix) * atan2( \
|
||||
CONCAT(2.0, func_sufix) * z.x, \
|
||||
CONCAT(1.0, func_sufix) - zx2 - (z.y * z.y) \
|
||||
), \
|
||||
CONCAT(0.25, func_sufix) * (log(n / d)) \
|
||||
); \
|
||||
}
|
||||
|
||||
// float complex
|
||||
typedef float2 cfloat;
|
||||
OPENCL_COMPLEX_MATH_FUNCS(float2, float, f, _F)
|
||||
|
||||
// double complex
|
||||
#ifdef cl_khr_fp64
|
||||
# ifdef OPENCL_COMPLEX_MATH_USE_DOUBLE
|
||||
# pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
typedef double2 cdouble;
|
||||
OPENCL_COMPLEX_MATH_FUNCS(double2, double, , )
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#undef FNAME
|
||||
#undef CONCAT
|
||||
//---------------------------------------------------------------------------//
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2017 StreamComputing
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//---------------------------------------------------------------------------//
|
||||
|
||||
#ifndef OPENCL_COMPLEX_MATH
|
||||
#define OPENCL_COMPLEX_MATH
|
||||
|
||||
#define CONCAT(x, y) x##y
|
||||
#define FNAME(name, sufix) c##name##sufix
|
||||
|
||||
// float2
|
||||
#define clrealf(complex) complex.x;
|
||||
#define climagf(complex) complex.y;
|
||||
|
||||
// double2
|
||||
#define clreal(complex) complex.x;
|
||||
#define climag(complex) complex.y;
|
||||
|
||||
#define OPENCL_COMPLEX_MATH_FUNCS(complex_type, real_type, func_sufix, math_consts_sufix) \
|
||||
complex_type CONCAT(complex, func_sufix)(real_type r, real_type i) { \
|
||||
return (complex_type)(r, i); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(add, func_sufix)(complex_type x, complex_type y) { \
|
||||
return x + y; \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(sub, func_sufix)(complex_type x, complex_type y) { \
|
||||
return x - y; \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(add_real, func_sufix)(complex_type z, real_type r) { \
|
||||
return (complex_type)(z.x + r, z.y); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(sub_real, func_sufix)(complex_type z, real_type r) { \
|
||||
return (complex_type)(z.x - r, z.y); \
|
||||
} \
|
||||
\
|
||||
real_type FNAME(abs, func_sufix)(complex_type z) { \
|
||||
return length(z); \
|
||||
} \
|
||||
\
|
||||
real_type FNAME(arg, func_sufix)(complex_type z) { \
|
||||
return atan2(z.y, z.x); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(mul, func_sufix)(complex_type z1, complex_type z2) { \
|
||||
real_type x1 = z1.x; \
|
||||
real_type y1 = z1.y; \
|
||||
real_type x2 = z2.x; \
|
||||
real_type y2 = z2.y; \
|
||||
return (complex_type)(x1 * x2 - y1 * y2, x1 * y2 + x2 * y1); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(div, func_sufix)(complex_type z1, complex_type z2) { \
|
||||
real_type x1 = z1.x; \
|
||||
real_type y1 = z1.y; \
|
||||
real_type x2 = z2.x; \
|
||||
real_type y2 = z2.y; \
|
||||
real_type iabs_z2 = CONCAT(1.0, func_sufix) / FNAME(abs, func_sufix)(z2); \
|
||||
return (complex_type)(((x1 * x2 * iabs_z2) + (y1 * y2 * iabs_z2)) * iabs_z2, \
|
||||
((y1 * x2 * iabs_z2) - (x1 * y2 * iabs_z2)) * iabs_z2); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(mul_real, func_sufix)(complex_type z, real_type r) { \
|
||||
return z * r; \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(div_real, func_sufix)(complex_type z, real_type r) { \
|
||||
return z / r; \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(conj, func_sufix)(complex_type z) { \
|
||||
return (complex_type)(z.x, -z.y); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(proj, func_sufix)(complex_type z) { \
|
||||
if (isinf(z.x) || isinf(z.y)) { \
|
||||
return (complex_type)(INFINITY, (copysign(CONCAT(0.0, func_sufix), z.y))); \
|
||||
} \
|
||||
return z; \
|
||||
} \
|
||||
\
|
||||
real_type FNAME(norm, func_sufix)(complex_type z) { \
|
||||
/* Returns the squared magnitude of the complex number z. */ \
|
||||
/* The norm calculated by this function is also known as */ \
|
||||
/* field norm or absolute square. */ \
|
||||
real_type x = z.x; \
|
||||
real_type y = z.y; \
|
||||
return x * x + y * y; \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(polar, func_sufix)(real_type r, real_type theta) { \
|
||||
/* Returns a complex number with magnitude r and phase angle theta. */ \
|
||||
return (complex_type)(r * cos(theta), r * sin(theta)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(exp, func_sufix)(complex_type z) { \
|
||||
/* The complex exponential function e^z for z = x+i*y */ \
|
||||
/* equals to e^x * cis(y), */ \
|
||||
/* or, e^x * (cos(y) + i*sin(y)) */ \
|
||||
real_type expx = exp(z.x); \
|
||||
return (complex_type)(expx * cos(z.y), expx * sin(z.y)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(log, func_sufix)(complex_type z) { \
|
||||
/* log(z) = log(abs(z)) + i * arg(z) */ \
|
||||
return (complex_type)(log(FNAME(abs, func_sufix)(z)), FNAME(arg, func_sufix)(z)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(log10, func_sufix)(complex_type z) { \
|
||||
return FNAME(log, func_sufix)(z) / log(CONCAT(10.0, func_sufix)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(pow, func_sufix)(complex_type z1, complex_type z2) { \
|
||||
/* (z1)^(z2) = exp(z2 * log(z1)) = cexp(mul(z2, clog(z1))) */ \
|
||||
return FNAME(exp, func_sufix)(FNAME(mul, func_sufix)(z2, FNAME(log, func_sufix)(z1))); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(sqrt, func_sufix)(complex_type z) { \
|
||||
/* */ \
|
||||
real_type x = z.x; \
|
||||
real_type y = z.y; \
|
||||
if (x == CONCAT(0.0, func_sufix)) { \
|
||||
real_type t = sqrt(fabs(y) / 2); \
|
||||
return (complex_type)(t, y < CONCAT(0.0, func_sufix) ? -t : t); \
|
||||
} else { \
|
||||
real_type t = sqrt(2 * FNAME(abs, func_sufix)(z) + fabs(x)); \
|
||||
real_type u = t / 2; \
|
||||
return x > CONCAT(0.0, func_sufix) ? (complex_type)(u, y / t) \
|
||||
: (complex_type)(fabs(y) / t, y < CONCAT(0.0, func_sufix) ? -u : u); \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(sin, func_sufix)(complex_type z) { \
|
||||
const real_type x = z.x; \
|
||||
const real_type y = z.y; \
|
||||
return (complex_type)(sin(x) * cosh(y), cos(x) * sinh(y)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(sinh, func_sufix)(complex_type z) { \
|
||||
const real_type x = z.x; \
|
||||
const real_type y = z.y; \
|
||||
return (complex_type)(sinh(x) * cos(y), cosh(x) * sin(y)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(cos, func_sufix)(complex_type z) { \
|
||||
const real_type x = z.x; \
|
||||
const real_type y = z.y; \
|
||||
return (complex_type)(cos(x) * cosh(y), -sin(x) * sinh(y)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(cosh, func_sufix)(complex_type z) { \
|
||||
const real_type x = z.x; \
|
||||
const real_type y = z.y; \
|
||||
return (complex_type)(cosh(x) * cos(y), sinh(x) * sin(y)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(tan, func_sufix)(complex_type z) { \
|
||||
return FNAME(div, func_sufix)(FNAME(sin, func_sufix)(z), FNAME(cos, func_sufix)(z)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(tanh, func_sufix)(complex_type z) { \
|
||||
return FNAME(div, func_sufix)(FNAME(sinh, func_sufix)(z), FNAME(cosh, func_sufix)(z)); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(asinh, func_sufix)(complex_type z) { \
|
||||
complex_type t = (complex_type)((z.x - z.y) * (z.x + z.y) + CONCAT(1.0, func_sufix), CONCAT(2.0, func_sufix) * z.x * z.y); \
|
||||
t = FNAME(sqrt, func_sufix)(t) + z; \
|
||||
return FNAME(log, func_sufix)(t); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(asin, func_sufix)(complex_type z) { \
|
||||
complex_type t = (complex_type)(-z.y, z.x); \
|
||||
t = FNAME(asinh, func_sufix)(t); \
|
||||
return (complex_type)(t.y, -t.x); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(acosh, func_sufix)(complex_type z) { \
|
||||
return CONCAT(2.0, func_sufix) * \
|
||||
FNAME(log, func_sufix)(FNAME(sqrt, func_sufix)(CONCAT(0.5, func_sufix) * (z + CONCAT(1.0, func_sufix))) + \
|
||||
FNAME(sqrt, func_sufix)(CONCAT(0.5, func_sufix) * (z - CONCAT(1.0, func_sufix)))); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(acos, func_sufix)(complex_type z) { \
|
||||
complex_type t = FNAME(asin, func_sufix)(z); \
|
||||
return (complex_type)(CONCAT(M_PI_2, math_consts_sufix) - t.x, -t.y); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(atanh, func_sufix)(complex_type z) { \
|
||||
const real_type zy2 = z.y * z.y; \
|
||||
real_type n = CONCAT(1.0, func_sufix) + z.x; \
|
||||
real_type d = CONCAT(1.0, func_sufix) - z.x; \
|
||||
n = zy2 + n * n; \
|
||||
d = zy2 + d * d; \
|
||||
return (complex_type)(CONCAT(0.25, func_sufix) * (log(n) - log(d)), \
|
||||
CONCAT(0.5, func_sufix) * \
|
||||
atan2(CONCAT(2.0, func_sufix) * z.y, CONCAT(1.0, func_sufix) - zy2 - (z.x * z.x))); \
|
||||
} \
|
||||
\
|
||||
complex_type FNAME(atan, func_sufix)(complex_type z) { \
|
||||
const real_type zx2 = z.x * z.x; \
|
||||
real_type n = z.y + CONCAT(1.0, func_sufix); \
|
||||
real_type d = z.y - CONCAT(1.0, func_sufix); \
|
||||
n = zx2 + n * n; \
|
||||
d = zx2 + d * d; \
|
||||
return (complex_type)(CONCAT(0.5, func_sufix) * atan2(CONCAT(2.0, func_sufix) * z.x, CONCAT(1.0, func_sufix) - zx2 - (z.y * z.y)), \
|
||||
CONCAT(0.25, func_sufix) * (log(n / d))); \
|
||||
}
|
||||
|
||||
// float complex
|
||||
typedef float2 cfloat;
|
||||
OPENCL_COMPLEX_MATH_FUNCS(float2, float, f, _F)
|
||||
|
||||
// double complex
|
||||
#ifdef cl_khr_fp64
|
||||
# ifdef OPENCL_COMPLEX_MATH_USE_DOUBLE
|
||||
# pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
typedef double2 cdouble;
|
||||
OPENCL_COMPLEX_MATH_FUNCS(double2, double, , )
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#undef FNAME
|
||||
#undef CONCAT
|
||||
#endif // OPENCL_COMPLEX_MATH
|
||||
@@ -1,706 +1,730 @@
|
||||
#include "piopencl.h"
|
||||
#include "piresources.h"
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#ifdef MAC_OS
|
||||
# include "cl.h"
|
||||
#else
|
||||
# include "CL/cl.h"
|
||||
#endif
|
||||
|
||||
|
||||
PRIVATE_DEFINITION_START(PIOpenCL::Context)
|
||||
cl_context context;
|
||||
cl_command_queue queue;
|
||||
PIVector<cl_device_id> devices;
|
||||
PIString complex_src;
|
||||
PRIVATE_DEFINITION_END(PIOpenCL::Context)
|
||||
|
||||
|
||||
PRIVATE_DEFINITION_START(PIOpenCL::Buffer)
|
||||
cl_mem buffer;
|
||||
PRIVATE_DEFINITION_END(PIOpenCL::Buffer)
|
||||
|
||||
|
||||
PRIVATE_DEFINITION_START(PIOpenCL::Program)
|
||||
cl_program program;
|
||||
PRIVATE_DEFINITION_END(PIOpenCL::Program)
|
||||
|
||||
|
||||
PRIVATE_DEFINITION_START(PIOpenCL::Kernel)
|
||||
cl_kernel kernel;
|
||||
PRIVATE_DEFINITION_END(PIOpenCL::Kernel)
|
||||
|
||||
|
||||
|
||||
|
||||
void PIOpenCL::init() {
|
||||
Initializer::instance();
|
||||
}
|
||||
|
||||
|
||||
const PIVector<PIOpenCL::Platform> & PIOpenCL::platforms() {
|
||||
return Initializer::instance()->platforms_;
|
||||
}
|
||||
|
||||
|
||||
const PIVector<PIOpenCL::Device> PIOpenCL::devices() {
|
||||
PIVector<PIOpenCL::Device> ret;
|
||||
PIVector<PIOpenCL::Platform> pl = platforms();
|
||||
piForeachC (PIOpenCL::Platform & p, pl)
|
||||
ret << p.devices;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Device PIOpenCL::deviceByID(void * id) {
|
||||
PIVector<PIOpenCL::Platform> pl = platforms();
|
||||
piForeachC (PIOpenCL::Platform & p, pl) {
|
||||
piForeachC (PIOpenCL::Device & d, p.devices) {
|
||||
if (d.id == id) return d;
|
||||
}
|
||||
}
|
||||
return Device();
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Initializer::Initializer() {
|
||||
inited_ = false;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Initializer * PIOpenCL::Initializer::instance() {
|
||||
static PIOpenCL::Initializer * ret = new PIOpenCL::Initializer();
|
||||
ret->init();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Initializer::init() {
|
||||
if (inited_) return;
|
||||
inited_ = true;
|
||||
piCout << "init OpenCL";
|
||||
platforms_.clear();
|
||||
const int max_size = 256;
|
||||
cl_platform_id cl_platforms[max_size];
|
||||
char buffer[10240];
|
||||
cl_int ret = 0;
|
||||
cl_uint plat_num = 0;
|
||||
ret = clGetPlatformIDs(max_size, cl_platforms, &plat_num);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL] Error: OpenCL platforms not found!";
|
||||
return;
|
||||
}
|
||||
for (uint i = 0; i < plat_num; i++) {
|
||||
Platform p;
|
||||
p.id = cl_platforms[i];
|
||||
clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_NAME, sizeof(buffer), buffer, 0);
|
||||
p.name = buffer;
|
||||
clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_VENDOR, sizeof(buffer), buffer, 0);
|
||||
p.vendor = buffer;
|
||||
clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_PROFILE, sizeof(buffer), buffer, 0);
|
||||
p.profile = buffer;
|
||||
clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_VERSION, sizeof(buffer), buffer, 0);
|
||||
p.version = buffer;
|
||||
clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_EXTENSIONS, sizeof(buffer), buffer, 0);
|
||||
p.extensions = PIString(buffer).trim().split(" ");
|
||||
|
||||
uint dev_num = 0;
|
||||
cl_device_id cl_devices[max_size];
|
||||
ret = clGetDeviceIDs(cl_platforms[i], CL_DEVICE_TYPE_ALL, max_size, cl_devices, &dev_num);
|
||||
if (ret == 0) {
|
||||
//piCout << "[OpenCLBlock] OpenCL cl_devices on platform" + PIString::fromNumber(i) + "found:" << dev_num;
|
||||
for (uint j = 0; j < dev_num; j++) {
|
||||
uint buf_uint = 0;
|
||||
ullong buf_ulong = 0;
|
||||
Device d;
|
||||
d.id = cl_devices[j];
|
||||
d.platform_id = p.id;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DEVICE_NAME, sizeof(buffer), buffer, 0);
|
||||
d.name = buffer;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DEVICE_VENDOR, sizeof(buffer), buffer, 0);
|
||||
d.vendor = buffer;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DEVICE_VERSION, sizeof(buffer), buffer, 0);
|
||||
d.device_version = buffer;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DRIVER_VERSION, sizeof(buffer), buffer, 0);
|
||||
d.driver_version = buffer;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, 0);
|
||||
d.max_compute_units = buf_uint;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, 0);
|
||||
d.max_clock_frequency = buf_uint;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, 0);
|
||||
d.max_memory_size = buf_ulong;
|
||||
p.devices << d;
|
||||
}
|
||||
}
|
||||
platforms_ << p;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
PIOpenCL::Context::Context() {
|
||||
PRIVATE->complex_src = PIResources::get("3rd/clcomplex.h") + "\n";
|
||||
zero();
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Context::~Context() {
|
||||
piCout << "destroy context" << this;
|
||||
deletePrograms();
|
||||
deleteBuffers();
|
||||
if (PRIVATE->queue)
|
||||
clReleaseCommandQueue(PRIVATE->queue);
|
||||
if (PRIVATE->context)
|
||||
clReleaseContext(PRIVATE->context);
|
||||
zero();
|
||||
}
|
||||
|
||||
|
||||
void * PIOpenCL::Context::handle() {
|
||||
return PRIVATE->context;
|
||||
}
|
||||
|
||||
|
||||
void * PIOpenCL::Context::queue() {
|
||||
return PRIVATE->queue;
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Context::zero() {
|
||||
programs_.clear();
|
||||
buffers_.clear();
|
||||
PRIVATE->context = 0;
|
||||
PRIVATE->queue = 0;
|
||||
PRIVATE->devices.clear();
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Context::deletePrograms() {
|
||||
piCout << "context: delete" << programs_.size() << "programs";
|
||||
PIVector<Program * > ptdl = programs_;
|
||||
programs_.clear();
|
||||
piForeach (Program * p, ptdl) {
|
||||
if (p) delete p;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Context::deleteBuffers() {
|
||||
piCout << "context: delete" << buffers_.size() << "buffers";
|
||||
PIVector<Buffer * > btdl = buffers_;
|
||||
buffers_.clear();
|
||||
piForeach (Buffer * b, btdl) {
|
||||
if (b) delete b;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Context * PIOpenCL::Context::create(const PIOpenCL::DeviceList & dl) {
|
||||
if (dl.isEmpty()) return 0;
|
||||
Context * rc = 0;
|
||||
PIVector<cl_device_id> cldl;
|
||||
for (int i = 0; i < dl.size_s(); ++i)
|
||||
if (dl[i].isValid())
|
||||
cldl << (cl_device_id)dl[i].id;
|
||||
//piCout << "create for" << dl[0].name << "...";
|
||||
cl_int ret = 0;
|
||||
cl_context con = clCreateContext(0, cldl.size_s(), cldl.data(), 0, 0, &ret);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Context]" << "clCreateContext error" << ret;
|
||||
return 0;
|
||||
}
|
||||
cl_command_queue comq = clCreateCommandQueue(con, cldl[0], 0, &ret);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Context]" << "clCreateCommandQueue error" << ret;
|
||||
return 0;
|
||||
}
|
||||
piCout << "create done for" << dl[0].name;
|
||||
rc = new Context();
|
||||
rc->PRIVATEWB->context = con;
|
||||
rc->PRIVATEWB->queue = comq;
|
||||
rc->PRIVATEWB->devices = cldl;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Context * PIOpenCL::Context::create(const PIString & part_name) {
|
||||
PIString pn = part_name.toLowerCase();
|
||||
PIVector<Device> dl = PIOpenCL::devices();
|
||||
piForeachC (Device & d, dl) {
|
||||
if (d.displayText().toLowerCase().contains(pn))
|
||||
return create(d);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Program * PIOpenCL::Context::createProgram(const PIString & source, const PIStringList & args, PIString * error) {
|
||||
if (error) error->clear();
|
||||
if (source.isEmpty()) {
|
||||
if (error) (*error) = "Empty program!";
|
||||
return 0;
|
||||
}
|
||||
PIString src_text = PRIVATE->complex_src + source;
|
||||
const char * csrc = src_text.dataAscii();
|
||||
size_t src_size = src_text.size();
|
||||
cl_int ret = 0;
|
||||
cl_program prog = clCreateProgramWithSource(PRIVATE->context, 1, &csrc, &src_size, &ret);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Context]" << "clCreateProgramWithSource error" << ret;
|
||||
if (error) (*error) += "clCreateProgramWithSource error " + PIString::fromNumber(ret);
|
||||
return 0;
|
||||
}
|
||||
PIString carg = (PIStringList(args) << "-cl-kernel-arg-info").join(' ');
|
||||
ret = clBuildProgram(prog, 0, 0, carg.dataAscii(), 0, 0);
|
||||
char buffer[10240];
|
||||
clGetProgramBuildInfo(prog, PRIVATE->devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, 0);
|
||||
if (ret != 0) {
|
||||
clReleaseProgram(prog);
|
||||
piCout << "[PIOpenCL::Context]" << "clBuildProgram error" << ret;// << ":" << buffer;
|
||||
if (error) (*error) = buffer;
|
||||
return 0;
|
||||
}
|
||||
size_t uret = 0;
|
||||
ret = clGetProgramInfo(prog, CL_PROGRAM_NUM_KERNELS, sizeof(uret), &uret, 0);
|
||||
if (ret != 0) {
|
||||
clReleaseProgram(prog);
|
||||
piCout << "[PIOpenCL::Context]" << "clGetProgramInfo error" << ret;
|
||||
if (error) (*error) = "Can`t retrieve CL_PROGRAM_NUM_KERNELS";
|
||||
return 0;
|
||||
}
|
||||
const int ccnt = 10240;
|
||||
char knames[ccnt];
|
||||
ret = clGetProgramInfo(prog, CL_PROGRAM_KERNEL_NAMES, ccnt, knames, 0);
|
||||
if (ret != 0) {
|
||||
clReleaseProgram(prog);
|
||||
piCout << "[PIOpenCL::Context]" << "clGetProgramInfo error" << ret;
|
||||
if (error) (*error) = "Can`t retrieve CL_PROGRAM_KERNEL_NAMES";
|
||||
return 0;
|
||||
}
|
||||
PIStringList knl = PIString(knames).trim().split(";");
|
||||
PIVector<void*> kerns;
|
||||
piForeachC (PIString & k, knl) {
|
||||
cl_kernel kern = clCreateKernel(prog, k.dataAscii(), &ret);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Context]" << "clCreateKernel" << k << "error" << ret;
|
||||
if (error) (*error) += "clCreateKernel(\"" + k + "\") error " + ret;
|
||||
piForeach (void* _k, kerns)
|
||||
clReleaseKernel((cl_kernel)_k);
|
||||
clReleaseProgram(prog);
|
||||
return 0;
|
||||
}
|
||||
kerns << kern;
|
||||
}
|
||||
//piCout << knl << kerns;
|
||||
Program * rp = new Program();
|
||||
rp->context_ = this;
|
||||
rp->source_ = source;
|
||||
rp->PRIVATEWB->program = prog;
|
||||
if (!rp->initKernels(kerns)) {
|
||||
delete rp;
|
||||
return 0;
|
||||
}
|
||||
programs_ << rp;
|
||||
return rp;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Buffer * PIOpenCL::Context::createBuffer(PIOpenCL::Direction dir, void * container, int type, PIByteArray def, uint elements) {
|
||||
Buffer * ret = new Buffer();
|
||||
ret->context_ = this;
|
||||
ret->dir = dir;
|
||||
ret->type = (Buffer::Container)type;
|
||||
ret->container = container;
|
||||
ret->def = def;
|
||||
ret->elements = elements;
|
||||
if (!ret->init()) {
|
||||
delete ret;
|
||||
return 0;
|
||||
}
|
||||
buffers_ << ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
PIOpenCL::Buffer::Buffer() {
|
||||
zero();
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Buffer::~Buffer() {
|
||||
if (context_)
|
||||
context_->buffers_.removeAll(this);
|
||||
if (PRIVATE->buffer)
|
||||
clReleaseMemObject(PRIVATE->buffer);
|
||||
zero();
|
||||
}
|
||||
|
||||
|
||||
void * PIOpenCL::Buffer::handle() {
|
||||
return PRIVATE->buffer;
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::zero() {
|
||||
type = cNone;
|
||||
container = 0;
|
||||
elements = 0;
|
||||
PRIVATE->buffer = 0;
|
||||
}
|
||||
|
||||
|
||||
bool PIOpenCL::Buffer::init() {
|
||||
cl_int ret = 0;
|
||||
cl_mem_flags f = container ? CL_MEM_COPY_HOST_PTR : 0;
|
||||
switch (dir) {
|
||||
case Input : f |= CL_MEM_READ_ONLY ; break;
|
||||
case Output : f |= CL_MEM_WRITE_ONLY; break;
|
||||
case InputOutput: f |= CL_MEM_READ_WRITE; break;
|
||||
default: break;
|
||||
}
|
||||
PRIVATE->buffer = clCreateBuffer(context_->PRIVATEWB->context, f, elements * def.size(), container ? containerData() : 0, &ret);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Buffer]" << "clCreateBuffer error" << ret;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void * PIOpenCL::Buffer::containerData() {
|
||||
if (type == cNone || !container) return 0;
|
||||
switch (type) {
|
||||
case cVector : return ((PIVector <uchar>*)container)->data();
|
||||
case cDeque : return ((PIDeque <uchar>*)container)->data();
|
||||
case cVector2D: return ((PIVector2D<uchar>*)container)->data();
|
||||
default: break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::clear() {
|
||||
if (!PRIVATE->buffer) return;
|
||||
if (def.isEmpty() || elements == 0) return;
|
||||
cl_int ret = clEnqueueFillBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, def.data(), def.size_s(), 0, elements * def.size(), 0, 0, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Buffer]" << "clEnqueueFillBuffer error" << ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::copyToContainer() {
|
||||
if (!PRIVATE->buffer || !container) return;
|
||||
copyTo(containerData());
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::copyTo(void * data) {
|
||||
if (!PRIVATE->buffer) return;
|
||||
cl_int ret = clEnqueueReadBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, CL_TRUE, 0, elements * def.size(), data, 0, 0, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Buffer]" << "clEnqueueReadBuffer error" << ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::copyTo(void * data, int elements_count, int elements_offset) {
|
||||
if (!PRIVATE->buffer) return;
|
||||
cl_int ret = clEnqueueReadBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, CL_TRUE, elements_offset * def.size(), elements_count * def.size(), data, 0, 0, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Buffer]" << "clEnqueueReadBuffer error" << ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::copyFromContainer() {
|
||||
if (!PRIVATE->buffer || !container) return;
|
||||
copyFrom(containerData());
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::copyFrom(void * data) {
|
||||
if (!PRIVATE->buffer) return;
|
||||
cl_int ret = clEnqueueWriteBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, CL_TRUE, 0, elements * def.size(), data, 0, 0, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Buffer]" << "clEnqueueWriteBuffer error" << ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::copyFrom(void * data, int elements_count, int elements_offset) {
|
||||
if (!PRIVATE->buffer) return;
|
||||
cl_int ret = clEnqueueWriteBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, CL_TRUE, elements_offset * def.size(), elements_count * def.size(), data, 0, 0, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Buffer]" << "clEnqueueWriteBuffer error" << ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
PIOpenCL::Program::Program() {
|
||||
//piCout << "new program" << this;
|
||||
zero();
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Program::~Program() {
|
||||
//piCout << "destroy program" << this;
|
||||
if (context_)
|
||||
context_->programs_.removeAll(this);
|
||||
piForeach (Kernel * k, kernels_)
|
||||
delete k;
|
||||
if (PRIVATE->program)
|
||||
clReleaseProgram(PRIVATE->program);
|
||||
zero();
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Program::zero() {
|
||||
context_ = 0;
|
||||
kernels_.clear();
|
||||
PRIVATE->program = 0;
|
||||
}
|
||||
|
||||
|
||||
bool PIOpenCL::Program::initKernels(PIVector<void * > kerns) {
|
||||
piForeach (void * _k, kerns) {
|
||||
cl_kernel k = (cl_kernel)_k;
|
||||
//piCout << "init kernel" << k;
|
||||
Kernel * kern = new Kernel();
|
||||
kern->context_ = context_;
|
||||
kern->program_ = this;
|
||||
kern->PRIVATEWB->kernel = k;
|
||||
if (kern->init())
|
||||
kernels_ << kern;
|
||||
else
|
||||
delete kern;
|
||||
}
|
||||
return !kernels_.isEmpty();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
bool PIOpenCL::Kernel::execute() {
|
||||
if (dims.isEmpty()) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "Error: empty range";
|
||||
return false;
|
||||
}
|
||||
cl_int ret = clEnqueueNDRangeKernel(context_->PRIVATEWB->queue, PRIVATE->kernel, dims.size(), 0, dims.data(), 0, 0, 0, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "clEnqueueNDRangeKernel error" << ret;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Kernel::setExecuteRanges(const PIVector<int> & ranges) {
|
||||
dims = ranges.toType<size_t>();
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Kernel::Kernel() {
|
||||
zero();
|
||||
//piCout << "new Kernel" << this;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Kernel::~Kernel() {
|
||||
//piCout << "del Kernel" << this;
|
||||
if (PRIVATE->kernel)
|
||||
clReleaseKernel(PRIVATE->kernel);
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Kernel::zero() {
|
||||
PRIVATE->kernel = 0;
|
||||
}
|
||||
|
||||
|
||||
bool PIOpenCL::Kernel::init() {
|
||||
char kname[1024];
|
||||
memset(kname, 0, 1024);
|
||||
cl_int ret = 0;
|
||||
ret = clGetKernelInfo(PRIVATE->kernel, CL_KERNEL_FUNCTION_NAME, 1024, kname, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "clGetKernelInfo(CL_KERNEL_FUNCTION_NAME) error" << ret;
|
||||
return false;
|
||||
}
|
||||
name_ = kname;
|
||||
cl_uint na = 0;
|
||||
ret = clGetKernelInfo(PRIVATE->kernel, CL_KERNEL_NUM_ARGS, sizeof(na), &na, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "clGetKernelInfo(CL_KERNEL_NUM_ARGS) error" << ret;
|
||||
return false;
|
||||
}
|
||||
for (cl_uint i = 0; i < na; ++i) {
|
||||
KernelArg ka;
|
||||
ka.init(PRIVATE->kernel, i);
|
||||
args_ << ka;
|
||||
}
|
||||
//piCout << "kname" << kname << na;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void setArgV(cl_kernel k, int index, T v) {
|
||||
//piCout << "setArgV" << k << index <<sizeof(v)<< v;
|
||||
clSetKernelArg(k, index, sizeof(v), &v);
|
||||
}
|
||||
|
||||
bool PIOpenCL::Kernel::setArgValueS(int index, const PIVariant & value) {
|
||||
if (index < 0 || index >= args_.size_s()) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "setArgValue invalid index" << index;
|
||||
return false;
|
||||
}
|
||||
KernelArg & ka(args_[index]);
|
||||
if (ka.dims > 0) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "setArgValue set scalar to \"" << ka.type_name << ka.arg_name << "\"";
|
||||
return false;
|
||||
}
|
||||
switch (ka.arg_type) {
|
||||
case Char : setArgV(PRIVATE->kernel, index, (cl_char)(value.toInt())); break;
|
||||
case UChar : setArgV(PRIVATE->kernel, index, (cl_uchar)(value.toInt())); break;
|
||||
case Short : setArgV(PRIVATE->kernel, index, (cl_short)(value.toInt())); break;
|
||||
case UShort: setArgV(PRIVATE->kernel, index, (cl_ushort)(value.toInt())); break;
|
||||
case Int : setArgV(PRIVATE->kernel, index, (cl_int)(value.toInt())); break;
|
||||
case UInt : setArgV(PRIVATE->kernel, index, (cl_uint)(value.toInt())); break;
|
||||
case Long : setArgV(PRIVATE->kernel, index, (cl_long)(value.toLLong())); break;
|
||||
case ULong : setArgV(PRIVATE->kernel, index, (cl_ulong)(value.toLLong())); break;
|
||||
case Float : setArgV(PRIVATE->kernel, index, (cl_float)(value.toFloat())); break;
|
||||
case Double: setArgV(PRIVATE->kernel, index, (cl_double)(value.toDouble())); break;
|
||||
default: break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool PIOpenCL::Kernel::bindArgValue(int index, Buffer * buffer) {
|
||||
if (!buffer) return false;
|
||||
if (index < 0 || index >= args_.size_s()) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "bindArgValue invalid index" << index;
|
||||
return false;
|
||||
}
|
||||
KernelArg & ka(args_[index]);
|
||||
if (ka.dims <= 0) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "bindArgValue set buffer to \"" << ka.type_name << ka.arg_name << "\"";
|
||||
return false;
|
||||
}
|
||||
clSetKernelArg(PRIVATE->kernel, index, sizeof(buffer->PRIVATEWB->buffer), &(buffer->PRIVATEWB->buffer));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int PIOpenCL::Kernel::argIndex(const PIString & an) const {
|
||||
for (int i = 0; i < args_.size_s(); ++i)
|
||||
if (args_[i].arg_name == an)
|
||||
return i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::KernelArg PIOpenCL::Kernel::argByName(const PIString & an) const {
|
||||
piForeachC (KernelArg & a, args_)
|
||||
if (a.arg_name == an)
|
||||
return a;
|
||||
return KernelArg();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
PIOpenCL::KernelArg::KernelArg() {
|
||||
address_qualifier = AddressGlobal;
|
||||
access_qualifier = AccessNone;
|
||||
type_qualifier = TypeNone;
|
||||
is_pointer = false;
|
||||
arg_type = Float;
|
||||
dims = 1;
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::KernelArg::init(void * _k, uint index) {
|
||||
cl_kernel k = (cl_kernel)_k;
|
||||
cl_int ret = 0;
|
||||
char nm[1024];
|
||||
memset(nm, 0, 1024);
|
||||
ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_TYPE_NAME, 1024, nm, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_TYPE_NAME) error" << ret;
|
||||
}
|
||||
type_name = nm;
|
||||
memset(nm, 0, 1024);
|
||||
ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_NAME, 1024, nm, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_NAME) error" << ret;
|
||||
}
|
||||
arg_name = nm;
|
||||
cl_kernel_arg_address_qualifier addq = 0;
|
||||
ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof(addq), &addq, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_ADDRESS_QUALIFIER) error" << ret;
|
||||
}
|
||||
switch (addq) {
|
||||
case CL_KERNEL_ARG_ADDRESS_GLOBAL : address_qualifier = AddressGlobal; break;
|
||||
case CL_KERNEL_ARG_ADDRESS_LOCAL : address_qualifier = AddressLocal; break;
|
||||
case CL_KERNEL_ARG_ADDRESS_CONSTANT: address_qualifier = AddressConstant; break;
|
||||
case CL_KERNEL_ARG_ADDRESS_PRIVATE : address_qualifier = AddressPrivate; break;
|
||||
}
|
||||
cl_kernel_arg_access_qualifier accq = 0;
|
||||
ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof(accq), &accq, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_ACCESS_QUALIFIER) error" << ret;
|
||||
}
|
||||
switch (accq) {
|
||||
case CL_KERNEL_ARG_ACCESS_READ_ONLY : access_qualifier = AccessReadOnly; break;
|
||||
case CL_KERNEL_ARG_ACCESS_WRITE_ONLY: access_qualifier = AccessWriteOnly; break;
|
||||
case CL_KERNEL_ARG_ACCESS_READ_WRITE: access_qualifier = AccessReadWrite; break;
|
||||
case CL_KERNEL_ARG_ACCESS_NONE : access_qualifier = AccessNone; break;
|
||||
}
|
||||
cl_kernel_arg_type_qualifier tq = 0;
|
||||
ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(tq), &tq, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_TYPE_QUALIFIER) error" << ret;
|
||||
}
|
||||
switch (tq) {
|
||||
case CL_KERNEL_ARG_TYPE_CONST : type_qualifier = TypeConst; break;
|
||||
case CL_KERNEL_ARG_TYPE_RESTRICT: type_qualifier = TypeRestrict; break;
|
||||
case CL_KERNEL_ARG_TYPE_VOLATILE: type_qualifier = TypeVolatile; break;
|
||||
case CL_KERNEL_ARG_TYPE_NONE : type_qualifier = TypeNone; break;
|
||||
}
|
||||
is_pointer = false;
|
||||
base_type_name = type_name;
|
||||
base_type_name.removeAll("__global");
|
||||
dims = piMaxi(0, base_type_name.entries('*') + base_type_name.entries('['));
|
||||
base_type_name.removeAll('*');
|
||||
while (base_type_name.contains('[')) {
|
||||
int i = base_type_name.find('[');
|
||||
base_type_name.remove(i, base_type_name.find(']') - i + 1);
|
||||
}
|
||||
if (base_type_name == "char" ) arg_type = Char ;
|
||||
if (base_type_name == "uchar" ) arg_type = UChar ;
|
||||
if (base_type_name == "short") arg_type = Short;
|
||||
if (base_type_name == "ushort") arg_type = UShort;
|
||||
if (base_type_name == "int" ) arg_type = Int ;
|
||||
if (base_type_name == "uint" ) arg_type = UInt ;
|
||||
if (base_type_name == "long" ) arg_type = Long ;
|
||||
if (base_type_name == "ulong" ) arg_type = ULong ;
|
||||
if (base_type_name == "float" ) arg_type = Float ;
|
||||
if (base_type_name == "double") arg_type = Double;
|
||||
//piCout << type_name << base_type_name;
|
||||
}
|
||||
|
||||
|
||||
PICout operator <<(PICout s, const PIOpenCL::KernelArg & v) {
|
||||
s.setControls(0); s << "Arg(" << v.base_type_name << " " << v.arg_name << " (addr=" << v.address_qualifier << ",acc=" << v.access_qualifier << ",typ=" << v.type_qualifier << ",dims=" << v.dims << "))";
|
||||
s.restoreControls(); return s;
|
||||
}
|
||||
#include "piopencl.h"
|
||||
|
||||
#include "piresources.h"
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#ifdef MAC_OS
|
||||
# include "cl.h"
|
||||
#else
|
||||
# include "CL/cl.h"
|
||||
#endif
|
||||
|
||||
|
||||
PRIVATE_DEFINITION_START(PIOpenCL::Context)
|
||||
cl_context context;
|
||||
cl_command_queue queue;
|
||||
PIVector<cl_device_id> devices;
|
||||
PIString complex_src;
|
||||
PRIVATE_DEFINITION_END(PIOpenCL::Context)
|
||||
|
||||
|
||||
PRIVATE_DEFINITION_START(PIOpenCL::Buffer)
|
||||
cl_mem buffer;
|
||||
PRIVATE_DEFINITION_END(PIOpenCL::Buffer)
|
||||
|
||||
|
||||
PRIVATE_DEFINITION_START(PIOpenCL::Program)
|
||||
cl_program program;
|
||||
PRIVATE_DEFINITION_END(PIOpenCL::Program)
|
||||
|
||||
|
||||
PRIVATE_DEFINITION_START(PIOpenCL::Kernel)
|
||||
cl_kernel kernel;
|
||||
PRIVATE_DEFINITION_END(PIOpenCL::Kernel)
|
||||
|
||||
|
||||
void PIOpenCL::init() {
|
||||
Initializer::instance();
|
||||
}
|
||||
|
||||
|
||||
const PIVector<PIOpenCL::Platform> & PIOpenCL::platforms() {
|
||||
return Initializer::instance()->platforms_;
|
||||
}
|
||||
|
||||
|
||||
const PIVector<PIOpenCL::Device> PIOpenCL::devices() {
|
||||
PIVector<PIOpenCL::Device> ret;
|
||||
PIVector<PIOpenCL::Platform> pl = platforms();
|
||||
piForeachC(PIOpenCL::Platform & p, pl)
|
||||
ret << p.devices;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Device PIOpenCL::deviceByID(void * id) {
|
||||
PIVector<PIOpenCL::Platform> pl = platforms();
|
||||
piForeachC(PIOpenCL::Platform & p, pl) {
|
||||
piForeachC(PIOpenCL::Device & d, p.devices) {
|
||||
if (d.id == id) return d;
|
||||
}
|
||||
}
|
||||
return Device();
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Initializer::Initializer() {
|
||||
inited_ = false;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Initializer * PIOpenCL::Initializer::instance() {
|
||||
static PIOpenCL::Initializer * ret = new PIOpenCL::Initializer();
|
||||
ret->init();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Initializer::init() {
|
||||
if (inited_) return;
|
||||
inited_ = true;
|
||||
piCout << "init OpenCL";
|
||||
platforms_.clear();
|
||||
const int max_size = 256;
|
||||
cl_platform_id cl_platforms[max_size];
|
||||
char buffer[10240];
|
||||
cl_int ret = 0;
|
||||
cl_uint plat_num = 0;
|
||||
ret = clGetPlatformIDs(max_size, cl_platforms, &plat_num);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL] Error: OpenCL platforms not found!";
|
||||
return;
|
||||
}
|
||||
for (uint i = 0; i < plat_num; i++) {
|
||||
Platform p;
|
||||
p.id = cl_platforms[i];
|
||||
clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_NAME, sizeof(buffer), buffer, 0);
|
||||
p.name = buffer;
|
||||
clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_VENDOR, sizeof(buffer), buffer, 0);
|
||||
p.vendor = buffer;
|
||||
clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_PROFILE, sizeof(buffer), buffer, 0);
|
||||
p.profile = buffer;
|
||||
clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_VERSION, sizeof(buffer), buffer, 0);
|
||||
p.version = buffer;
|
||||
clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_EXTENSIONS, sizeof(buffer), buffer, 0);
|
||||
p.extensions = PIString(buffer).trim().split(" ");
|
||||
|
||||
uint dev_num = 0;
|
||||
cl_device_id cl_devices[max_size];
|
||||
ret = clGetDeviceIDs(cl_platforms[i], CL_DEVICE_TYPE_ALL, max_size, cl_devices, &dev_num);
|
||||
if (ret == 0) {
|
||||
// piCout << "[OpenCLBlock] OpenCL cl_devices on platform" + PIString::fromNumber(i) + "found:" << dev_num;
|
||||
for (uint j = 0; j < dev_num; j++) {
|
||||
uint buf_uint = 0;
|
||||
ullong buf_ulong = 0;
|
||||
Device d;
|
||||
d.id = cl_devices[j];
|
||||
d.platform_id = p.id;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DEVICE_NAME, sizeof(buffer), buffer, 0);
|
||||
d.name = buffer;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DEVICE_VENDOR, sizeof(buffer), buffer, 0);
|
||||
d.vendor = buffer;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DEVICE_VERSION, sizeof(buffer), buffer, 0);
|
||||
d.device_version = buffer;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DRIVER_VERSION, sizeof(buffer), buffer, 0);
|
||||
d.driver_version = buffer;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, 0);
|
||||
d.max_compute_units = buf_uint;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, 0);
|
||||
d.max_clock_frequency = buf_uint;
|
||||
clGetDeviceInfo(cl_devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, 0);
|
||||
d.max_memory_size = buf_ulong;
|
||||
p.devices << d;
|
||||
}
|
||||
}
|
||||
platforms_ << p;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Context::Context() {
|
||||
PRIVATE->complex_src = PIResources::get("3rd/clcomplex.h") + "\n";
|
||||
zero();
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Context::~Context() {
|
||||
piCout << "destroy context" << this;
|
||||
deletePrograms();
|
||||
deleteBuffers();
|
||||
if (PRIVATE->queue) clReleaseCommandQueue(PRIVATE->queue);
|
||||
if (PRIVATE->context) clReleaseContext(PRIVATE->context);
|
||||
zero();
|
||||
}
|
||||
|
||||
|
||||
void * PIOpenCL::Context::handle() {
|
||||
return PRIVATE->context;
|
||||
}
|
||||
|
||||
|
||||
void * PIOpenCL::Context::queue() {
|
||||
return PRIVATE->queue;
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Context::zero() {
|
||||
programs_.clear();
|
||||
buffers_.clear();
|
||||
PRIVATE->context = 0;
|
||||
PRIVATE->queue = 0;
|
||||
PRIVATE->devices.clear();
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Context::deletePrograms() {
|
||||
piCout << "context: delete" << programs_.size() << "programs";
|
||||
PIVector<Program *> ptdl = programs_;
|
||||
programs_.clear();
|
||||
piForeach(Program * p, ptdl) {
|
||||
if (p) delete p;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Context::deleteBuffers() {
|
||||
piCout << "context: delete" << buffers_.size() << "buffers";
|
||||
PIVector<Buffer *> btdl = buffers_;
|
||||
buffers_.clear();
|
||||
piForeach(Buffer * b, btdl) {
|
||||
if (b) delete b;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Context * PIOpenCL::Context::create(const PIOpenCL::DeviceList & dl) {
|
||||
if (dl.isEmpty()) return 0;
|
||||
Context * rc = 0;
|
||||
PIVector<cl_device_id> cldl;
|
||||
for (int i = 0; i < dl.size_s(); ++i)
|
||||
if (dl[i].isValid()) cldl << (cl_device_id)dl[i].id;
|
||||
// piCout << "create for" << dl[0].name << "...";
|
||||
cl_int ret = 0;
|
||||
cl_context con = clCreateContext(0, cldl.size_s(), cldl.data(), 0, 0, &ret);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Context]"
|
||||
<< "clCreateContext error" << ret;
|
||||
return 0;
|
||||
}
|
||||
cl_command_queue comq = clCreateCommandQueue(con, cldl[0], 0, &ret);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Context]"
|
||||
<< "clCreateCommandQueue error" << ret;
|
||||
return 0;
|
||||
}
|
||||
piCout << "create done for" << dl[0].name;
|
||||
rc = new Context();
|
||||
rc->PRIVATEWB->context = con;
|
||||
rc->PRIVATEWB->queue = comq;
|
||||
rc->PRIVATEWB->devices = cldl;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Context * PIOpenCL::Context::create(const PIString & part_name) {
|
||||
PIString pn = part_name.toLowerCase();
|
||||
PIVector<Device> dl = PIOpenCL::devices();
|
||||
piForeachC(Device & d, dl) {
|
||||
if (d.displayText().toLowerCase().contains(pn)) return create(d);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Program * PIOpenCL::Context::createProgram(const PIString & source, const PIStringList & args, PIString * error) {
|
||||
if (error) error->clear();
|
||||
if (source.isEmpty()) {
|
||||
if (error) (*error) = "Empty program!";
|
||||
return 0;
|
||||
}
|
||||
PIString src_text = PRIVATE->complex_src + source;
|
||||
const char * csrc = src_text.dataAscii();
|
||||
size_t src_size = src_text.size();
|
||||
cl_int ret = 0;
|
||||
cl_program prog = clCreateProgramWithSource(PRIVATE->context, 1, &csrc, &src_size, &ret);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Context]"
|
||||
<< "clCreateProgramWithSource error" << ret;
|
||||
if (error) (*error) += "clCreateProgramWithSource error " + PIString::fromNumber(ret);
|
||||
return 0;
|
||||
}
|
||||
PIString carg = (PIStringList(args) << "-cl-kernel-arg-info").join(' ');
|
||||
ret = clBuildProgram(prog, 0, 0, carg.dataAscii(), 0, 0);
|
||||
char buffer[10240];
|
||||
clGetProgramBuildInfo(prog, PRIVATE->devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, 0);
|
||||
if (ret != 0) {
|
||||
clReleaseProgram(prog);
|
||||
piCout << "[PIOpenCL::Context]"
|
||||
<< "clBuildProgram error" << ret; // << ":" << buffer;
|
||||
if (error) (*error) = buffer;
|
||||
return 0;
|
||||
}
|
||||
size_t uret = 0;
|
||||
ret = clGetProgramInfo(prog, CL_PROGRAM_NUM_KERNELS, sizeof(uret), &uret, 0);
|
||||
if (ret != 0) {
|
||||
clReleaseProgram(prog);
|
||||
piCout << "[PIOpenCL::Context]"
|
||||
<< "clGetProgramInfo error" << ret;
|
||||
if (error) (*error) = "Can`t retrieve CL_PROGRAM_NUM_KERNELS";
|
||||
return 0;
|
||||
}
|
||||
const int ccnt = 10240;
|
||||
char knames[ccnt];
|
||||
ret = clGetProgramInfo(prog, CL_PROGRAM_KERNEL_NAMES, ccnt, knames, 0);
|
||||
if (ret != 0) {
|
||||
clReleaseProgram(prog);
|
||||
piCout << "[PIOpenCL::Context]"
|
||||
<< "clGetProgramInfo error" << ret;
|
||||
if (error) (*error) = "Can`t retrieve CL_PROGRAM_KERNEL_NAMES";
|
||||
return 0;
|
||||
}
|
||||
PIStringList knl = PIString(knames).trim().split(";");
|
||||
PIVector<void *> kerns;
|
||||
piForeachC(PIString & k, knl) {
|
||||
cl_kernel kern = clCreateKernel(prog, k.dataAscii(), &ret);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Context]"
|
||||
<< "clCreateKernel" << k << "error" << ret;
|
||||
if (error) (*error) += "clCreateKernel(\"" + k + "\") error " + ret;
|
||||
piForeach(void * _k, kerns)
|
||||
clReleaseKernel((cl_kernel)_k);
|
||||
clReleaseProgram(prog);
|
||||
return 0;
|
||||
}
|
||||
kerns << kern;
|
||||
}
|
||||
// piCout << knl << kerns;
|
||||
Program * rp = new Program();
|
||||
rp->context_ = this;
|
||||
rp->source_ = source;
|
||||
rp->PRIVATEWB->program = prog;
|
||||
if (!rp->initKernels(kerns)) {
|
||||
delete rp;
|
||||
return 0;
|
||||
}
|
||||
programs_ << rp;
|
||||
return rp;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Buffer * PIOpenCL::Context::createBuffer(PIOpenCL::Direction dir, void * container, int type, PIByteArray def, uint elements) {
|
||||
Buffer * ret = new Buffer();
|
||||
ret->context_ = this;
|
||||
ret->dir = dir;
|
||||
ret->type = (Buffer::Container)type;
|
||||
ret->container = container;
|
||||
ret->def = def;
|
||||
ret->elements = elements;
|
||||
if (!ret->init()) {
|
||||
delete ret;
|
||||
return 0;
|
||||
}
|
||||
buffers_ << ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Buffer::Buffer() {
|
||||
zero();
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Buffer::~Buffer() {
|
||||
if (context_) context_->buffers_.removeAll(this);
|
||||
if (PRIVATE->buffer) clReleaseMemObject(PRIVATE->buffer);
|
||||
zero();
|
||||
}
|
||||
|
||||
|
||||
void * PIOpenCL::Buffer::handle() {
|
||||
return PRIVATE->buffer;
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::zero() {
|
||||
type = cNone;
|
||||
container = 0;
|
||||
elements = 0;
|
||||
PRIVATE->buffer = 0;
|
||||
}
|
||||
|
||||
|
||||
bool PIOpenCL::Buffer::init() {
|
||||
cl_int ret = 0;
|
||||
cl_mem_flags f = container ? CL_MEM_COPY_HOST_PTR : 0;
|
||||
switch (dir) {
|
||||
case Input: f |= CL_MEM_READ_ONLY; break;
|
||||
case Output: f |= CL_MEM_WRITE_ONLY; break;
|
||||
case InputOutput: f |= CL_MEM_READ_WRITE; break;
|
||||
default: break;
|
||||
}
|
||||
PRIVATE->buffer = clCreateBuffer(context_->PRIVATEWB->context, f, elements * def.size(), container ? containerData() : 0, &ret);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Buffer]"
|
||||
<< "clCreateBuffer error" << ret;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void * PIOpenCL::Buffer::containerData() {
|
||||
if (type == cNone || !container) return 0;
|
||||
switch (type) {
|
||||
case cVector: return ((PIVector<uchar> *)container)->data();
|
||||
case cDeque: return ((PIDeque<uchar> *)container)->data();
|
||||
case cVector2D: return ((PIVector2D<uchar> *)container)->data();
|
||||
default: break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::clear() {
|
||||
if (!PRIVATE->buffer) return;
|
||||
if (def.isEmpty() || elements == 0) return;
|
||||
cl_int ret =
|
||||
clEnqueueFillBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, def.data(), def.size_s(), 0, elements * def.size(), 0, 0, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Buffer]"
|
||||
<< "clEnqueueFillBuffer error" << ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::copyToContainer() {
|
||||
if (!PRIVATE->buffer || !container) return;
|
||||
copyTo(containerData());
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::copyTo(void * data) {
|
||||
if (!PRIVATE->buffer) return;
|
||||
cl_int ret = clEnqueueReadBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, CL_TRUE, 0, elements * def.size(), data, 0, 0, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Buffer]"
|
||||
<< "clEnqueueReadBuffer error" << ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::copyTo(void * data, int elements_count, int elements_offset) {
|
||||
if (!PRIVATE->buffer) return;
|
||||
cl_int ret = clEnqueueReadBuffer(context_->PRIVATEWB->queue,
|
||||
PRIVATE->buffer,
|
||||
CL_TRUE,
|
||||
elements_offset * def.size(),
|
||||
elements_count * def.size(),
|
||||
data,
|
||||
0,
|
||||
0,
|
||||
0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Buffer]"
|
||||
<< "clEnqueueReadBuffer error" << ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::copyFromContainer() {
|
||||
if (!PRIVATE->buffer || !container) return;
|
||||
copyFrom(containerData());
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::copyFrom(void * data) {
|
||||
if (!PRIVATE->buffer) return;
|
||||
cl_int ret = clEnqueueWriteBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, CL_TRUE, 0, elements * def.size(), data, 0, 0, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Buffer]"
|
||||
<< "clEnqueueWriteBuffer error" << ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Buffer::copyFrom(void * data, int elements_count, int elements_offset) {
|
||||
if (!PRIVATE->buffer) return;
|
||||
cl_int ret = clEnqueueWriteBuffer(context_->PRIVATEWB->queue,
|
||||
PRIVATE->buffer,
|
||||
CL_TRUE,
|
||||
elements_offset * def.size(),
|
||||
elements_count * def.size(),
|
||||
data,
|
||||
0,
|
||||
0,
|
||||
0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Buffer]"
|
||||
<< "clEnqueueWriteBuffer error" << ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Program::Program() {
|
||||
// piCout << "new program" << this;
|
||||
zero();
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Program::~Program() {
|
||||
// piCout << "destroy program" << this;
|
||||
if (context_) context_->programs_.removeAll(this);
|
||||
piForeach(Kernel * k, kernels_)
|
||||
delete k;
|
||||
if (PRIVATE->program) clReleaseProgram(PRIVATE->program);
|
||||
zero();
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Program::zero() {
|
||||
context_ = 0;
|
||||
kernels_.clear();
|
||||
PRIVATE->program = 0;
|
||||
}
|
||||
|
||||
|
||||
bool PIOpenCL::Program::initKernels(PIVector<void *> kerns) {
|
||||
piForeach(void * _k, kerns) {
|
||||
cl_kernel k = (cl_kernel)_k;
|
||||
// piCout << "init kernel" << k;
|
||||
Kernel * kern = new Kernel();
|
||||
kern->context_ = context_;
|
||||
kern->program_ = this;
|
||||
kern->PRIVATEWB->kernel = k;
|
||||
if (kern->init())
|
||||
kernels_ << kern;
|
||||
else
|
||||
delete kern;
|
||||
}
|
||||
return !kernels_.isEmpty();
|
||||
}
|
||||
|
||||
|
||||
bool PIOpenCL::Kernel::execute() {
|
||||
if (dims.isEmpty()) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "Error: empty range";
|
||||
return false;
|
||||
}
|
||||
cl_int ret = clEnqueueNDRangeKernel(context_->PRIVATEWB->queue, PRIVATE->kernel, dims.size(), 0, dims.data(), 0, 0, 0, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "clEnqueueNDRangeKernel error" << ret;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Kernel::setExecuteRanges(const PIVector<int> & ranges) {
|
||||
dims = ranges.toType<size_t>();
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Kernel::Kernel() {
|
||||
zero();
|
||||
// piCout << "new Kernel" << this;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::Kernel::~Kernel() {
|
||||
// piCout << "del Kernel" << this;
|
||||
if (PRIVATE->kernel) clReleaseKernel(PRIVATE->kernel);
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::Kernel::zero() {
|
||||
PRIVATE->kernel = 0;
|
||||
}
|
||||
|
||||
|
||||
bool PIOpenCL::Kernel::init() {
|
||||
char kname[1024];
|
||||
memset(kname, 0, 1024);
|
||||
cl_int ret = 0;
|
||||
ret = clGetKernelInfo(PRIVATE->kernel, CL_KERNEL_FUNCTION_NAME, 1024, kname, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "clGetKernelInfo(CL_KERNEL_FUNCTION_NAME) error" << ret;
|
||||
return false;
|
||||
}
|
||||
name_ = kname;
|
||||
cl_uint na = 0;
|
||||
ret = clGetKernelInfo(PRIVATE->kernel, CL_KERNEL_NUM_ARGS, sizeof(na), &na, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "clGetKernelInfo(CL_KERNEL_NUM_ARGS) error" << ret;
|
||||
return false;
|
||||
}
|
||||
for (cl_uint i = 0; i < na; ++i) {
|
||||
KernelArg ka;
|
||||
ka.init(PRIVATE->kernel, i);
|
||||
args_ << ka;
|
||||
}
|
||||
// piCout << "kname" << kname << na;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
void setArgV(cl_kernel k, int index, T v) {
|
||||
// piCout << "setArgV" << k << index <<sizeof(v)<< v;
|
||||
clSetKernelArg(k, index, sizeof(v), &v);
|
||||
}
|
||||
|
||||
bool PIOpenCL::Kernel::setArgValueS(int index, const PIVariant & value) {
|
||||
if (index < 0 || index >= args_.size_s()) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "setArgValue invalid index" << index;
|
||||
return false;
|
||||
}
|
||||
KernelArg & ka(args_[index]);
|
||||
if (ka.dims > 0) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "setArgValue set scalar to \"" << ka.type_name << ka.arg_name << "\"";
|
||||
return false;
|
||||
}
|
||||
switch (ka.arg_type) {
|
||||
case Char: setArgV(PRIVATE->kernel, index, (cl_char)(value.toInt())); break;
|
||||
case UChar: setArgV(PRIVATE->kernel, index, (cl_uchar)(value.toInt())); break;
|
||||
case Short: setArgV(PRIVATE->kernel, index, (cl_short)(value.toInt())); break;
|
||||
case UShort: setArgV(PRIVATE->kernel, index, (cl_ushort)(value.toInt())); break;
|
||||
case Int: setArgV(PRIVATE->kernel, index, (cl_int)(value.toInt())); break;
|
||||
case UInt: setArgV(PRIVATE->kernel, index, (cl_uint)(value.toInt())); break;
|
||||
case Long: setArgV(PRIVATE->kernel, index, (cl_long)(value.toLLong())); break;
|
||||
case ULong: setArgV(PRIVATE->kernel, index, (cl_ulong)(value.toLLong())); break;
|
||||
case Float: setArgV(PRIVATE->kernel, index, (cl_float)(value.toFloat())); break;
|
||||
case Double: setArgV(PRIVATE->kernel, index, (cl_double)(value.toDouble())); break;
|
||||
default: break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool PIOpenCL::Kernel::bindArgValue(int index, Buffer * buffer) {
|
||||
if (!buffer) return false;
|
||||
if (index < 0 || index >= args_.size_s()) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "bindArgValue invalid index" << index;
|
||||
return false;
|
||||
}
|
||||
KernelArg & ka(args_[index]);
|
||||
if (ka.dims <= 0) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "bindArgValue set buffer to \"" << ka.type_name << ka.arg_name << "\"";
|
||||
return false;
|
||||
}
|
||||
clSetKernelArg(PRIVATE->kernel, index, sizeof(buffer->PRIVATEWB->buffer), &(buffer->PRIVATEWB->buffer));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
int PIOpenCL::Kernel::argIndex(const PIString & an) const {
|
||||
for (int i = 0; i < args_.size_s(); ++i)
|
||||
if (args_[i].arg_name == an) return i;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::KernelArg PIOpenCL::Kernel::argByName(const PIString & an) const {
|
||||
piForeachC(KernelArg & a, args_)
|
||||
if (a.arg_name == an) return a;
|
||||
return KernelArg();
|
||||
}
|
||||
|
||||
|
||||
PIOpenCL::KernelArg::KernelArg() {
|
||||
address_qualifier = AddressGlobal;
|
||||
access_qualifier = AccessNone;
|
||||
type_qualifier = TypeNone;
|
||||
is_pointer = false;
|
||||
arg_type = Float;
|
||||
dims = 1;
|
||||
}
|
||||
|
||||
|
||||
void PIOpenCL::KernelArg::init(void * _k, uint index) {
|
||||
cl_kernel k = (cl_kernel)_k;
|
||||
cl_int ret = 0;
|
||||
char nm[1024];
|
||||
memset(nm, 0, 1024);
|
||||
ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_TYPE_NAME, 1024, nm, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "clGetKernelArgInfo(CL_KERNEL_ARG_TYPE_NAME) error" << ret;
|
||||
}
|
||||
type_name = nm;
|
||||
memset(nm, 0, 1024);
|
||||
ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_NAME, 1024, nm, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "clGetKernelArgInfo(CL_KERNEL_ARG_NAME) error" << ret;
|
||||
}
|
||||
arg_name = nm;
|
||||
cl_kernel_arg_address_qualifier addq = 0;
|
||||
ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof(addq), &addq, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "clGetKernelArgInfo(CL_KERNEL_ARG_ADDRESS_QUALIFIER) error" << ret;
|
||||
}
|
||||
switch (addq) {
|
||||
case CL_KERNEL_ARG_ADDRESS_GLOBAL: address_qualifier = AddressGlobal; break;
|
||||
case CL_KERNEL_ARG_ADDRESS_LOCAL: address_qualifier = AddressLocal; break;
|
||||
case CL_KERNEL_ARG_ADDRESS_CONSTANT: address_qualifier = AddressConstant; break;
|
||||
case CL_KERNEL_ARG_ADDRESS_PRIVATE: address_qualifier = AddressPrivate; break;
|
||||
}
|
||||
cl_kernel_arg_access_qualifier accq = 0;
|
||||
ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof(accq), &accq, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "clGetKernelArgInfo(CL_KERNEL_ARG_ACCESS_QUALIFIER) error" << ret;
|
||||
}
|
||||
switch (accq) {
|
||||
case CL_KERNEL_ARG_ACCESS_READ_ONLY: access_qualifier = AccessReadOnly; break;
|
||||
case CL_KERNEL_ARG_ACCESS_WRITE_ONLY: access_qualifier = AccessWriteOnly; break;
|
||||
case CL_KERNEL_ARG_ACCESS_READ_WRITE: access_qualifier = AccessReadWrite; break;
|
||||
case CL_KERNEL_ARG_ACCESS_NONE: access_qualifier = AccessNone; break;
|
||||
}
|
||||
cl_kernel_arg_type_qualifier tq = 0;
|
||||
ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(tq), &tq, 0);
|
||||
if (ret != 0) {
|
||||
piCout << "[PIOpenCL::Kernel]"
|
||||
<< "clGetKernelArgInfo(CL_KERNEL_ARG_TYPE_QUALIFIER) error" << ret;
|
||||
}
|
||||
switch (tq) {
|
||||
case CL_KERNEL_ARG_TYPE_CONST: type_qualifier = TypeConst; break;
|
||||
case CL_KERNEL_ARG_TYPE_RESTRICT: type_qualifier = TypeRestrict; break;
|
||||
case CL_KERNEL_ARG_TYPE_VOLATILE: type_qualifier = TypeVolatile; break;
|
||||
case CL_KERNEL_ARG_TYPE_NONE: type_qualifier = TypeNone; break;
|
||||
}
|
||||
is_pointer = false;
|
||||
base_type_name = type_name;
|
||||
base_type_name.removeAll("__global");
|
||||
dims = piMaxi(0, base_type_name.entries('*') + base_type_name.entries('['));
|
||||
base_type_name.removeAll('*');
|
||||
while (base_type_name.contains('[')) {
|
||||
int i = base_type_name.find('[');
|
||||
base_type_name.remove(i, base_type_name.find(']') - i + 1);
|
||||
}
|
||||
if (base_type_name == "char") arg_type = Char;
|
||||
if (base_type_name == "uchar") arg_type = UChar;
|
||||
if (base_type_name == "short") arg_type = Short;
|
||||
if (base_type_name == "ushort") arg_type = UShort;
|
||||
if (base_type_name == "int") arg_type = Int;
|
||||
if (base_type_name == "uint") arg_type = UInt;
|
||||
if (base_type_name == "long") arg_type = Long;
|
||||
if (base_type_name == "ulong") arg_type = ULong;
|
||||
if (base_type_name == "float") arg_type = Float;
|
||||
if (base_type_name == "double") arg_type = Double;
|
||||
// piCout << type_name << base_type_name;
|
||||
}
|
||||
|
||||
|
||||
PICout operator<<(PICout s, const PIOpenCL::KernelArg & v) {
|
||||
s.setControls(0);
|
||||
s << "Arg(" << v.base_type_name << " " << v.arg_name << " (addr=" << v.address_qualifier << ",acc=" << v.access_qualifier
|
||||
<< ",typ=" << v.type_qualifier << ",dims=" << v.dims << "))";
|
||||
s.restoreControls();
|
||||
return s;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user