#include "piopencl.h" #define CL_USE_DEPRECATED_OPENCL_1_2_APIS #define CL_USE_DEPRECATED_OPENCL_2_0_APIS #ifdef MAC_OS # include "cl.h" #else # include "CL/cl.h" #endif PRIVATE_DEFINITION_START(PIOpenCL::Context) cl_context context; cl_command_queue queue; PIVector devices; PRIVATE_DEFINITION_END(PIOpenCL::Context) PRIVATE_DEFINITION_START(PIOpenCL::Program) cl_program program; PRIVATE_DEFINITION_END(PIOpenCL::Program) PRIVATE_DEFINITION_START(PIOpenCL::Kernel) cl_kernel kernel; PRIVATE_DEFINITION_END(PIOpenCL::Kernel) void PIOpenCL::init() { Initializer::instance(); } const PIVector & PIOpenCL::platforms() { return Initializer::instance()->platforms_; } const PIVector PIOpenCL::devices() { PIVector ret; PIVector pl = platforms(); piForeachC (PIOpenCL::Platform & p, pl) ret << p.devices; return ret; } PIOpenCL::Device PIOpenCL::deviceByID(void * id) { PIVector pl = platforms(); piForeachC (PIOpenCL::Platform & p, pl) { piForeachC (PIOpenCL::Device & d, p.devices) { if (d.id == id) return d; } } return Device(); } PIOpenCL::Initializer::Initializer() { inited_ = false; } PIOpenCL::Initializer * PIOpenCL::Initializer::instance() { static PIOpenCL::Initializer * ret = new PIOpenCL::Initializer(); ret->init(); return ret; } void PIOpenCL::Initializer::init() { if (inited_) return; inited_ = true; piCout << "init OpenCL"; platforms_.clear(); const int max_size = 256; cl_platform_id cl_platforms[max_size]; char buffer[10240]; cl_int ret = 0; cl_uint plat_num = 0; ret = clGetPlatformIDs(max_size, cl_platforms, &plat_num); if (ret != 0) { piCout << "[PIOpenCL] Error: OpenCL platforms not found!"; return; } for (uint i = 0; i < plat_num; i++) { Platform p; p.id = cl_platforms[i]; clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_NAME, sizeof(buffer), buffer, 0); p.name = buffer; clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_VENDOR, sizeof(buffer), buffer, 0); p.vendor = buffer; clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_PROFILE, sizeof(buffer), buffer, 0); p.profile = buffer; clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_VERSION, sizeof(buffer), buffer, 0); p.version = buffer; clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_EXTENSIONS, sizeof(buffer), buffer, 0); p.extensions = PIString(buffer).trim().split(" "); uint dev_num = 0; cl_device_id cl_devices[max_size]; ret = clGetDeviceIDs(cl_platforms[i], CL_DEVICE_TYPE_ALL, max_size, cl_devices, &dev_num); if (ret == 0) { //piCout << "[OpenCLBlock] OpenCL cl_devices on platform" + PIString::fromNumber(i) + "found:" << dev_num; for (uint j = 0; j < dev_num; j++) { uint buf_uint = 0; ullong buf_ulong = 0; Device d; d.id = cl_devices[j]; d.platform_id = p.id; clGetDeviceInfo(cl_devices[j], CL_DEVICE_NAME, sizeof(buffer), buffer, 0); d.name = buffer; clGetDeviceInfo(cl_devices[j], CL_DEVICE_VENDOR, sizeof(buffer), buffer, 0); d.vendor = buffer; clGetDeviceInfo(cl_devices[j], CL_DEVICE_VERSION, sizeof(buffer), buffer, 0); d.device_version = buffer; clGetDeviceInfo(cl_devices[j], CL_DRIVER_VERSION, sizeof(buffer), buffer, 0); d.driver_version = buffer; clGetDeviceInfo(cl_devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, 0); d.max_compute_units = buf_uint; clGetDeviceInfo(cl_devices[j], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, 0); d.max_clock_frequency = buf_uint; clGetDeviceInfo(cl_devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, 0); d.max_memory_size = buf_ulong; p.devices << d; } } platforms_ << p; } } PIOpenCL::Context::Context() { zero(); } PIOpenCL::Context::~Context() { piCout << "destroy context" << this; deletePrograms(); if (PRIVATE->queue) clReleaseCommandQueue(PRIVATE->queue); if (PRIVATE->context) clReleaseContext(PRIVATE->context); zero(); } void PIOpenCL::Context::zero() { programs_.clear(); PRIVATE->context = 0; PRIVATE->queue = 0; PRIVATE->devices.clear(); } void PIOpenCL::Context::deletePrograms() { piCout << "context: delete" << programs_.size() << "progs"; PIVector ptdl = programs_; programs_.clear(); piForeach (Program * p, ptdl) { if (p) delete p; } } PIOpenCL::Context * PIOpenCL::Context::create(const PIOpenCL::DeviceList & dl) { if (dl.isEmpty()) return 0; Context * rc = 0; PIVector cldl; for (int i = 0; i < dl.size_s(); ++i) if (dl[i].isValid()) cldl << (cl_device_id)dl[i].id; //piCout << "create for" << dl[0].name << "..."; cl_int ret = 0; cl_context con = clCreateContext(0, cldl.size_s(), cldl.data(), 0, 0, &ret); if (ret != 0) { piCout << "[PIOpenCL::Context]" << "clCreateContext error" << ret; return 0; } cl_command_queue comq = clCreateCommandQueue(con, cldl[0], 0, &ret); if (ret != 0) { piCout << "[PIOpenCL::Context]" << "clCreateCommandQueue error" << ret; return 0; } piCout << "create done for" << dl[0].name; rc = new Context(); rc->PRIVATEWB->context = con; rc->PRIVATEWB->queue = comq; rc->PRIVATEWB->devices = cldl; return rc; } PIOpenCL::Program * PIOpenCL::Context::createProgram(const PIString & source, PIString * error) { if (error) error->clear(); if (source.isEmpty()) { if (error) (*error) = "Empty program!"; return 0; } const char * csrc = source.dataAscii(); size_t src_size = source.size(); cl_int ret = 0; cl_program prog = clCreateProgramWithSource(PRIVATE->context, 1, &csrc, &src_size, &ret); if (ret != 0) { piCout << "[PIOpenCL::Context]" << "clCreateProgramWithSource error" << ret; if (error) (*error) << "clCreateProgramWithSource error " << ret; return 0; } ret = clBuildProgram(prog, 0, 0, "-cl-kernel-arg-info", 0, 0); char buffer[10240]; clGetProgramBuildInfo(prog, PRIVATE->devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, 0); if (ret != 0) { clReleaseProgram(prog); piCout << "[PIOpenCL::Context]" << "clBuildProgram error" << ret;// << ":" << buffer; if (error) (*error) = buffer; return 0; } size_t uret = 0; ret = clGetProgramInfo(prog, CL_PROGRAM_NUM_KERNELS, sizeof(uret), &uret, 0); if (ret != 0) { clReleaseProgram(prog); piCout << "[PIOpenCL::Context]" << "clGetProgramInfo error" << ret; if (error) (*error) = "Can`t retrieve CL_PROGRAM_NUM_KERNELS"; return 0; } const int ccnt = 10240; char knames[ccnt]; ret = clGetProgramInfo(prog, CL_PROGRAM_KERNEL_NAMES, ccnt, knames, 0); if (ret != 0) { clReleaseProgram(prog); piCout << "[PIOpenCL::Context]" << "clGetProgramInfo error" << ret; if (error) (*error) = "Can`t retrieve CL_PROGRAM_KERNEL_NAMES"; return 0; } PIStringList knl = PIString(knames).trim().split(";"); PIVector kerns; piForeachC (PIString & k, knl) { cl_kernel kern = clCreateKernel(prog, k.dataAscii(), &ret); if (ret != 0) { piCout << "[PIOpenCL::Context]" << "clCreateKernel" << k << "error" << ret; if (error) (*error) << "clCreateKernel(\"" << k << "\") error " << ret; piForeach (void* _k, kerns) clReleaseKernel((cl_kernel)_k); clReleaseProgram(prog); return 0; } kerns << kern; } //piCout << knl << kerns; Program * rp = new Program(); rp->context_ = this; rp->source_ = source; rp->PRIVATEWB->program = prog; if (!rp->initKernels(kerns)) { delete rp; return 0; } programs_ << rp; return rp; } PIOpenCL::Program::Program() { //piCout << "new program" << this; zero(); } PIOpenCL::Program::~Program() { //piCout << "destroy program" << this; if (context_) context_->programs_.removeAll(this); piForeach (Kernel * k, kernels_) delete k; if (PRIVATE->program) clReleaseProgram(PRIVATE->program); zero(); } void PIOpenCL::Program::zero() { context_ = 0; kernels_.clear(); PRIVATE->program = 0; } bool PIOpenCL::Program::initKernels(PIVector kerns) { piForeach (void * _k, kerns) { cl_kernel k = (cl_kernel)_k; //piCout << "init kernel" << k; Kernel * kern = new Kernel(); kern->context_ = context_; kern->program_ = this; kern->PRIVATEWB->kernel = k; if (kern->init()) kernels_ << kern; else delete kern; } return !kernels_.isEmpty(); } PIOpenCL::Kernel::Kernel() { zero(); //piCout << "new Kernel" << this; } PIOpenCL::Kernel::~Kernel() { //piCout << "del Kernel" << this; if (PRIVATE->kernel) clReleaseKernel(PRIVATE->kernel); } void PIOpenCL::Kernel::zero() { PRIVATE->kernel = 0; } bool PIOpenCL::Kernel::init() { char kname[1024]; memset(kname, 0, 1024); cl_int ret = 0; ret = clGetKernelInfo(PRIVATE->kernel, CL_KERNEL_FUNCTION_NAME, 1024, kname, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelInfo(CL_KERNEL_FUNCTION_NAME) error" << ret; return false; } name_ = kname; cl_uint na = 0; ret = clGetKernelInfo(PRIVATE->kernel, CL_KERNEL_NUM_ARGS, sizeof(na), &na, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelInfo(CL_KERNEL_NUM_ARGS) error" << ret; return false; } for (cl_uint i = 0; i < na; ++i) { KernelArg ka; ka.init(PRIVATE->kernel, i); args_ << ka; } piCout << "kname" << kname << na; return true; } bool PIOpenCL::Kernel::setArgValueV(int index, const PIVariant & value) { if (index < 0 || index >= args_.size_s()) { piCout << "[PIOpenCL::Kernel]" << "setArgValue invalid index" << index; return false; } return true; } int PIOpenCL::Kernel::argIndex(const PIString & an) const { for (int i = 0; i < args_.size_s(); ++i) if (args_[i].arg_name == an) return i; return -1; } PIOpenCL::KernelArg PIOpenCL::Kernel::argByName(const PIString & an) const { piForeachC (KernelArg & a, args_) if (a.arg_name == an) return a; return KernelArg(); } PIOpenCL::KernelArg::KernelArg() { address_qualifier = AddressGlobal; access_qualifier = AccessNone; type_qualifier = TypeNone; is_pointer = false; arg_type = Float; dims = 1; } void PIOpenCL::KernelArg::init(void * _k, uint index) { cl_kernel k = (cl_kernel)_k; cl_int ret = 0; char nm[1024]; memset(nm, 0, 1024); ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_TYPE_NAME, 1024, nm, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_TYPE_NAME) error" << ret; } type_name = nm; memset(nm, 0, 1024); ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_NAME, 1024, nm, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_NAME) error" << ret; } arg_name = nm; cl_kernel_arg_address_qualifier addq = 0; ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof(addq), &addq, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_ADDRESS_QUALIFIER) error" << ret; } switch (addq) { case CL_KERNEL_ARG_ADDRESS_GLOBAL : address_qualifier = AddressGlobal; break; case CL_KERNEL_ARG_ADDRESS_LOCAL : address_qualifier = AddressLocal; break; case CL_KERNEL_ARG_ADDRESS_CONSTANT: address_qualifier = AddressConstant; break; case CL_KERNEL_ARG_ADDRESS_PRIVATE : address_qualifier = AddressPrivate; break; } cl_kernel_arg_access_qualifier accq = 0; ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof(accq), &accq, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_ACCESS_QUALIFIER) error" << ret; } switch (accq) { case CL_KERNEL_ARG_ACCESS_READ_ONLY : access_qualifier = AccessReadOnly; break; case CL_KERNEL_ARG_ACCESS_WRITE_ONLY: access_qualifier = AccessWriteOnly; break; case CL_KERNEL_ARG_ACCESS_READ_WRITE: access_qualifier = AccessReadWrite; break; case CL_KERNEL_ARG_ACCESS_NONE : access_qualifier = AccessNone; break; } cl_kernel_arg_type_qualifier tq = 0; ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(tq), &tq, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_TYPE_QUALIFIER) error" << ret; } switch (tq) { case CL_KERNEL_ARG_TYPE_CONST : type_qualifier = TypeConst; break; case CL_KERNEL_ARG_TYPE_RESTRICT: type_qualifier = TypeRestrict; break; case CL_KERNEL_ARG_TYPE_VOLATILE: type_qualifier = TypeVolatile; break; case CL_KERNEL_ARG_TYPE_NONE : type_qualifier = TypeNone; break; } base_type_name = type_name; is_pointer = false; if (type_name.endsWith("*")) { is_pointer = true; base_type_name.cutRight(1); } dims = piMaxi(1, base_type_name.right(1).toInt()); if (dims > 1) base_type_name.cutRight(1); } PICout operator <<(PICout s, const PIOpenCL::KernelArg & v) { s.setControl(0); s << "Arg(" << v.base_type_name << " " << v.arg_name << " (addr=" << v.address_qualifier << ",acc=" << v.access_qualifier << ",typ=" << v.type_qualifier << ",dims=" << v.dims << "))"; s.restoreControl(); return s; }