#include "piopencl.h" #include "piresources.h" #define CL_USE_DEPRECATED_OPENCL_1_2_APIS #define CL_USE_DEPRECATED_OPENCL_2_0_APIS #define CL_TARGET_OPENCL_VERSION 120 #ifdef MAC_OS # include "cl.h" #else # include "CL/cl.h" #endif PRIVATE_DEFINITION_START(PIOpenCL::Context) cl_context context; cl_command_queue queue; PIVector devices; PIString complex_src; PRIVATE_DEFINITION_END(PIOpenCL::Context) PRIVATE_DEFINITION_START(PIOpenCL::Buffer) cl_mem buffer; PRIVATE_DEFINITION_END(PIOpenCL::Buffer) PRIVATE_DEFINITION_START(PIOpenCL::Program) cl_program program; PRIVATE_DEFINITION_END(PIOpenCL::Program) PRIVATE_DEFINITION_START(PIOpenCL::Kernel) cl_kernel kernel; PRIVATE_DEFINITION_END(PIOpenCL::Kernel) void PIOpenCL::init() { Initializer::instance(); } const PIVector & PIOpenCL::platforms() { return Initializer::instance()->platforms_; } const PIVector PIOpenCL::devices() { PIVector ret; PIVector pl = platforms(); piForeachC (PIOpenCL::Platform & p, pl) ret << p.devices; return ret; } PIOpenCL::Device PIOpenCL::deviceByID(void * id) { PIVector pl = platforms(); piForeachC (PIOpenCL::Platform & p, pl) { piForeachC (PIOpenCL::Device & d, p.devices) { if (d.id == id) return d; } } return Device(); } PIOpenCL::Initializer::Initializer() { inited_ = false; } PIOpenCL::Initializer * PIOpenCL::Initializer::instance() { static PIOpenCL::Initializer * ret = new PIOpenCL::Initializer(); ret->init(); return ret; } void PIOpenCL::Initializer::init() { if (inited_) return; inited_ = true; piCout << "init OpenCL"; platforms_.clear(); const int max_size = 256; cl_platform_id cl_platforms[max_size]; char buffer[10240]; cl_int ret = 0; cl_uint plat_num = 0; ret = clGetPlatformIDs(max_size, cl_platforms, &plat_num); if (ret != 0) { piCout << "[PIOpenCL] Error: OpenCL platforms not found!"; return; } for (uint i = 0; i < plat_num; i++) { Platform p; p.id = cl_platforms[i]; clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_NAME, sizeof(buffer), buffer, 0); p.name = buffer; clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_VENDOR, sizeof(buffer), buffer, 0); p.vendor = buffer; clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_PROFILE, sizeof(buffer), buffer, 0); p.profile = buffer; clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_VERSION, sizeof(buffer), buffer, 0); p.version = buffer; clGetPlatformInfo(cl_platforms[i], CL_PLATFORM_EXTENSIONS, sizeof(buffer), buffer, 0); p.extensions = PIString(buffer).trim().split(" "); uint dev_num = 0; cl_device_id cl_devices[max_size]; ret = clGetDeviceIDs(cl_platforms[i], CL_DEVICE_TYPE_ALL, max_size, cl_devices, &dev_num); if (ret == 0) { //piCout << "[OpenCLBlock] OpenCL cl_devices on platform" + PIString::fromNumber(i) + "found:" << dev_num; for (uint j = 0; j < dev_num; j++) { uint buf_uint = 0; ullong buf_ulong = 0; Device d; d.id = cl_devices[j]; d.platform_id = p.id; clGetDeviceInfo(cl_devices[j], CL_DEVICE_NAME, sizeof(buffer), buffer, 0); d.name = buffer; clGetDeviceInfo(cl_devices[j], CL_DEVICE_VENDOR, sizeof(buffer), buffer, 0); d.vendor = buffer; clGetDeviceInfo(cl_devices[j], CL_DEVICE_VERSION, sizeof(buffer), buffer, 0); d.device_version = buffer; clGetDeviceInfo(cl_devices[j], CL_DRIVER_VERSION, sizeof(buffer), buffer, 0); d.driver_version = buffer; clGetDeviceInfo(cl_devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, 0); d.max_compute_units = buf_uint; clGetDeviceInfo(cl_devices[j], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, 0); d.max_clock_frequency = buf_uint; clGetDeviceInfo(cl_devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, 0); d.max_memory_size = buf_ulong; p.devices << d; } } platforms_ << p; } } PIOpenCL::Context::Context() { PRIVATE->complex_src = PIResources::get("3rd/clcomplex.h") + "\n"; zero(); } PIOpenCL::Context::~Context() { piCout << "destroy context" << this; deletePrograms(); deleteBuffers(); if (PRIVATE->queue) clReleaseCommandQueue(PRIVATE->queue); if (PRIVATE->context) clReleaseContext(PRIVATE->context); zero(); } void * PIOpenCL::Context::handle() { return PRIVATE->context; } void * PIOpenCL::Context::queue() { return PRIVATE->queue; } void PIOpenCL::Context::zero() { programs_.clear(); buffers_.clear(); PRIVATE->context = 0; PRIVATE->queue = 0; PRIVATE->devices.clear(); } void PIOpenCL::Context::deletePrograms() { piCout << "context: delete" << programs_.size() << "programs"; PIVector ptdl = programs_; programs_.clear(); piForeach (Program * p, ptdl) { if (p) delete p; } } void PIOpenCL::Context::deleteBuffers() { piCout << "context: delete" << buffers_.size() << "buffers"; PIVector btdl = buffers_; buffers_.clear(); piForeach (Buffer * b, btdl) { if (b) delete b; } } PIOpenCL::Context * PIOpenCL::Context::create(const PIOpenCL::DeviceList & dl) { if (dl.isEmpty()) return 0; Context * rc = 0; PIVector cldl; for (int i = 0; i < dl.size_s(); ++i) if (dl[i].isValid()) cldl << (cl_device_id)dl[i].id; //piCout << "create for" << dl[0].name << "..."; cl_int ret = 0; cl_context con = clCreateContext(0, cldl.size_s(), cldl.data(), 0, 0, &ret); if (ret != 0) { piCout << "[PIOpenCL::Context]" << "clCreateContext error" << ret; return 0; } cl_command_queue comq = clCreateCommandQueue(con, cldl[0], 0, &ret); if (ret != 0) { piCout << "[PIOpenCL::Context]" << "clCreateCommandQueue error" << ret; return 0; } piCout << "create done for" << dl[0].name; rc = new Context(); rc->PRIVATEWB->context = con; rc->PRIVATEWB->queue = comq; rc->PRIVATEWB->devices = cldl; return rc; } PIOpenCL::Context * PIOpenCL::Context::create(const PIString & part_name) { PIString pn = part_name.toLowerCase(); PIVector dl = PIOpenCL::devices(); piForeachC (Device & d, dl) { if (d.displayText().toLowerCase().contains(pn)) return create(d); } return 0; } PIOpenCL::Program * PIOpenCL::Context::createProgram(const PIString & source, const PIStringList & args, PIString * error) { if (error) error->clear(); if (source.isEmpty()) { if (error) (*error) = "Empty program!"; return 0; } PIString src_text = PRIVATE->complex_src + source; const char * csrc = src_text.dataAscii(); size_t src_size = src_text.size(); cl_int ret = 0; cl_program prog = clCreateProgramWithSource(PRIVATE->context, 1, &csrc, &src_size, &ret); if (ret != 0) { piCout << "[PIOpenCL::Context]" << "clCreateProgramWithSource error" << ret; if (error) (*error) += "clCreateProgramWithSource error " + PIString::fromNumber(ret); return 0; } PIString carg = (PIStringList(args) << "-cl-kernel-arg-info").join(' '); ret = clBuildProgram(prog, 0, 0, carg.dataAscii(), 0, 0); char buffer[10240]; clGetProgramBuildInfo(prog, PRIVATE->devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, 0); if (ret != 0) { clReleaseProgram(prog); piCout << "[PIOpenCL::Context]" << "clBuildProgram error" << ret;// << ":" << buffer; if (error) (*error) = buffer; return 0; } size_t uret = 0; ret = clGetProgramInfo(prog, CL_PROGRAM_NUM_KERNELS, sizeof(uret), &uret, 0); if (ret != 0) { clReleaseProgram(prog); piCout << "[PIOpenCL::Context]" << "clGetProgramInfo error" << ret; if (error) (*error) = "Can`t retrieve CL_PROGRAM_NUM_KERNELS"; return 0; } const int ccnt = 10240; char knames[ccnt]; ret = clGetProgramInfo(prog, CL_PROGRAM_KERNEL_NAMES, ccnt, knames, 0); if (ret != 0) { clReleaseProgram(prog); piCout << "[PIOpenCL::Context]" << "clGetProgramInfo error" << ret; if (error) (*error) = "Can`t retrieve CL_PROGRAM_KERNEL_NAMES"; return 0; } PIStringList knl = PIString(knames).trim().split(";"); PIVector kerns; piForeachC (PIString & k, knl) { cl_kernel kern = clCreateKernel(prog, k.dataAscii(), &ret); if (ret != 0) { piCout << "[PIOpenCL::Context]" << "clCreateKernel" << k << "error" << ret; if (error) (*error) += "clCreateKernel(\"" + k + "\") error " + ret; piForeach (void* _k, kerns) clReleaseKernel((cl_kernel)_k); clReleaseProgram(prog); return 0; } kerns << kern; } //piCout << knl << kerns; Program * rp = new Program(); rp->context_ = this; rp->source_ = source; rp->PRIVATEWB->program = prog; if (!rp->initKernels(kerns)) { delete rp; return 0; } programs_ << rp; return rp; } PIOpenCL::Buffer * PIOpenCL::Context::createBuffer(PIOpenCL::Direction dir, void * container, int type, PIByteArray def, uint elements) { Buffer * ret = new Buffer(); ret->context_ = this; ret->dir = dir; ret->type = (Buffer::Container)type; ret->container = container; ret->def = def; ret->elements = elements; if (!ret->init()) { delete ret; return 0; } buffers_ << ret; return ret; } PIOpenCL::Buffer::Buffer() { zero(); } PIOpenCL::Buffer::~Buffer() { if (context_) context_->buffers_.removeAll(this); if (PRIVATE->buffer) clReleaseMemObject(PRIVATE->buffer); zero(); } void * PIOpenCL::Buffer::handle() { return PRIVATE->buffer; } void PIOpenCL::Buffer::zero() { type = cNone; container = 0; elements = 0; PRIVATE->buffer = 0; } bool PIOpenCL::Buffer::init() { cl_int ret = 0; cl_mem_flags f = container ? CL_MEM_COPY_HOST_PTR : 0; switch (dir) { case Input : f |= CL_MEM_READ_ONLY ; break; case Output : f |= CL_MEM_WRITE_ONLY; break; case InputOutput: f |= CL_MEM_READ_WRITE; break; default: break; } PRIVATE->buffer = clCreateBuffer(context_->PRIVATEWB->context, f, elements * def.size(), container ? containerData() : 0, &ret); if (ret != 0) { piCout << "[PIOpenCL::Buffer]" << "clCreateBuffer error" << ret; return false; } return true; } void * PIOpenCL::Buffer::containerData() { if (type == cNone || !container) return 0; switch (type) { case cVector : return ((PIVector *)container)->data(); case cDeque : return ((PIDeque *)container)->data(); case cVector2D: return ((PIVector2D*)container)->data(); default: break; } return 0; } void PIOpenCL::Buffer::clear() { if (!PRIVATE->buffer) return; if (def.isEmpty() || elements == 0) return; cl_int ret = clEnqueueFillBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, def.data(), def.size_s(), 0, elements * def.size(), 0, 0, 0); if (ret != 0) { piCout << "[PIOpenCL::Buffer]" << "clEnqueueFillBuffer error" << ret; } } void PIOpenCL::Buffer::copyToContainer() { if (!PRIVATE->buffer || !container) return; copyTo(containerData()); } void PIOpenCL::Buffer::copyTo(void * data) { if (!PRIVATE->buffer) return; cl_int ret = clEnqueueReadBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, CL_TRUE, 0, elements * def.size(), data, 0, 0, 0); if (ret != 0) { piCout << "[PIOpenCL::Buffer]" << "clEnqueueReadBuffer error" << ret; } } void PIOpenCL::Buffer::copyTo(void * data, int elements_count, int elements_offset) { if (!PRIVATE->buffer) return; cl_int ret = clEnqueueReadBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, CL_TRUE, elements_offset * def.size(), elements_count * def.size(), data, 0, 0, 0); if (ret != 0) { piCout << "[PIOpenCL::Buffer]" << "clEnqueueReadBuffer error" << ret; } } void PIOpenCL::Buffer::copyFromContainer() { if (!PRIVATE->buffer || !container) return; copyFrom(containerData()); } void PIOpenCL::Buffer::copyFrom(void * data) { if (!PRIVATE->buffer) return; cl_int ret = clEnqueueWriteBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, CL_TRUE, 0, elements * def.size(), data, 0, 0, 0); if (ret != 0) { piCout << "[PIOpenCL::Buffer]" << "clEnqueueWriteBuffer error" << ret; } } void PIOpenCL::Buffer::copyFrom(void * data, int elements_count, int elements_offset) { if (!PRIVATE->buffer) return; cl_int ret = clEnqueueWriteBuffer(context_->PRIVATEWB->queue, PRIVATE->buffer, CL_TRUE, elements_offset * def.size(), elements_count * def.size(), data, 0, 0, 0); if (ret != 0) { piCout << "[PIOpenCL::Buffer]" << "clEnqueueWriteBuffer error" << ret; } } PIOpenCL::Program::Program() { //piCout << "new program" << this; zero(); } PIOpenCL::Program::~Program() { //piCout << "destroy program" << this; if (context_) context_->programs_.removeAll(this); piForeach (Kernel * k, kernels_) delete k; if (PRIVATE->program) clReleaseProgram(PRIVATE->program); zero(); } void PIOpenCL::Program::zero() { context_ = 0; kernels_.clear(); PRIVATE->program = 0; } bool PIOpenCL::Program::initKernels(PIVector kerns) { piForeach (void * _k, kerns) { cl_kernel k = (cl_kernel)_k; //piCout << "init kernel" << k; Kernel * kern = new Kernel(); kern->context_ = context_; kern->program_ = this; kern->PRIVATEWB->kernel = k; if (kern->init()) kernels_ << kern; else delete kern; } return !kernels_.isEmpty(); } bool PIOpenCL::Kernel::execute() { if (dims.isEmpty()) { piCout << "[PIOpenCL::Kernel]" << "Error: empty range"; return false; } cl_int ret = clEnqueueNDRangeKernel(context_->PRIVATEWB->queue, PRIVATE->kernel, dims.size(), 0, dims.data(), 0, 0, 0, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clEnqueueNDRangeKernel error" << ret; return false; } return true; } void PIOpenCL::Kernel::setExecuteRanges(const PIVector & ranges) { dims = ranges.toType(); } PIOpenCL::Kernel::Kernel() { zero(); //piCout << "new Kernel" << this; } PIOpenCL::Kernel::~Kernel() { //piCout << "del Kernel" << this; if (PRIVATE->kernel) clReleaseKernel(PRIVATE->kernel); } void PIOpenCL::Kernel::zero() { PRIVATE->kernel = 0; } bool PIOpenCL::Kernel::init() { char kname[1024]; memset(kname, 0, 1024); cl_int ret = 0; ret = clGetKernelInfo(PRIVATE->kernel, CL_KERNEL_FUNCTION_NAME, 1024, kname, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelInfo(CL_KERNEL_FUNCTION_NAME) error" << ret; return false; } name_ = kname; cl_uint na = 0; ret = clGetKernelInfo(PRIVATE->kernel, CL_KERNEL_NUM_ARGS, sizeof(na), &na, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelInfo(CL_KERNEL_NUM_ARGS) error" << ret; return false; } for (cl_uint i = 0; i < na; ++i) { KernelArg ka; ka.init(PRIVATE->kernel, i); args_ << ka; } //piCout << "kname" << kname << na; return true; } template void setArgV(cl_kernel k, int index, T v) { //piCout << "setArgV" << k << index <= args_.size_s()) { piCout << "[PIOpenCL::Kernel]" << "setArgValue invalid index" << index; return false; } KernelArg & ka(args_[index]); if (ka.dims > 0) { piCout << "[PIOpenCL::Kernel]" << "setArgValue set scalar to \"" << ka.type_name << ka.arg_name << "\""; return false; } switch (ka.arg_type) { case Char : setArgV(PRIVATE->kernel, index, (cl_char)(value.toInt())); break; case UChar : setArgV(PRIVATE->kernel, index, (cl_uchar)(value.toInt())); break; case Short : setArgV(PRIVATE->kernel, index, (cl_short)(value.toInt())); break; case UShort: setArgV(PRIVATE->kernel, index, (cl_ushort)(value.toInt())); break; case Int : setArgV(PRIVATE->kernel, index, (cl_int)(value.toInt())); break; case UInt : setArgV(PRIVATE->kernel, index, (cl_uint)(value.toInt())); break; case Long : setArgV(PRIVATE->kernel, index, (cl_long)(value.toLLong())); break; case ULong : setArgV(PRIVATE->kernel, index, (cl_ulong)(value.toLLong())); break; case Float : setArgV(PRIVATE->kernel, index, (cl_float)(value.toFloat())); break; case Double: setArgV(PRIVATE->kernel, index, (cl_double)(value.toDouble())); break; default: break; } return true; } bool PIOpenCL::Kernel::bindArgValue(int index, Buffer * buffer) { if (!buffer) return false; if (index < 0 || index >= args_.size_s()) { piCout << "[PIOpenCL::Kernel]" << "bindArgValue invalid index" << index; return false; } KernelArg & ka(args_[index]); if (ka.dims <= 0) { piCout << "[PIOpenCL::Kernel]" << "bindArgValue set buffer to \"" << ka.type_name << ka.arg_name << "\""; return false; } clSetKernelArg(PRIVATE->kernel, index, sizeof(buffer->PRIVATEWB->buffer), &(buffer->PRIVATEWB->buffer)); return true; } int PIOpenCL::Kernel::argIndex(const PIString & an) const { for (int i = 0; i < args_.size_s(); ++i) if (args_[i].arg_name == an) return i; return -1; } PIOpenCL::KernelArg PIOpenCL::Kernel::argByName(const PIString & an) const { piForeachC (KernelArg & a, args_) if (a.arg_name == an) return a; return KernelArg(); } PIOpenCL::KernelArg::KernelArg() { address_qualifier = AddressGlobal; access_qualifier = AccessNone; type_qualifier = TypeNone; is_pointer = false; arg_type = Float; dims = 1; } void PIOpenCL::KernelArg::init(void * _k, uint index) { cl_kernel k = (cl_kernel)_k; cl_int ret = 0; char nm[1024]; memset(nm, 0, 1024); ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_TYPE_NAME, 1024, nm, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_TYPE_NAME) error" << ret; } type_name = nm; memset(nm, 0, 1024); ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_NAME, 1024, nm, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_NAME) error" << ret; } arg_name = nm; cl_kernel_arg_address_qualifier addq = 0; ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof(addq), &addq, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_ADDRESS_QUALIFIER) error" << ret; } switch (addq) { case CL_KERNEL_ARG_ADDRESS_GLOBAL : address_qualifier = AddressGlobal; break; case CL_KERNEL_ARG_ADDRESS_LOCAL : address_qualifier = AddressLocal; break; case CL_KERNEL_ARG_ADDRESS_CONSTANT: address_qualifier = AddressConstant; break; case CL_KERNEL_ARG_ADDRESS_PRIVATE : address_qualifier = AddressPrivate; break; } cl_kernel_arg_access_qualifier accq = 0; ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof(accq), &accq, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_ACCESS_QUALIFIER) error" << ret; } switch (accq) { case CL_KERNEL_ARG_ACCESS_READ_ONLY : access_qualifier = AccessReadOnly; break; case CL_KERNEL_ARG_ACCESS_WRITE_ONLY: access_qualifier = AccessWriteOnly; break; case CL_KERNEL_ARG_ACCESS_READ_WRITE: access_qualifier = AccessReadWrite; break; case CL_KERNEL_ARG_ACCESS_NONE : access_qualifier = AccessNone; break; } cl_kernel_arg_type_qualifier tq = 0; ret = clGetKernelArgInfo(k, index, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(tq), &tq, 0); if (ret != 0) { piCout << "[PIOpenCL::Kernel]" << "clGetKernelArgInfo(CL_KERNEL_ARG_TYPE_QUALIFIER) error" << ret; } switch (tq) { case CL_KERNEL_ARG_TYPE_CONST : type_qualifier = TypeConst; break; case CL_KERNEL_ARG_TYPE_RESTRICT: type_qualifier = TypeRestrict; break; case CL_KERNEL_ARG_TYPE_VOLATILE: type_qualifier = TypeVolatile; break; case CL_KERNEL_ARG_TYPE_NONE : type_qualifier = TypeNone; break; } is_pointer = false; base_type_name = type_name; base_type_name.removeAll("__global"); dims = piMaxi(0, base_type_name.entries('*') + base_type_name.entries('[')); base_type_name.removeAll('*'); while (base_type_name.contains('[')) { int i = base_type_name.find('['); base_type_name.remove(i, base_type_name.find(']') - i + 1); } if (base_type_name == "char" ) arg_type = Char ; if (base_type_name == "uchar" ) arg_type = UChar ; if (base_type_name == "short") arg_type = Short; if (base_type_name == "ushort") arg_type = UShort; if (base_type_name == "int" ) arg_type = Int ; if (base_type_name == "uint" ) arg_type = UInt ; if (base_type_name == "long" ) arg_type = Long ; if (base_type_name == "ulong" ) arg_type = ULong ; if (base_type_name == "float" ) arg_type = Float ; if (base_type_name == "double") arg_type = Double; //piCout << type_name << base_type_name; } PICout operator <<(PICout s, const PIOpenCL::KernelArg & v) { s.setControl(0); s << "Arg(" << v.base_type_name << " " << v.arg_name << " (addr=" << v.address_qualifier << ",acc=" << v.access_qualifier << ",typ=" << v.type_qualifier << ",dims=" << v.dims << "))"; s.restoreControl(); return s; }