将下面的 kernel 源码保存为文本文件 opencldemo.cl（主机端代码会在运行时读取并编译该文件）：
/*
 * Element-wise addition kernel (OpenCL C).
 *
 * Each work-item adds one pair of elements: result[i] = a[i] + b[i], where i
 * is the work-item's global id along dimension 0.
 *
 *   a, b   - input float buffers in global memory (read only)
 *   result - output float buffer in global memory
 *
 * There is no bounds check, so the global work size used at launch must not
 * exceed the number of elements in any of the three buffers.
 */
__kernel void MyAddTest(__global const float *a, __global const float *b, __global float *result) {
    const int gid = get_global_id(0);
    const float lhs = a[gid];
    const float rhs = b[gid];

    result[gid] = lhs + rhs;
}
主机端代码：调用 TestOpenCLAddDemo() 即可加载并运行上面的 kernel：
#include <CL/opencl.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
int TestOpenCLAddDemo() {
const int num_len = 100; // 测试数据长
cl_int status = 0; // 函数返回状态
cl_uint platforms_num = 0; // 平台个数
cl_uint devices_num = 0; // 设备数量
//1.get platform num
status = clGetPlatformIDs(0, NULL, &platforms_num);
if (CL_SUCCESS != status)
{
printf("clGetPlatformIDs error\n");
return -1;
}
// 获得平台地址
cl_platform_id platform = NULL;
if (platforms_num > 0) // 如果有可用平台
{
cl_platform_id *pPlatforms = (cl_platform_id *)malloc(platforms_num * sizeof(cl_platform_id));
status = clGetPlatformIDs(platforms_num, pPlatforms, NULL);
platform = pPlatforms[0];
free(pPlatforms);
}
size_t name_size = 0;
status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &name_size);
char *platform_name = (char *)alloca(name_size * sizeof(char));
// get name info
status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, name_size, platform_name, NULL);
printf("Platform name:%s\n", platform_name);
//2.get GPU devices
cl_device_id *devices = NULL;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &devices_num);
if (0 == devices_num) // GPU numbers==0
{
printf("Using CPU\n");
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &devices_num);
devices = (cl_device_id *)malloc(devices_num * sizeof(cl_device_id));
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, devices_num, devices, NULL);
}
else
{
printf("Using GPU\n");
devices = (cl_device_id *)malloc(devices_num * sizeof(cl_device_id));
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, devices_num, devices, NULL);
}
// 3.create env
cl_context context = clCreateContext(NULL, 1, devices, NULL, NULL, NULL);
if (context == NULL)
{
printf("clCreateContext error\n");
return -1;
}
// 4.create clCreateCommandQueue
// 创建第1个设备的命令队列
cl_command_queue command_queue = clCreateCommandQueue(context, devices[0], 0, NULL);
if (command_queue == NULL)
{
printf("clCreateCommandQueue error\n");
return -1;
}
// 5.create cl
string filename = "D:/vs2019/cudatest/cutest/src/opencldemo.cl";
string cl_str;
const char *cl_str_ptr;
status = GetCLKernel(filename.c_str(), cl_str);
cl_str_ptr = cl_str.c_str();
size_t cl_str_size[] = { 0 };
cl_str_size[0] = strlen(cl_str_ptr);
// 创建程序对象
cl_program program = clCreateProgramWithSource(context, 1, &cl_str_ptr, cl_str_size, NULL);
if (program == NULL)
{
printf("clCreateProgramWithSource error\n");
return -1;
}
// 6.build cl
// 编译程序
status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
if (CL_SUCCESS != status) // 编译错误
{
printf("clBuildProgram error\n");
char szBuildLog[16384];
clGetProgramBuildInfo(program, *devices, CL_PROGRAM_BUILD_LOG, sizeof(szBuildLog), szBuildLog, NULL);
printf("Error in Kernel:%s\n", szBuildLog);
clReleaseProgram(program);
return -1;
}
//7. create device memery
int *ina = new int[num_len];
int *inb = new int[num_len];
for (int i = 0; i < num_len; ++i) {
ina[i] = i;
inb[i] = num_len - i;
}
cl_mem data_devicea = NULL;
cl_mem data_deviceb = NULL;
cl_mem data_device_res = NULL;
data_devicea = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,(num_len) * sizeof(int),(void *)ina,NULL);
data_deviceb = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,(num_len) * sizeof(int),(void *)inb,NULL);
data_device_res = clCreateBuffer(context,CL_MEM_WRITE_ONLY,(num_len) * sizeof(int),NULL,NULL);
if ((NULL == data_devicea) || (NULL == data_deviceb) || (NULL == data_device_res))
{
printf("clCreateBuffer:create error\n");
return -1;
}
//8.create kernel
cl_kernel kernel = clCreateKernel(program,"MyAddTest",NULL);
if (NULL == kernel)
{
printf("clCreateKernel error\n");
return -1;
}
//9.set kernel params
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&data_devicea);
status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&data_deviceb);
status |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&data_device_res);
if (CL_SUCCESS != status)
{
printf("clSetKernelArg error\n");
}
//10.run
size_t get_global_size[1] = { 0 }; // 用于设定内核分布
get_global_size[0] = num_len; // 输入数据长
// 利用命令队列使将再设备上执行的内核排队
status = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, get_global_size, NULL, 0, NULL, NULL);
if (CL_SUCCESS != status)
{
printf("clEnqueueNDRangeKernel:run kernel error\n");
return -1;
}
// 11.get results from device
int *res = NULL;
res = new int[num_len]; // uiStrlength 为 输入字符串长度
status = clEnqueueReadBuffer(command_queue, data_device_res, CL_TRUE, 0, num_len * sizeof(int), res, 0, NULL, NULL);
if (CL_SUCCESS != status)
{
printf("clEnqueueReadBuffer:read results error\n");
return -1;
}
// 12.print results
int print_num = 10;
cout << "input" << endl;
for (int i = 0; i < print_num; ++i) {
printf("%d ", ina[i]);
}
printf("\ninput2\n");
for (int i = 0; i < print_num; ++i) {
printf("%d ", inb[i]);
}
printf("\noutput\n");
for (int i = 0; i < print_num; ++i) {
printf("%d ", res[i]);
}
printf("\n");
// -------------------------------13.释放资源--------------------------------
status = clReleaseKernel(kernel);
status = clReleaseProgram(program);
status = clReleaseMemObject(data_devicea);
status = clReleaseMemObject(data_deviceb);
status = clReleaseMemObject(data_device_res);
status = clReleaseCommandQueue(command_queue);
status = clReleaseContext(context);
delete[] ina;
delete[] inb;
delete[] res;
free(devices);
free(platform_name);
printf("done\n");
return 0;
}
/**
 * Reads the whole file at file_name into res.
 *
 * The file is opened in binary mode, so its bytes are copied unchanged (no
 * newline translation).
 *
 * @param file_name path of the file to read; NULL is treated as an open
 *                  failure
 * @param res       receives the file contents on success; left untouched on
 *                  failure
 * @return 0 on success (including an empty file), -1 if the file cannot be
 *         opened, its size cannot be determined, or it cannot be read
 *         completely. An error message is written to std::cout on failure.
 */
cl_int GetCLKernel(const char *file_name, std::string &res)
{
    if (file_name == NULL)
    {
        std::cout << "Error: Failed to open cl file\n:" << "(null)" << std::endl;
        return -1;
    }

    std::ifstream file(file_name, std::ios::in | std::ios::binary);
    if (!file.is_open())
    {
        std::cout << "Error: Failed to open cl file\n:" << file_name << std::endl;
        return -1;
    }

    // Determine the file size by seeking to the end. tellg() reports -1 on
    // failure, which must not be turned into an (enormous) unsigned size.
    file.seekg(0, std::ios::end);
    const std::streamoff length = file.tellg();
    if (length < 0)
    {
        std::cout << "Error: Failed to read cl file\n:" << file_name << std::endl;
        return -1;
    }
    file.seekg(0, std::ios::beg);

    // Read into a local string first so res is modified only on success.
    std::string contents(static_cast<size_t>(length), '\0');
    if (length > 0)
    {
        file.read(&contents[0], static_cast<std::streamsize>(length));
        if (file.gcount() != static_cast<std::streamsize>(length))
        {
            std::cout << "Error: Failed to read cl file\n:" << file_name << std::endl;
            return -1;
        }
    }

    res.swap(contents);
    return 0;
}