Bootstrap

OpenCL demo code

将下面的 kernel 代码以纯文本文件保存即可(主机端代码按文件名 opencldemo.cl 读取并在运行时编译):

// Element-wise addition: the work-item with global index i (dimension 0)
// stores a[i] + b[i] into result[i].
// There is no length parameter and no bounds check, so the global work size
// used by the host must not exceed the number of elements in the buffers.
__kernel void MyAddTest(__global const float *a, __global const float *b, __global float *result) {
    const int gid = get_global_id(0);
    const float lhs = a[gid];
    const float rhs = b[gid];
    result[gid] = lhs + rhs;
}

主机端代码如下,调用 TestOpenCLAddDemo() 即可运行示例:

#include <CL/opencl.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

int TestOpenCLAddDemo() {
    const int num_len = 100;        // 测试数据长
    cl_int status = 0;				// 函数返回状态
    cl_uint platforms_num = 0;		// 平台个数
    cl_uint devices_num = 0;		// 设备数量
    //1.get platform num
    status = clGetPlatformIDs(0, NULL, &platforms_num);
    if (CL_SUCCESS != status)
    {
        printf("clGetPlatformIDs error\n");
        return -1;
    }
    // 获得平台地址
    cl_platform_id	platform = NULL;
    if (platforms_num > 0)  // 如果有可用平台
    {
        cl_platform_id *pPlatforms = (cl_platform_id *)malloc(platforms_num * sizeof(cl_platform_id));
        status = clGetPlatformIDs(platforms_num, pPlatforms, NULL);
        platform = pPlatforms[0];	
        free(pPlatforms);			
    }
    size_t name_size = 0;			
    status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &name_size);
    char *platform_name = (char *)alloca(name_size * sizeof(char));
    // get name info
    status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, name_size, platform_name, NULL);
    printf("Platform name:%s\n", platform_name);
    //2.get GPU devices
    cl_device_id *devices = NULL;				
    status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &devices_num);

    if (0 == devices_num)	// GPU numbers==0
    {
        printf("Using CPU\n");
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &devices_num);
        devices = (cl_device_id *)malloc(devices_num * sizeof(cl_device_id));
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, devices_num, devices, NULL);
    }
    else
    {
        printf("Using GPU\n");
        devices = (cl_device_id *)malloc(devices_num * sizeof(cl_device_id));
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, devices_num, devices, NULL);
    }

    // 3.create env
    cl_context context = clCreateContext(NULL, 1, devices, NULL, NULL, NULL);
    if (context == NULL)
    {
        printf("clCreateContext error\n");
        return -1;
    }

    // 4.create clCreateCommandQueue
    // 创建第1个设备的命令队列
    cl_command_queue command_queue = clCreateCommandQueue(context, devices[0], 0, NULL);
    if (command_queue == NULL)
    {
        printf("clCreateCommandQueue error\n");
        return -1;
    }
    // 5.create cl
    string filename = "D:/vs2019/cudatest/cutest/src/opencldemo.cl";
    string cl_str;	
    const char	*cl_str_ptr;
    status = GetCLKernel(filename.c_str(), cl_str);
    cl_str_ptr = cl_str.c_str();
    size_t	cl_str_size[] = { 0 };			
    cl_str_size[0] = strlen(cl_str_ptr);
    // 创建程序对象
    cl_program program = clCreateProgramWithSource(context, 1, &cl_str_ptr, cl_str_size, NULL);
    if (program == NULL)
    {
        printf("clCreateProgramWithSource error\n");
        return -1;
    }
    // 6.build cl
    // 编译程序
    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
    if (CL_SUCCESS != status)	// 编译错误
    {
        printf("clBuildProgram error\n");
        char szBuildLog[16384];
        clGetProgramBuildInfo(program, *devices, CL_PROGRAM_BUILD_LOG, sizeof(szBuildLog), szBuildLog, NULL);
        printf("Error in Kernel:%s\n", szBuildLog);
        clReleaseProgram(program);
        return -1;
    }

    //7. create device memery
    int *ina = new int[num_len];
    int *inb = new int[num_len];
    for (int i = 0; i < num_len; ++i) {
        ina[i] = i;
        inb[i] = num_len - i;
    }
    cl_mem data_devicea = NULL;
    cl_mem data_deviceb = NULL;
    cl_mem data_device_res = NULL;
    data_devicea = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,(num_len) * sizeof(int),(void *)ina,NULL);
    data_deviceb = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,(num_len) * sizeof(int),(void *)inb,NULL);
    data_device_res = clCreateBuffer(context,CL_MEM_WRITE_ONLY,(num_len) * sizeof(int),NULL,NULL);
    if ((NULL == data_devicea) || (NULL == data_deviceb) || (NULL == data_device_res))
    {
        printf("clCreateBuffer:create error\n");
        return -1;
    }
    //8.create kernel
    cl_kernel kernel = clCreateKernel(program,"MyAddTest",NULL);
    if (NULL == kernel)
    {
        printf("clCreateKernel error\n");
        return -1;
    }

    //9.set kernel params
    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&data_devicea);
    status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&data_deviceb);
    status |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&data_device_res);
    if (CL_SUCCESS != status)
    {
        printf("clSetKernelArg error\n");
    }

    //10.run
    size_t	get_global_size[1] = { 0 };		// 用于设定内核分布	
    get_global_size[0] = num_len;  // 输入数据长
    // 利用命令队列使将再设备上执行的内核排队
    status = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, get_global_size, NULL, 0, NULL, NULL);
    if (CL_SUCCESS != status)
    {
        printf("clEnqueueNDRangeKernel:run kernel error\n");
        return -1;
    }

    // 11.get results from device
    int *res = NULL;
    res = new int[num_len];  // uiStrlength 为 输入字符串长度
    status = clEnqueueReadBuffer(command_queue, data_device_res, CL_TRUE, 0, num_len * sizeof(int), res, 0, NULL, NULL);
    if (CL_SUCCESS != status)
    {
        printf("clEnqueueReadBuffer:read results error\n");
        return -1;
    }
    // 12.print results
    int print_num = 10;
    cout << "input" << endl;
    for (int i = 0; i < print_num; ++i) {
        printf("%d ", ina[i]);
    }
    printf("\ninput2\n");
    for (int i = 0; i < print_num; ++i) {
        printf("%d ", inb[i]);
    }
    printf("\noutput\n");
    for (int i = 0; i < print_num; ++i) {
        printf("%d ", res[i]);
    }
    printf("\n");
    // -------------------------------13.释放资源-------------------------------- 
    status = clReleaseKernel(kernel);
    status = clReleaseProgram(program);
    status = clReleaseMemObject(data_devicea);
    status = clReleaseMemObject(data_deviceb);
    status = clReleaseMemObject(data_device_res);
    status = clReleaseCommandQueue(command_queue);
    status = clReleaseContext(context);
    delete[] ina;
    delete[] inb;
    delete[] res;
    free(devices);
    free(platform_name);
    printf("done\n");
    return 0;
}
// Reads the OpenCL source file `file_name` into `res`.
//
// file_name: path of the file to read; NULL is rejected.
// res:       receives the file text up to (not including) the first NUL byte,
//            if any. It is only modified on success.
// Returns 0 on success and -1 if the file cannot be opened, its size cannot
// be determined, or it cannot be read completely. A message is printed to
// standard output on failure.
cl_int GetCLKernel(const char *file_name, std::string &res)
{
    if (NULL == file_name)
    {
        std::cout << "Error: Failed to open cl file\n:" << "(null)" << std::endl;
        return -1;
    }

    std::fstream fFile(file_name, (std::fstream::in | std::fstream::binary));
    if (!fFile.is_open())
    {
        std::cout << "Error: Failed to open cl file\n:" << file_name << std::endl;
        return -1;
    }

    // Determine the file size. tellg() reports -1 on failure, which must not
    // be converted to an (enormous) unsigned size.
    fFile.seekg(0, std::fstream::end);
    const std::streamoff end_pos = fFile.tellg();
    fFile.seekg(0, std::fstream::beg);
    if (!fFile || end_pos < 0)
    {
        std::cout << "Error: Failed to read cl file\n:" << file_name << std::endl;
        return -1;
    }

    // Read the whole file into a string buffer; the buffer releases itself on
    // every path.
    std::string contents(static_cast<size_t>(end_pos), '\0');
    if (!contents.empty())
    {
        fFile.read(&contents[0], static_cast<std::streamsize>(contents.size()));
        if (static_cast<size_t>(fFile.gcount()) != contents.size())
        {
            std::cout << "Error: Failed to read cl file\n:" << file_name << std::endl;
            return -1;
        }
    }
    fFile.close();

    // Assigning from the C string keeps the text up to the first NUL byte,
    // matching what callers of this function received before.
    res = contents.c_str();
    return 0;
}

;