#include "cl_helpers.h"
#include <mutex>
#include <vector>
#include <sstream>
#include <iostream>
#include <iterator>
#include <algorithm>
using namespace cl;
using namespace std;
// NOTE(review): presumably the render-window size in pixels; neither value is
// referenced in the code visible in this file section -- confirm against callers.
constexpr unsigned DIMX = 1000u;
constexpr unsigned DIMY = 800u;

// Step between consecutive samples and the bounds of the sampled range.
constexpr float dx           = 0.1f;
constexpr float FRANGE_START = 0.0f;
constexpr float FRANGE_END   = 2 * 3.141592f;

// Number of samples: the range length divided by the step, truncated toward zero.
constexpr unsigned DATA_SIZE =
    static_cast<unsigned>((FRANGE_END - FRANGE_START) / dx);

#define USE_FORGE_OPENCL_COPY_HELPERS
// OpenCL C source for the `sinf` kernel, compiled at run time.
//
// Each work item with global id x < DATA_SIZE writes one interleaved pair:
//   out[2*x]     = x * dx
//   out[2*x + 1] = f(x * dx), where f is chosen by fnCode:
//                  0 -> sin, 1 -> cos, 2 -> tan, 3 -> log10
// Work items with x >= DATA_SIZE write nothing. For any other fnCode the
// switch has no matching case, so out[2*x + 1] is left unwritten.
// Note that x * dx is 0 for the first work item, so the log10 case is
// evaluated at 0 there.
static const std::string sinf_ocl_kernel = R"(
kernel void sinf(global float* out, const float dx, const unsigned DATA_SIZE, int fnCode)
{
    unsigned x = get_global_id(0);
    if(x < DATA_SIZE) {
        out[2 * x] = x * dx ;
        switch(fnCode) {
            case 0:
                out[ 2 * x + 1 ] = sin(x*dx);
                break;
            case 1:
                out[ 2 * x + 1 ] = cos(x*dx);
                break;
            case 2:
                out[ 2 * x + 1 ] = tan(x*dx);
                break;
            case 3:
                out[ 2 * x + 1 ] = log10(x*dx);
                break;
        }
    }
}
)";
/**
 * Enqueue the `sinf` OpenCL kernel to fill a device buffer with (x, f(x)) pairs.
 *
 * The program is built and the kernel object created only on the first call
 * (guarded by std::call_once), using the context of the queue passed to that
 * first call; every later call reuses the same kernel object.
 *
 * The function only enqueues the kernel; it does not wait for it to finish.
 *
 * @param devOut  Device buffer receiving the output. The kernel writes
 *                2 * DATA_SIZE floats (interleaved x and f(x) values).
 * @param queue   Command queue on which the kernel is enqueued.
 * @param fnCode  Function selector forwarded to the kernel
 *                (0 = sin, 1 = cos, 2 = tan, 3 = log10).
 */
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue, int fnCode)
{
    static std::once_flag   compileFlag;
    static cl::Program      prog;
    static cl::Kernel       kern;

    // call_once runs the callable synchronously on this thread, so the queue
    // can be captured by reference rather than copying the handle.
    std::call_once(compileFlag,
        [&queue]() {
            // `true` asks the wrapper to build the program immediately.
            prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), sinf_ocl_kernel, true);
            kern = cl::Kernel(prog, "sinf");
        });

    // One work item per sample: the kernel ignores every global id >= DATA_SIZE,
    // so launching more than DATA_SIZE items only creates idle work items.
    static const cl::NDRange global(DATA_SIZE);

    kern.setArg(0, devOut);
    kern.setArg(1, dx);
    kern.setArg(2, DATA_SIZE);
    kern.setArg(3, fnCode);
    queue.enqueueNDRangeKernel(kern, cl::NullRange, global);
}
// Program entry point: sets up an OpenCL context and queue, allocates four
// device buffers and runs the sample-generating kernel once per buffer
// (fnCode 0..3). Always returns 0; a caught cl::Error is printed to stdout.
//
// NOTE(review): this function looks incomplete as shown. `context`, `queue`,
// `wnd` and `handles` are used without a visible declaration, the `do {`
// below has no matching `while`, and `err` is used before the catch clause
// declares it. Lines appear to be missing -- confirm against the original
// source before relying on this code.
int main(void)
{
    try {
        
        
        
        
        
        // Create the context from `wnd` and use its first device for the queue.
        context = createCLGLContext(wnd);
        Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
        queue = CommandQueue(context, device);
        // One buffer per function; each holds 2 * DATA_SIZE floats.
        cl::Buffer sinOut(context, CL_MEM_READ_WRITE, sizeof(float) * DATA_SIZE * 2);
        cl::Buffer cosOut(context, CL_MEM_READ_WRITE, sizeof(float) * DATA_SIZE * 2);
        cl::Buffer tanOut(context, CL_MEM_READ_WRITE, sizeof(float) * DATA_SIZE * 2);
        cl::Buffer logOut(context, CL_MEM_READ_WRITE, sizeof(float) * DATA_SIZE * 2);
        // Enqueue the kernel for each buffer: 0 = sin, 1 = cos, 2 = tan, 3 = log10.
        kernel(sinOut, queue, 0);
        kernel(cosOut, queue, 1);
        kernel(tanOut, queue, 2);
        kernel(logOut, queue, 3);
        
        // NOTE(review): the loop body and its `while (...)` condition are not
        // present here; presumably a render loop -- TODO confirm.
        do {
        releaseGLBuffer(handles[0]);
        releaseGLBuffer(handles[1]);
        releaseGLBuffer(handles[2]);
        releaseGLBuffer(handles[3]);
        // NOTE(review): `err` is not in scope at this point; this statement
        // duplicates the catch handler's output below.
        std::cout << err.
what() << 
"(" << err.
err() << 
")" << std::endl;
    } catch (cl::Error err) {
        // Print the error description followed by the numeric error code.
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    }
    return 0;
}