#include "cl_helpers.h"
#include <cmath>
#include <ctime>
#include <vector>
#include <sstream>
#include <iostream>
#include <iterator>
#include <algorithm>
using namespace cl;
using namespace std;
// Pixel dimensions (not referenced in the visible part of this file;
// presumably the window size -- confirm against the window setup code).
constexpr unsigned DIMX = 640u;
constexpr unsigned DIMY = 480u;

// Sampling grid: coordinates start at MINIMUM and advance by STEP per grid
// index (see pointGenKernel); MINIMUM/MAXIMUM also drive the chart limits.
constexpr float MINIMUM = 1.0f;
constexpr float MAXIMUM = 20.0f;
constexpr float STEP    = 2.0f;

// Number of grid samples along each axis. Kept as float; callers convert to
// int where an integer count is needed.
constexpr float NELEMS  = (MAXIMUM - MINIMUM + 1) / STEP;

// Eight unsigned values that main() uploads into the `dpoints` buffer.
constexpr unsigned DPOINTS[] = {
    5,  5,
    5,  15,
    15, 5,
    15, 15
};

// NOTE(review): this macro is defined after every #include visible in this
// file, so it cannot influence those headers -- confirm intended placement.
#define USE_FORGE_OPENCL_COPY_HELPERS
// OpenCL C source for `pointGenKernel`.
//
// The kernel runs over a 2D range; work-item (i, j) with i, j < NELEMS writes
//   points[2*id .. 2*id+1] = (MINIMUM + i*STEP, MINIMUM + j*STEP)
//   dirs[2*id .. 2*id+1]   = (sin(2*PI*x/10), sin(2*PI*y/10))
// where id = i + j*NELEMS. Work-items outside that range do nothing, so the
// global size may be rounded up past NELEMS.
//
// All floating-point literals carry the `f` suffix so the kernel uses only
// single precision: double support is optional in OpenCL, and unsuffixed
// literals would otherwise pull in double arithmetic (or compiler
// diagnostics) depending on the device.
static const std::string fieldKernel =
R"EOK(
constant float PI = 3.14159265359f;
kernel
void pointGenKernel(global float* points, global float* dirs, int NELEMS, float MINIMUM, float STEP)
{
    int i = get_global_id(0);
    int j = get_global_id(1);
    if (i<NELEMS && j<NELEMS) {
        int id  = i + j * NELEMS;
        float x = MINIMUM + i*STEP;
        float y = MINIMUM + j*STEP;
        points[2*id+0] = x;
        points[2*id+1] = y;
        dirs[2*id+0] = sin(2.0f*PI*x/10.0f);
        dirs[2*id+1] = sin(2.0f*PI*y/10.0f);
    }
}
)EOK";
/// Returns (a + b - 1) / b using integer division, i.e. a / b rounded up
/// when both arguments are positive.
inline int divup(int a, int b)
{
    const int biased = a + b - 1;
    return biased / b;
}
/**
 * Enqueues `pointGenKernel` to fill `points` and `dirs` on the device.
 *
 * On the first call the source in `fieldKernel` is built for `device` and the
 * resulting program and kernel are cached in function-local statics; later
 * calls reuse them. If the build (or kernel creation) throws cl::Error, the
 * error and the program build log are printed to stdout and the process
 * exits with status 255.
 *
 * The launch is only enqueued on `queue`; this function does not wait for it.
 *
 * @param points  buffer bound to kernel argument 0 (sample coordinates)
 * @param dirs    buffer bound to kernel argument 1 (direction components)
 * @param queue   queue the kernel is enqueued on; its context owns the program
 * @param device  device the program is built for
 */
void generatePoints(cl::Buffer& points, cl::Buffer& dirs,
                    cl::CommandQueue& queue, cl::Device &device)
{
    static bool compileFlag = true;
    static cl::Program prog;
    static cl::Kernel  pointGenKernel;
    if (compileFlag) {
        try {
            prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), fieldKernel, false);
            std::vector<cl::Device> devs;
            devs.push_back(device);
            prog.build(devs);
            pointGenKernel = cl::Kernel(prog, "pointGenKernel");
        } catch (const cl::Error& err) {
            // Caught by const reference: no copy of the exception object.
            std::cout << "Compile Errors: " << std::endl;
            std::cout << err.what() << err.err() << std::endl;
            std::cout << prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
            exit(255);
        }
        std::cout<< "Kernels compiled successfully" << std::endl;
        compileFlag = false;
    }

    // NELEMS is a float constant; the kernel and divup() both want an int.
    const int elemsPerAxis = static_cast<int>(NELEMS);

    static const cl::NDRange local(8, 8);
    // Round the global size up to a multiple of the work-group size; the
    // kernel itself ignores work-items beyond elemsPerAxis.
    cl::NDRange global(local[0] * divup(elemsPerAxis, static_cast<int>(local[0])),
                       local[1] * divup(elemsPerAxis, static_cast<int>(local[1])));

    pointGenKernel.setArg(0, points);
    pointGenKernel.setArg(1, dirs);
    pointGenKernel.setArg(2, elemsPerAxis);
    pointGenKernel.setArg(3, MINIMUM);
    pointGenKernel.setArg(4, STEP);
    queue.enqueueNDRangeKernel(pointGenKernel, cl::NullRange, global, local);
}
// Program entry point: configures the chart, creates the OpenCL buffers,
// uploads DPOINTS and runs generatePoints(). A cl::Error thrown inside the
// try block is printed as "what(code)"; the function then returns 0.
//
// NOTE(review): in this view the body is incomplete -- `chart`, `divPoints`,
// `wnd`, `context`, `queue` and `handles` are used without a visible
// declaration, the `do {` below has no visible closing `} while (...)`, and
// `err` is referenced before the catch clause that declares it. Confirm
// against the full file before changing any code here.
int main(void)
{
    try {
        
        // Axes run from MINIMUM-1 to MAXIMUM in both directions.
        chart.
setAxesLimits(MINIMUM-1.0f, MAXIMUM, MINIMUM-1.0f, MAXIMUM);
        divPoints.
setColor(0.9f, 0.9f, 0.0f, 1.f);
        // Total sample count: NELEMS per axis, two axes.
        size_t npoints = NELEMS*NELEMS;
        
        context = createCLGLContext(wnd);
        // Use the first device reported by the context.
        Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
        queue = CommandQueue(context, device);
        // dpoints holds the 8 unsigned values of DPOINTS; points and dirs
        // each hold two floats per sample.
        cl::Buffer dpoints(context, CL_MEM_READ_WRITE, sizeof(unsigned)*8);
        cl::Buffer points(context, CL_MEM_READ_WRITE, sizeof(float)*2*npoints);
        cl::Buffer dirs(context, CL_MEM_READ_WRITE, sizeof(float)*2*npoints);
        // CL_TRUE is passed as the blocking flag of the write.
        queue.enqueueWriteBuffer(dpoints, CL_TRUE, 0, sizeof(unsigned)*8, DPOINTS);
        generatePoints(points, dirs, queue, device);
        do {
        
        releaseGLBuffer(handles[0]);
        releaseGLBuffer(handles[1]);
        releaseGLBuffer(handles[2]);
        std::cout << err.
what() << 
"(" << err.
err() << 
")" << std::endl;
    } catch (cl::Error err) {
        // Report the OpenCL error text and numeric code.
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    }
    return 0;
}