#include "cl_helpers.h"
#include <mutex>
#include <complex>
#include <cmath>
#include <vector>
#include <iostream>
#include <iterator>
#include <algorithm>
// Bounds of the sampled domain: x runs over [XMIN, XMAX), y over [YMIN, YMAX).
static constexpr float XMIN{-8.0f};
static constexpr float XMAX{8.0f};
static constexpr float YMIN{-8.0f};
static constexpr float YMAX{8.0f};

// Grid spacing; the same step is used to derive both XSIZE and YSIZE.
constexpr float DX{0.5f};

// Number of grid samples along each axis (extent divided by the step,
// truncated to an unsigned integer).
constexpr unsigned XSIZE{static_cast<unsigned>((XMAX - XMIN) / DX)};
constexpr unsigned YSIZE{static_cast<unsigned>((YMAX - YMIN) / DX)};

using namespace std;

// NOTE(review): presumably a feature switch read by a Forge header; no include
// that could consume it is visible after this point -- confirm its placement.
#define USE_FORGE_OPENCL_COPY_HELPERS
// OpenCL C source for the "surf" kernel, compiled at run time by kernel().
//
// Each work-item (i, j) with i < w and j < h computes the sample position
//   x = xmin + i*dx,  y = ymin + j*dx
// and stores three consecutive floats at index 3 * (j + i*h) of `out`:
//   x, y, sin(z)/z   where z = sqrt(x*x + y*y) + 2.2204e-16.
// The small constant added to z keeps the divisor non-zero when x and y are
// both zero. Work-items outside the w-by-h range write nothing.
static const std::string sin_surf_kernel =
R"EOK(
kernel
void surf(global float* out, const float dx,
          const float xmin, const float ymin,
          const unsigned w, const unsigned h)
{
    int i = get_global_id(0);
    int j = get_global_id(1);
    float x = xmin + i*dx;
    float y = ymin + j*dx;
    if (i<w && j<h) {
        int offset = j + i * h;
        out[ 3 * offset     ] = x;
        out[ 3 * offset + 1 ] = y;
        float z = sqrt(x*x+y*y) + 2.2204e-16;
        out[ 3 * offset + 2 ] = sin(z)/z;
    }
}
)EOK";
// Returns (a + b - 1) / b using integer division. For positive a and b this
// is the number of chunks of size b needed to cover a elements.
inline
int divup(int a, int b)
{
    const int numerator = a + b - 1;
    return numerator / b;
}
/**
 * Builds the "surf" program on the first call, then enqueues it on `queue`.
 *
 * @param devOut  Device buffer passed to the kernel as its output argument;
 *                the kernel writes three floats per grid point, so it must
 *                hold at least XSIZE * YSIZE * 3 floats.
 * @param queue   Command queue the kernel is enqueued on; its context is also
 *                used to create the program on the first call.
 * @param device  Device the program is built for (first call only).
 *
 * If creating or building the program fails, the error and the build log are
 * printed to std::cout and the process exits with status 255. Errors raised
 * while setting arguments or enqueueing are not handled here.
 */
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue, cl::Device& device)
{
    // The program and kernel objects are created once and reused afterwards.
    static bool compileFlag = true;
    static cl::Program prog;
    static cl::Kernel  kern;

    if (compileFlag) {
        try {
            prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), sin_surf_kernel, false);
            std::vector<cl::Device> devs(1, device);
            prog.build(devs);
            kern = cl::Kernel(prog, "surf");
        } catch (const cl::Error& err) {
            // Caught by const reference to avoid copying the exception object.
            std::cout << "Compile Errors: " << std::endl;
            // Same "what(code)" layout as the handler in main().
            std::cout << err.what() << "(" << err.err() << ")" << std::endl;
            std::cout << prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
            exit(255);
        }
        std::cout<< "Kernels compiled successfully" << std::endl;
        compileFlag = false;
    }

    // 8x8 work-groups; the global size is rounded up to a whole number of
    // work-groups per axis. The kernel itself skips out-of-range work-items.
    const cl::NDRange local(8, 8);
    const cl::NDRange global(
        local[0] * divup(static_cast<int>(XSIZE), static_cast<int>(local[0])),
        local[1] * divup(static_cast<int>(YSIZE), static_cast<int>(local[1])));

    // Argument order follows the signature of `surf` in sin_surf_kernel.
    kern.setArg(0, devOut);
    kern.setArg(1, DX);
    kern.setArg(2, XMIN);
    kern.setArg(3, YMIN);
    kern.setArg(4, XSIZE);
    kern.setArg(5, YSIZE);

    queue.enqueueNDRangeKernel(kern, cl::NullRange, global, local);
}
// Program entry point. As visible here it creates a CL context from
// createCLGLContext(wnd), takes the first device of that context, creates a
// command queue and a device buffer, and runs kernel() once. Any cl::Error
// is printed as "what(code)" to std::cout. Always returns 0.
//
// NOTE(review): this function looks truncated. `context`, `queue`, `wnd` and
// `handle` are not declared in the visible code, the `do {` below has no
// matching `while`, and `err` is referenced inside the try body before the
// catch clause that declares it. Confirm against the full source.
int main(void)
{
    try {
        
        
        context = createCLGLContext(wnd);
        // Use the first device reported by the context.
        Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
        queue = CommandQueue(context, device);
        // Three floats per grid point, matching what the surf kernel writes.
        cl::Buffer devOut(context, CL_MEM_READ_WRITE, sizeof(float) * XSIZE * YSIZE * 3);
        kernel(devOut, queue, device);
        
        // NOTE(review): loop body and its terminating `while` appear to be missing.
        do {
        releaseGLBuffer(handle);
        // NOTE(review): `err` is not in scope at this point in the visible code.
        std::cout << err.
what() << 
"(" << err.
err() << 
")" << std::endl;
    // NOTE(review): the exception is caught by value; catching by const
    // reference would avoid a copy.
    } catch (cl::Error err) {
        std::cout << err.what() << "(" << err.err() << ")" << std::endl;
    }
    return 0;
}