/*
 * detectCircles_OLD - one work-item per candidate axis direction.
 *
 * For work-item iWorker the kernel:
 *   1. builds a local frame from dirsBuffer[iWorker] and the origin (x0, y0, z0)
 *      via setVerticalAxis() / setOrigin();
 *   2. rasterises every point whose local coordinates satisfy
 *      |u|, |v| < sliceSize/2 and |w| < sliceThickness/2 into a
 *      CIRCLE_DETECTION_GRID_SIZE x CIRCLE_DETECTION_GRID_SIZE occupancy grid;
 *   3. labels the grid with identifyDistinctFormsInGrid_max254Forms() and
 *      keeps the label found at the centre cell;
 *   4. fits a circle to that label with doCircleLevenbergMarquardt() and
 *      writes the fit, converted back to global coordinates, to clResultBuffer.
 *
 * Parameters:
 *   pointsBuffer    input points; only .x/.y/.z are read.
 *   dirsBuffer      one direction per work-item; only .x/.y/.z are read.
 *   clResultBuffer  output, CIRCLE_DETECTION_RESULT_SIZE floats per work-item,
 *                   addressed through the RESULT_INDEX_* constants.
 *   x0, y0, z0      origin of the local frame.
 *   sliceSize       side length of the square slice mapped onto the grid.
 *   sliceThickness  full thickness of the slice along the local Z axis.
 *   numTasks        number of valid work-items (entries in dirsBuffer).
 *   nPoints         number of entries in pointsBuffer.
 *   scoreThreshold  not used by this kernel.
 *
 * Output contract: every result slot of the work-item is first set to -1 and
 * stays -1 on each early exit (frame construction failed, no form in the grid,
 * centre cell empty).
 */
kernel void detectCircles_OLD(global const float4 *pointsBuffer, global const float4 *dirsBuffer, global float *clResultBuffer, 
	const float x0, const float y0, const float z0, const float sliceSize, const float sliceThickness, 
	const int numTasks, const int nPoints, const float scoreThreshold) 
{
	const int iWorker = get_global_id(0); // the worker ID
	// Bound check: the global work size may exceed the number of tasks.
	if (iWorker >= numTasks)
		return;

	// Mark the whole result record as "no detection" before any early exit.
	const int startIndex = CIRCLE_DETECTION_RESULT_SIZE * iWorker;
	for (int i = 0; i < CIRCLE_DETECTION_RESULT_SIZE; i++)
		clResultBuffer[startIndex + i] = -1.0f;

	const float3 dir = dirsBuffer[iWorker].xyz;
	const float3 origin = (float3)(x0, y0, z0);

	// Local frame storage filled in by setVerticalAxis() / setOrigin().
	float m_data[12] = {0};
	if (!setVerticalAxis(dir, m_data, true))
		return;

	setOrigin(origin, m_data);

	// Occupancy grid, cleared with a single linear pass.
	const int gridCellCount = CIRCLE_DETECTION_GRID_SIZE * CIRCLE_DETECTION_GRID_SIZE;
	uchar grid[CIRCLE_DETECTION_GRID_SIZE*CIRCLE_DETECTION_GRID_SIZE];
	for (int iCell = 0; iCell < gridCellCount; iCell++)
		grid[iCell] = 0;

	const float pixelDim = sliceSize / (float)CIRCLE_DETECTION_GRID_SIZE;

	// Single-precision half extents: an unsuffixed 0.5 is a double literal and
	// would promote every expression using it to double precision.
	const float halfSliceSize = 0.5f * sliceSize;
	const float halfSliceThickness = 0.5f * sliceThickness;

	// Points beyond this cap are ignored; clamp the loop bound once instead of
	// re-testing the cap on every iteration.
	const int nbMaxPointsAllowed = 1000000;
	const int nbPointsToScan = (nPoints < nbMaxPointsAllowed) ? nPoints : nbMaxPointsAllowed;

	for (int i = 0; i < nbPointsToScan; i++)
	{
		const float3 point = pointsBuffer[i].xyz;
		const float u = getLocalX(point, m_data);
		const float v = getLocalY(point, m_data);
		const float w = getLocalZ(point, m_data);

		// Written as a negated conjunction so that NaN coordinates are rejected.
		if (!((fabs(u) < halfSliceSize) && (fabs(v) < halfSliceSize) && (fabs(w) < halfSliceThickness)))
			continue;

		// Nearest-cell rounding: cell k is centred on k * pixelDim, which matches
		// the pixel -> local conversion applied to the fitted centre below.
		const int iCol = (int)round((u + halfSliceSize) / pixelDim);
		const int iRow = (int)round((v + halfSliceSize) / pixelDim);
		// Rounding can yield CIRCLE_DETECTION_GRID_SIZE at the upper edge.
		if (iCol < 0 || iCol >= CIRCLE_DETECTION_GRID_SIZE || iRow < 0 || iRow >= CIRCLE_DETECTION_GRID_SIZE)
			continue;

		grid[iRow * CIRCLE_DETECTION_GRID_SIZE + iCol] = 1;
	}

	// NOTE(review): presumably relabels occupied cells with a per-form id (1..254)
	// in place and returns the number of forms - confirm against the helper.
	const int nbForms = identifyDistinctFormsInGrid_max254Forms(grid, CIRCLE_DETECTION_GRID_SIZE, CIRCLE_DETECTION_GRID_SIZE);
	if (nbForms == 0)
		return;

	// Index of the cell that local (0, 0) rounds to under the mapping above.
	const int searchHalfSize = (CIRCLE_DETECTION_GRID_SIZE-1)/2+1;
	const uchar centricFormId = grid[searchHalfSize * CIRCLE_DETECTION_GRID_SIZE + searchHalfSize];
	if (centricFormId == 0)
		return;

	// NOTE(review): `infinity` is not an OpenCL C built-in (the standard macro is
	// INFINITY); it is assumed to be defined elsewhere in the program - confirm.
	float radiusMaxAllowed = infinity;
	float parametresOptimises[3] = { 0 };
	// NOTE(review): the meaning of the trailing arguments (5, -1) is defined by the
	// helper and not visible here.
	const float scorePix = doCircleLevenbergMarquardt(grid, centricFormId, CIRCLE_DETECTION_GRID_SIZE, CIRCLE_DETECTION_GRID_SIZE, radiusMaxAllowed, parametresOptimises, 5, -1);

	// Fitted centre: pixel units -> local slice coordinates -> global coordinates.
	const float localCx = parametresOptimises[circleParamIndexX] * pixelDim - halfSliceSize;
	const float localCy = parametresOptimises[circleParamIndexY] * pixelDim - halfSliceSize;
	const float3 localC = (float3)(localCx, localCy, 0.0f);

	clResultBuffer[startIndex + RESULT_INDEX_CX] = getGlobalX(localC, m_data);
	clResultBuffer[startIndex + RESULT_INDEX_CY] = getGlobalY(localC, m_data);
	clResultBuffer[startIndex + RESULT_INDEX_CZ] = getGlobalZ(localC, m_data);
	// Radius scales with pixelDim, the score with pixelDim squared.
	clResultBuffer[startIndex + RESULT_INDEX_R] = pixelDim * parametresOptimises[circleParamIndexR];
	clResultBuffer[startIndex + RESULT_INDEX_SCORE] = pixelDim * pixelDim * scorePix;
	clResultBuffer[startIndex + RESULT_INDEX_NBCELLS] = nbPointsActifs(grid, centricFormId, CIRCLE_DETECTION_GRID_SIZE, CIRCLE_DETECTION_GRID_SIZE);
}
