/*
 * Copies one 4-float record per work-item from dirAndOffsetBuffer to the
 * same position in clResultBuffer, unchanged.
 *
 * Record layout (taken from the indexing below): floats 0..2 are the
 * direction x, y, z and float 3 is an offset value.
 * NOTE(review): presumably a round-trip check that the direction/offset
 * input reaches the device intact -- confirm against the host code.
 *
 *   dirAndOffsetBuffer  input, read at indices [4*id, 4*id + 3]
 *   clResultBuffer      output, written at indices [4*id, 4*id + 3]
 *   numTasks            number of records to process; work-items whose
 *                       id is >= numTasks do nothing
 */
kernel void ensureInputDirBufferOk(global const float *dirAndOffsetBuffer, global float *clResultBuffer, const int numTasks) 
{
	// Index of this work-item along dimension 0; selects which record to copy.
	const int taskId = get_global_id(0);

	// Only work-items that map to a real task touch memory.
	if (taskId < numTasks)
	{
		// Input and output records share the same stride of 4 floats.
		const int recordBase = 4 * taskId;
		global const float *src = dirAndOffsetBuffer + recordBase;
		global float *dst = clResultBuffer + recordBase;

		// Read the whole record before writing anything.
		const float dirX = src[0];
		const float dirY = src[1];
		const float dirZ = src[2];
		const float offsetZ = src[3];

		dst[0] = dirX;
		dst[1] = dirY;
		dst[2] = dirZ;
		dst[3] = offsetZ;
	}
}