/* Some notes about OpenCL.
 * Many GPUs (Quadro included) use little-endian byte ordering, whereas Java uses big-endian by default. Luckily the
 * Jogamp library already converts the CLBuffers whenever necessary; however, sending custom raw byte data requires
 * paying attention to the endianness.
 */

//IDE-only shims. __OPENCL_VERSION__ is predefined by OpenCL compilers, so this branch is skipped during an actual
//OpenCL compile; it is only taken by a plain C parser (used to trick Eclipse into syntax highlighting this file as "C" code).
//The OpenCL qualifiers are defined away to nothing, and the vector types are collapsed onto scalar types, which is
//enough for highlighting but is NOT meant to produce compilable C.
#ifndef __OPENCL_VERSION__
//Kernel/address-space qualifiers expand to nothing:
#define __kernel
#define __global
#define kernel
#define global
#define constant
#define local
//Vector types are replaced by their scalar element type:
#define float2 float
#define float3 float
#define float4 float
#define uchar4 char
#endif

/**
 * NOTE: On current GPUs (and for huge performance boosts) a struct's size must be evenly divisible
 * by the size of its *largest member* type, so that accessing any member is always "memory aligned".
 * Hence we will NOT be using a float4 here, but will instead break it into its 4 parts. */

/**
 * This is the point as streamed directly from an octree node.
 * The position is stored as three separate floats (rather than one vector member), followed by two int members.
 */
struct stPoint {
	float	x;			//x-coordinate
	float	y;			//y-coordinate
	float	z;			//z-coordinate
	//The ARGB color packed in 32 bits; the alpha channel is the Intensity.
	//NOTE(review): the previous comment said "(cast to float)" but this member is declared int - confirm how the host side packs it.
	int		m_color;
	int		m_iStation;	//The station ID that scanned this point. Usually 1+.
};

/**
 * This is an OpenCL output, where each element is stored in an array.
 * One element corresponds to a single work-item associated with a single 3D point that is the leading "winner" of the tournament
 * that will eventually be flattened onto a BufferedImage. Java never sees the data in this struct; it is entirely a temporary workspace for OCL.
 * Within this file, the canvas-initialization kernels only write m_index (-1) and m_destDistance; the remaining members are left untouched by them.
 */
struct stPixelRecord {
	int		m_index;		//The pixel's 1D index in the depthBuffer (need WxH to get ColxRow). Negative values indicate flags/errors (-1 is written as the "empty" marker).
	uchar4	m_depth;		//The pixel's packed data (4 bytes): buffer distance, offsets, and quality (alpha channel).
	int		m_ARGB;			//The pixel's 32-bit ARGB color from the octree point; the alpha channel is the Intensity.
	float	m_align;		//The pixel's alignment: dot product relative to source station (used for scoring).
	float	m_captureDistance;//The pixel's distance from its capture source (used for scoring).
	float	m_destDistance;	//The pixel's distance from the destination station (used for scoring). Initialized to INFINITY or the existing depth-buffer distance.
	int		m_numAve;		//The number of points that have passed the tournament to compute their running-average.
};

/**
 * Tests whether a 3D point lies inside (or exactly on) a sphere centered at the origin of the volume's local frame.
 * @param volumeParamBuffer The sphere's parameter stream, already positioned at its head. Only the radius is read from it.
 * @param point The 3D point to test; it is transformed by srcMtxInv before the comparison.
 * @param srcMtxInv Global to local matrix transformation.
 * @return True if the transformed point is no farther from the local origin than the radius.
 */
bool containedInSphere(constant float *volumeParamBuffer, const float3 point, constant float* srcMtxInv) {
	uint cursor = 0; //Read position inside the sphere's stream
	const float3 localPt = transformPoint_arrayVersion(srcMtxInv, point); //Point expressed in the volume's reference frame
	const float r = readFloat(&cursor, volumeParamBuffer); //The stream holds the radius only
	const float distSq = dot(localPt.xyz, localPt.xyz); //Squared distance from the sphere center (the local origin)
	return r*r >= distSq; //Compare squared values to avoid a sqrt
}

/**
 * Computes how far a 3D point lies outside a sphere centered at the origin of the volume's local frame.
 * @param volumeParamBuffer The sphere's parameter stream, already positioned at its head. Only the radius is read from it.
 * @param point The 3D point to measure; it is transformed by srcMtxInv first.
 * @param srcMtxInv Global to local matrix transformation.
 * @return The distance from the sphere's surface (measured in the local frame), or 0.0 when the point is contained.
 */
float distanceToSphere(constant float *volumeParamBuffer, const float3 point, constant float* srcMtxInv) {
	uint cursor = 0; //Read position inside the sphere's stream
	const float3 localPt = transformPoint_arrayVersion(srcMtxInv, point); //Point expressed in the volume's reference frame
	const float r = readFloat(&cursor, volumeParamBuffer); //The stream holds the radius only
	const float centerDist = sqrt(dot(localPt.xyz, localPt.xyz)); //Distance from the sphere center (the local origin)
	const float gap = centerDist - r; //Negative when the point is inside the sphere
	return max(gap, 0.f); //Clamp: contained points report 0.0
}

/**
 * Tests whether a 3D coordinate is contained within an extruded polygon volume.
 * The stream holds the extrusion height followed by the header of the base polygon. The point is first rejected against
 * the header's U/V extents and the [0,height] range along N, and is then projected onto the base plane for the polygon test.
 * @param volumeParamBuffer Pointer at the start of a param buffer describing this volume.
 * @param point The 3D point to check whether inside this single volume.
 * @return true/false if point is/isn't contained in the single volume.
 */
bool containedInExtPoly(constant float *volumeParamBuffer, const float3 point) {
	uint cursor = 0; //Read position inside the volume's stream
	const float extrusionHeight = readFloat(&cursor, volumeParamBuffer); //The overall height precedes any polygon header
	struct CPolyHeader base = readPolyHeader(volumeParamBuffer + cursor); //Header starts right after the height

	const float3 rel = point - base.O; //Point relative to the polygon's origin

	const float u = dot(rel, base.U);
	if (u < base.uMin || u > base.uMax)
		return false; //Outside the extent along U

	const float v = dot(rel, base.V);
	if (v < base.vMin || v > base.vMax)
		return false; //Outside the extent along V

	const float n = dot(rel, base.N);
	if (n < 0 || n > extrusionHeight)
		return false; //Below the base or above the extrusion height

	//The polygon test needs the point projected onto the base plane, not the raw point.
	const float3 footprint = base.O + u*base.U + v*base.V;
	return belongsToPolygon(footprint, base, true);
}

/**
 * Intersects a line with a sphere centered at the origin of the volume's local frame.
 * The eye is transformed by srcMtxInv; the direction is used exactly as given, and the returned distances are not rescaled.
 * NOTE(review): confirm that srcMtxInv carries no rotation/scale for spheres, otherwise the direction would need the same treatment.
 * @param volumeParamBuffer The sphere's parameter stream, already positioned at its head. Only the radius is read from it.
 * @param eye The 3D point which emits the unit vector.
 * @param direction A UNIT vector defining the direction of the intersecting line to check. Both the positive and negative directions are checked.
 * @param srcMtxInv Global to local matrix transformation.
 * @return The [max,min] signed distances from the eye along direction where the sphere surface is intersected,
 * or [+INF,-INF] when the line misses the sphere.
 */
float2 intersectsSphere2(constant float *volumeParamBuffer, const float3 eye, const float3 direction, constant float* srcMtxInv) {
	uint cursor = 0; //Read position inside the sphere's stream
	const float r = readFloat(&cursor, volumeParamBuffer);
	const float3 eyeLocal = transformPoint_arrayVersion(srcMtxInv, eye); //Eye relative to the sphere center, which is [0,0,0]

	const float proj = dot(direction.xyz, eyeLocal.xyz); //Eye offset projected onto the ray direction
	const float eyeDistSq = dot(eyeLocal.xyz, eyeLocal.xyz); //Squared distance eye -> center
	const float disc = proj*proj - eyeDistSq + r*r; //Discriminant of the line/sphere quadratic
	if (disc < 0.f)
		return (float2)(INFINITY, -INFINITY); //Imaginary roots: the line misses the sphere

	const float root = sqrt(disc);
	const float farHit = root - proj; //larger root
	const float nearHit = -proj - root; //smaller root
	return (float2)(farHit, nearHit);
}

/**
 * Returns The [max,min] distances from the eye along direction where the polygon surface intersected.
 * @param volumeParamBuffer
 * @param eye The 3D point which emits the unit vector.
 * @param direction A UNIT vector defining the direction of the intersecting line to check. Both the positive and negative directions are checked.
 * @return The [max,min] distances from the eye along direction where the volume surface intersected. Both may be -INF to INF.
 */
float2 intersectsExtPoly2(constant float *volumeParamBuffer, const float3 eye, const float3 direction) {
	uint p = 0;
	float height = readFloat(&p, volumeParamBuffer);//Unused, but must be read.
	float4 inter;
	const float2 lamInit = (float2)(-INFINITY,INFINITY);//The initial max/min distances along the line (inverted temporarily until end of function)
	float2 lam = lamInit;//The initial max/min distances along the line (inverted temporarily until end of function)

	//The base polygon
	struct CPolyHeader polyHeader = readPolyHeader(volumeParamBuffer + p);
	inter = intersectionDroitePolygone(eye, direction, polyHeader);
	if ((inter.x != 0.f) || (inter.y != 0.f) || (inter.z != 0.f) || (inter.w != 0.f)) {
		if (inter.w > lam.s0)
			lam.s0 = inter.w; //new max
		if (inter.w < lam.s1)
			lam.s1 = inter.w; //new min
	}
	//The top polygon
	polyHeader = readPolyHeader(polyHeader.afterDataBuffer);//read next header just after last data
	inter = intersectionDroitePolygone(eye, direction, polyHeader);
	if ((inter.x != 0.f) || (inter.y != 0.f) || (inter.z != 0.f) || (inter.w != 0.f)) {
		if (inter.w > lam.s0)
			lam.s0 = inter.w; //new max
		if (inter.w < lam.s1)
			lam.s1 = inter.w; //new min
	}
	//Each side polygon
	int nSides = polyHeader.nVertices;
	for (int iSide = 0; iSide < nSides; iSide++) {
		polyHeader = readPolyHeader(polyHeader.afterDataBuffer);//read next header just after last data
		inter = intersectionDroitePolygone(eye, direction, polyHeader);
		if ((inter.x != 0.f) || (inter.y != 0.f) || (inter.z != 0.f) || (inter.w != 0.f)) {//Poly contains the point
			if (inter.w > lam.s0)
				lam.s0 = inter.w; //new max
			if (inter.w < lam.s1)
				lam.s1 = inter.w; //new min
		}
	}
	if (lam.s0 == lamInit.s0)
		lam.s0 = INFINITY; //No max intersection found
	if (lam.s1 == lamInit.s1)
		lam.s1 = -INFINITY; //No min intersection found
	return lam;
}
/**
 * Returns the [max,min] signed distances from the eye along a direction where the surface of a capped cylinder is intersected.
 * The cylinder is described in its own local frame: axis along y, extent |y| <= halfHeight, radius in the xz-plane.
 * Distances are measured in that local frame (no scale factor is applied here).
 * @param volumeParamBuffer The cylinder's data buffer: half height followed by radius.
 * @param listMtxs The matrix chunk of this volume; it is handed to the point/vector transform helpers to convert global to volume-local coordinates.
 * @param globalEye The 3D point which emits the direction vector.
 * @param globalDir A UNIT vector defining the direction of the intersecting line to check. Both the positive and negative directions are checked.
 * @return The pair of signed distances (larger line parameter first), one of:
 *	[d, -INF]   the line is tangent to the side wall (single solution),
 *	[+INF,-INF] or [+INF,+INF] the line misses the cylinder,
 *	[d0, d1]    the line passes through the cylinder (side wall and/or caps).
 */
float2 intersectsCylinder(constant float *volumeParamBuffer, constant float *listMtxs, const float3 globalEye, const float3 globalDir) {
	uint p = 0;//Pointer for volumeParamBuffer[p]
	const float fHalfHeight	= readFloat(&p, volumeParamBuffer); //The half height in y-direction
	const float fRadius		= readFloat(&p, volumeParamBuffer); //The radius in xz-direction
	const float3 e = transformPoint_arrayVersion(listMtxs, globalEye); //Convert global to volume-local coordinates
	const float3 v = transformVector_arrayVersion(listMtxs, globalDir); //Convert global to volume-local coordinates
	const float2 noHit = (float2)(INFINITY, -INFINITY);

	//Quadratic A*t^2 + B*t + C = 0 for the infinitely long cylinder (xz-plane only)
	const float C = -fRadius*fRadius + e.x*e.x + e.z*e.z;
	const float B = 2.f*(e.x*v.x + e.z*v.z);
	const float A = v.x*v.x + v.z*v.z;

	if (A == 0.f) { //Line is parallel to the axis: the quadratic degenerates (dividing by A would yield NaN)
		if (C > 0.f || v.y == 0.f)
			return noHit; //Runs outside the radius (or the direction is degenerate): never touches the cylinder
		//Inside the radius: the line can only enter/leave through the two caps
		const float tTop = ( fHalfHeight - e.y)/v.y;
		const float tBot = (-fHalfHeight - e.y)/v.y;
		const float tFar = fmax(tTop, tBot);
		const float tNear = fmin(tTop, tBot);
		return (float2)(length(tFar*v)*sign(tFar), length(tNear*v)*sign(tNear));
	}

	const float quot = B*B - 4*A*C; //Discriminant
	if (quot < 0.f) //No solutions
		return noHit;
	if (quot == 0.f) { //1 solution (tangent)
		const float t = -B/2/A;
		if (fabs(t*v.y+e.y) <= fHalfHeight)
			return (float2)(length(t*v)*sign(t), -INFINITY); //Signed, like the two-solution case, so hits behind the eye stay negative
		return noHit; //Doesn't intersect within the cylinder's height
	}

	//2 solutions found (for infinitely long cylinder), but whether they intersect within the halfHeight must be checked now:
	const float squot = sqrt(quot);
	float2 t;
	t.s0 = (-B + squot)/2/A; //1st solution (larger parameter)
	t.s1 = (-B - squot)/2/A; //2nd solution (smaller parameter)
	const float2 interH = (float2)(t.s0*v.y+e.y, t.s1*v.y+e.y); //The y-coordinate (height) at which each solution meets the infinite cylinder.
	const bool bOut0 = fabs(interH.s0) > fHalfHeight; //1st solution lies beyond a cap
	const bool bOut1 = fabs(interH.s1) > fHalfHeight; //2nd solution lies beyond a cap

	//Both solutions beyond the SAME cap: the line passes entirely above or below the cylinder.
	if (bOut0 && bOut1 && sign(interH.s0) == sign(interH.s1))
		return (float2)(INFINITY, INFINITY);

	//Any solution beyond a cap is replaced by the line's intersection with that cap. This also covers a line entering
	//through one cap and leaving through the other (both solutions out, on opposite sides).
	//v.y cannot be 0 here: with v.y == 0 both heights equal e.y, so they are either both inside or both beyond the same cap.
	if (bOut0)
		t.s0 = (sign(interH.s0)*fHalfHeight - e.y)/v.y;
	if (bOut1)
		t.s1 = (sign(interH.s1)*fHalfHeight - e.y)/v.y;

	//Parametric along the line, signed by the side of the eye on which the hit lies
	return (float2)(length(t.s0*v)*sign(t.s0), length(t.s1*v)*sign(t.s1));
}
/**
 * Returns the [max,min] distances from the eye along direction where the box's surface is intersected.
 * The eye and direction are converted into the box's local frame, intersected with an axis-aligned box there,
 * and the resulting distances are multiplied by the scale stored at element 32 of listMtxs.
 * @param volumeParamBuffer The box's data buffer, read in this order: half length in x, half length in z, half height (y).
 * @param listMtxs Two 4x4 matrices and scalar: [inv mtx, fwd mtx, scale].
 * @param globalEye The 3D point which emits the unit vector.
 * @param globalDir A UNIT vector defining the direction of the intersecting line to check. Both the positive and negative directions are checked.
 * @return The [max,min] distances from the eye along direction where the box surface intersected. Both may be -INF to INF.
 */
float2 intersectsBox(constant float *volumeParamBuffer, constant float *listMtxs, const float3 globalEye, const float3 globalDir) {
	uint cursor = 0; //Read position inside the box's stream
	const float halfX = readFloat(&cursor, volumeParamBuffer);
	const float halfZ = readFloat(&cursor, volumeParamBuffer); //Note the stream order: x, z, then y
	const float halfY = readFloat(&cursor, volumeParamBuffer);

	const float3 eyeLocal = transformPoint_arrayVersion(listMtxs, globalEye); //Global -> volume-local position
	const float3 dirLocal = transformVector_arrayVersion(listMtxs, globalDir); //Global -> volume-local direction

	const float2 localHits = intersectsAABB(halfX, halfY, halfZ, eyeLocal, dirLocal);
	const float scale = listMtxs[32]; //The scalar that follows the two 4x4 matrices
	return localHits * scale;
}
/**
 * Converts any 2 signed distances from an eye that were along a direction line, into the distances of 2 half-rays from that same eye.
 * Positive inputs lie in the 0-degree direction (along the ray), negative inputs in the 180-degree direction (behind the eye).
 * Infinite inputs act as "no intersection" sentinels. Both outputs are always written.
 * @param lam The two signed distances along the line (in any order). It is a by-value copy and is modified locally.
 * @param fDist[2] Output: the distances in the [0,180] directions (both non-negative), where either
 * could be INFINITY if there was no intersection in its respective direction.
 */
void convert180(float2 lam, float *fDist) {
	//1-2 solutions, but we must return both positive distances at [0,180] degrees from eye/direction
	//An exact 0 would be neither "in front" nor "behind", so it is nudged to mathEpsilon
	//(defined elsewhere; presumably the smallest positive value - verify) and thereby treated as the 0-direction.
	if (lam.s0 == 0)
		lam.s0 = mathEpsilon;//Return smallest non-zero value possible
	if (lam.s1 == 0)
		lam.s1 = mathEpsilon;//Return smallest non-zero value possible

	if (lam.s0 == lam.s1) { //Only keep 1 unique solution!
		if (lam.s0 > 0) //Unique solution in the 0-direction
			lam.s1 = INFINITY;
		else {//Unique solution in the 180-direction
			lam.s0 = INFINITY;
			lam.s1 *= -1;//ensure positive
		}
		fDist[0] = lam.s0;
		fDist[1] = lam.s1;
		return;
	}
	if (lam.s0 < lam.s1) { //Swap them, always store the "more positive" one first (useful later)!
		float fTemp = lam.s1;
		lam.s1 = lam.s0;
		lam.s0 = fTemp;
	}
	if (lam.s0 > 0) { //The "more positive" intersect is in 0-direction
		if (lam.s1 > 0) {//both intersect the volume in the 0-direction from eye
			fDist[0] = lam.s0 == INFINITY ? lam.s1 : lam.s0; //Take the farther hit, unless it is the INFINITY sentinel
			fDist[1] = INFINITY; //return the "more positive" non-infinite distance at 0-direction (and INF at 180-degrees).
			return;
		}
		else {//if (lam.s1 < 0)//We're inside the volume, need to return both!
			fDist[0] = lam.s0;
			fDist[1] = -lam.s1; //The hit behind the eye, returned as a positive distance in the 180-direction (INFINITY when lam.s1 is -INFINITY).
			return;
		}
	}
	//The "more positive" intersect is in 180-direction
	//NOTE(review): after the swap above lam.s1 <= lam.s0, and lam.s0 is not > 0 here, so this branch looks unreachable for ordinary (non-NaN) inputs.
	if (lam.s1 > 0) {//We're inside the volume, need to return both!
		fDist[0] = lam.s1;
		fDist[1] = -lam.s0; //return the distances BOTH as positive in opposite directions
		return;
	}
	//else if (lam.s1 < 0)//both negative, intersect behind the eye (180 degrees away)
	fDist[0] = INFINITY;
	fDist[1] = (lam.s1 == -INFINITY) ? -lam.s0 : -lam.s1;//return the farthest non-infinite distance at 180 degrees (and INF at 0-degrees).
}

/**
 * Check for point containment with a sphere -OR- compute the distances of a line's intersection with it at [0,180] degrees.
 * The operation is an intersection only when BOTH globalDir and fDist are non-null; otherwise it is a containment test.
 * @param volumeParamBuffer The sphere's parameter stream, already positioned at the head of this volume.
 * @param coords [contains] The 3D point to check inside this single volume.
 * 				[intersection] The origin that the unit direction vector emanates from.
 * @param listMtxs The matrix chunk of this volume, forwarded to the sphere helpers as the global-to-local transformation.
 * @param globalDir null (0) if checking point, otherwise the direction vector to check for ray intersection (starting at origin coords)
 * @param fDist[2] null (0) if checking point, otherwise the RESULT of the intersection is written here: [0,1] are the [0,180] positive distances,
 * 				or INFINITY where the volume does NOT intersect the ray.
 * @return [contains] true/false if point is/isn't contained in the single volume. [intersection] always false.
 */
bool touchesSphere2(constant float *volumeParamBuffer, const float3 coords, constant float *listMtxs, const float3 *globalDir, float *fDist) {
	if (globalDir != NULL && fDist != NULL) { //A *line intersects the volume* query
		const float2 lam = intersectsSphere2(volumeParamBuffer, coords, *globalDir, listMtxs);
		convert180(lam, fDist);
		return false;
	}
	//Otherwise a *point is contained within the volume* query
	return containedInSphere(volumeParamBuffer, coords, listMtxs);
}

/**
 * Check for point containment with an extruded polygon -OR- compute the distances of a line's intersection with it at [0,180] degrees.
 * The operation is an intersection only when BOTH globalDir and fDist are non-null; otherwise it is a containment test.
 * @return [contains] true/false if the point is/isn't contained. [intersection] always false, the result is written to fDist[2].
 */
bool touchesExtPoly2(constant float *volumeParamBuffer, const float3 coords, const float3 *globalDir, float *fDist) {
	if (globalDir != NULL && fDist != NULL) { //A *line intersects the volume* query
		const float2 lam = intersectsExtPoly2(volumeParamBuffer, coords, *globalDir);
		convert180(lam, fDist);
		return false;
	}
	//Otherwise a *point is contained within the volume* query
	return containedInExtPoly(volumeParamBuffer, coords);
}

/**
 * Check for point containment with a cylinder -OR- compute the distances of a line's intersection with it at [0,180] degrees.
 * The operation is an intersection only when BOTH globalDir and fDist are non-null; otherwise it is a containment test,
 * which succeeds when the distance to the cylinder is exactly 0.
 * @return [contains] true/false if the point is/isn't contained. [intersection] always false, the result is written to fDist[2].
 */
bool touchesCylinder(constant float *volumeParamBuffer, const float3 coords, constant float *listMtxs, const float3 *globalDir, float *fDist) {
	if (globalDir != NULL && fDist != NULL) { //A *line intersects the volume* query
		const float2 lam = intersectsCylinder(volumeParamBuffer, listMtxs, coords, *globalDir);
		convert180(lam, fDist);
		return false;
	}
	//Otherwise perform a "contains" operation
	const float gap = distanceToCylinder(volumeParamBuffer, coords, listMtxs, true);
	return gap == 0.f;
}

/**
 * Check for point containment with a box -OR- compute the distances of a line's intersection with it at [0,180] degrees.
 * The operation is an intersection only when BOTH globalDir and fDist are non-null; otherwise it is a containment test,
 * which succeeds when the distance to the box is exactly 0.
 * @return [contains] true/false if the point is/isn't contained. [intersection] always false, the result is written to fDist[2].
 */
bool touchesBox(constant float *volumeParamBuffer, const float3 globalCoords, constant float *listMtxs, const float3 *globalDir, float *fDist) {
	if (globalDir != NULL && fDist != NULL) { //A *line intersects the volume* query, emitted from globalCoords
		const float2 lam = intersectsBox(volumeParamBuffer, listMtxs, globalCoords, *globalDir);
		convert180(lam, fDist);
		return false;
	}
	//Otherwise perform a "contains" operation
	const float gap = distanceToBox(volumeParamBuffer, globalCoords, listMtxs, true);
	return gap == 0.f;
}

/**
 * Check for point containment (if globalDir==NULL or fDistOut==NULL) or line intersection (if both are non-NULL) with a list of volumes.
 * [Intersect] For each of the [0,180] directions, keeps the smallest distance reported by any volume (always positive),
 * together with the operation flag of the volume that produced it.
 * [Contains] Stops at the first volume that contains the point and reports that volume's operation flag in iFlagOut[0].
 * All outputs are reset to their defaults (INFINITY distances, 0 flags) before the stream is validated, so they are well
 * defined even when the function returns early because of an invalid volume count.
 * @param volumeParamBuffer A float stream containing floats/ints (all 4 byte aligned, very important!) information about all volumes
 * serialized (concatenated) into a single "stream" buffer: [numVols] then per volume [type, operation flag, length, params...].
 * @param globalCoords [2 possibilities] The global 3D point to check if it resides inside the volumes list, OR the global eye of the ray tracing.
 * @param listMtxs Concatenated matrix transformation list, 1 for each volume FOR EACH project! Each having 33 floats (two 4x4 matrices and a scalar) float values: [inv matrix, fwd matrix, scale]).
 * @param globalDir [2 possibilities] NULL for "contains" operation, otherwise the GLOBAL direction UNIT vector (ray) emitted from the eye (at globalCoords).
 * @param fDistOut[2] [Intersect] The RESULTING distances at [0,180] degrees:
 *	>0.0: Distance to the closest volume intersection reported for that direction.
 * 	+Inf: No intersection with any volume (also the value for unknown volume types and for an invalid volume count).
 * @param iFlagOut[2] May be NULL. [Intersect] The operation flag of the winning volume per direction. [Contains] Element 0 holds the flag
 * of the containing volume. 0 when no volume won.
 * @return depends whether it was a contains or intersect operation (see input arguments):
 * 	 false: [Contains] The point was NOT contained in any of the volumes (or the volume count was invalid). [Intersect] Always.
 * 	 true: [Contains] The point is contained in at least one of the volumes.
 */
bool touchesVolume2(constant float *volumeParamBuffer, const float3 globalCoords, constant float *listMtxs, const float3 *globalDir, float *fDistOut, int *iFlagOut) {
	const bool bIntersectOp = (globalDir != NULL && fDistOut != NULL); //"Intersect" operation (instead of "contains")

	//Reset every output BEFORE validating the stream, so that callers never read stale/uninitialized values after the early-out below.
	if (bIntersectOp)
		fDistOut[0] = fDistOut[1] = INFINITY; //Nearest volume's intersection distance at [0,180], always positive.
	if (iFlagOut != NULL) //Reset operations to NONE
		iFlagOut[0] = iFlagOut[1] = 0;

	uint p = 0; //Initially point to the START of volumeParamBuffer to step through the stream.
	bool bContained = false; //"Contains" operation fails by default
	const int numVols = readInt(&p, volumeParamBuffer); //Read first int, containing number of shapes (accepted range: 1-30).
	if (numVols < 1 || numVols > 30) //TODO: remove max cap?
		return bContained; //Error: May have read the buffer incorrectly

	uint pMXB = 0; //START of listMtxs to step through the stream for SOME volumes that need it (not all volumes will)
	for (int v = 0; v < numVols; v++) { //Cycle through all volumes, reading the volumeParamBuffer stream
		const int volType = readInt(&p, volumeParamBuffer); //read 32-bit integer, containing type (class_id) of current volume
		const int volOper = readInt(&p, volumeParamBuffer); //read 32-bit integer, containing operation flag type of current volume
		const int buffLength = readInt(&p, volumeParamBuffer); //read 32-bit integer, containing length of the current volume (in 32-bit increments)
		constant float *volParamBuffStart = volumeParamBuffer + p; //Points to the beginning of the volume info, each touches() method will assume it's starting from this address
		float fDist[2] = {INFINITY, INFINITY}; //This volume's result; stays "no intersection" for unknown volume types
		switch (volType) {
			case 3408: //Sphere:
				bContained = touchesSphere2(volParamBuffStart, globalCoords, listMtxs+pMXB, globalDir, fDist);
			break;
			case 3419: //Extruded Polygon:
				bContained = touchesExtPoly2(volParamBuffStart, globalCoords, globalDir, fDist);
			break;
			//Not handled: 3416 (mesh item, list of triangles), 3402/3403 (deprecated cylinder items)
			case 3430: //RvGenericCylinderItem
				bContained = touchesCylinder(volParamBuffStart, globalCoords, listMtxs+pMXB, globalDir, fDist);
			break;
			case 3426: //RvSegmentationItem (treated as a box)
			case 3431: //RvGenericBoxItem
				bContained = touchesBox(volParamBuffStart, globalCoords, listMtxs+pMXB, globalDir, fDist);
			break;
			default: //Error: Invalid volume type, skip immediately.
			break;
		}
		if (!bIntersectOp) { //A single "Contains" operation
			if (bContained) {//It was contained in the current volume, break immediately, don't search further volumes
				if (iFlagOut != NULL)
					iFlagOut[0] = volOper; //The single point in a "contains", store the flag operation
				return true;
			}
		}
		else { //"Intersects" operation
			for (int i = 0; i < 2; i++){ //0 or 180 degrees direction
				if (fDist[i] < fDistOut[i]) {//If it's closer, save the newest/closest one
					fDistOut[i] = fDist[i];
					if (iFlagOut != NULL)
						iFlagOut[i] = volOper;
				}
			}
		}
		p += buffLength; //Jump to the next volume in the volumeParamBuffer.
		pMXB += MTX_CHUNK_SIZE; //Jump to the next volume in the matrix buffer.
	} //Volume list loop
	return bContained; //"Contains" found no volumes, -OR- "Intersection" operation (closest distances were written to fDistOut)
}

/**
 * Initializes the pixel-record canvas so that every pixel is flagged as "empty" for the tournament.
 * Needed when creating a virtual station (not a demolition); the demolition already performs this
 * initialization in selectPixelsIntersectingVolList2.
 * Only m_index and m_destDistance are written; the other members of each record are left as they are.
 * @param canvasRecords The list of the canvas pixel point-records.
 * @param canvasSize The size of the canvas (width*height).
 */
kernel void initRecordCanvas(global struct stPixelRecord *canvasRecords, const int canvasSize) {
	const int iPixel = get_global_id(0);
	if (iPixel < canvasSize) { //Excess workers beyond the canvas do nothing
		global struct stPixelRecord *pRecord = canvasRecords + iPixel;
		pRecord->m_index = -1; //Invalid until the tournament populates it
		pRecord->m_destDistance = INFINITY; //Ensure the default points don't obstruct new points during the tournament
	}
}

/**
 * The input 2D image containing buffer depth information will be "intersected" with the input volume list, returning a mask image describing whether
 * the volumes were contained, intersected (at what distance), or neither. Only needs HALF of the pixel workers, since each worker is assigned the 2 pixels
 * corresponding to a ray emitted in BOTH directions from the eye of the scan.
 * This will initialize all canvasRecords[i].m_index = -1 before.
 * @param destDepthFlags The 2D output (float) mask describing the results of the contain()/intersect operation for each "pixel" in the image.
 * @param canvasRecords The 2D output record canvas, which for now should only store the m_index flag.
 * @param srcDepthBuffer The 2D image to check for points contained in volume list specified by volumeParamBuffer.
 * @param volumeParamBuffer A byte stream containing information about all volumes serialized (concatenated) into a single "stream" buffer.
 * @param matFrustumToOctree A matrix transformation that converts inner frustum (demolition locator or virtual station) coordinates into an octree's
 * data coordinate system so that its points can be used directly.
 * @param listMtxs Concatenated matrix transformation list, 1 for each volume FOR EACH project! Each having 33 floats (two 4x4 matrices and a scalar) float values: [inv matrix, fwd matrix, scale]).
 * @param depthW The number of pixels horizontally across the image.
 * @param depthH The number of pixels vertically across the image.
 * @return (output written to destDepthFlags):
 * 	   0: The point exists in the scanorama, but was NOT contained in any of the volumes.
 * 	+Inf: ERROR: a point was contained in a volume, BUT has no intersect with any volumes. May be round-off error between contains/intersects methods?
 *	-Inf: No pixel info was in the scanorama, and there was no intersection with any of the volumes
 *	 NAN: The volume shape was not a known type!
 *	  >0: Distance to the rear surface of the closest(?) volume intersection with ray (from eye to existing scanaorama point).
 *	  <0: Distance to the rear surface of the closest(?) volume intersection with ray (from eye to estimated non-existing scanaorama angular coordinate).
 */
kernel void selectPixelsIntersectingVolList2(
		global float *destDepthFlags,
		global struct stPixelRecord *canvasRecords,
		global const uchar4 *srcDepthBuffer,
		constant float *volumeParamBuffer,
		constant float *matFrustumToOctree,
		constant float *listMtxs,
		const int depthW, const int depthH)
{ //can't have constant/global on scalar inputs
	const int iWorker = get_global_id(0);//The worker ID, note that it will only span HALF the image domain, each worker is responsible for 2 pixels each.
	if (iWorker >= (depthW/2)*depthH) //Don't need workers more than half the image size
		return; //skip if out of bounds, may happen when rounding-up to achieve an image size evenly-divisible by group-size.
	int row[2]; //The "forward" and "reverse" row coordinate
	row[0] = iWorker / (depthW/2); //get the row in the direction of the pixel
	row[1] = depthH-1 - row[0]; //get the vertically 180 degree opposite row
	int col[2]; //The "forward" and "reverse" column coordinate
	col[0] = iWorker % (depthW/2); //Get the column in the left-half of the image
	col[1] = depthW/2 + col[0]; //180 degree phase shift horizontally in the column index (somewhere in the right-half of the image)
	uint pixel_id[2]; //The full image dest index for output
	float fDist[2] = {INFINITY,INFINITY}; //The source DB distance at the pixel (and 180 counterpart). INFINITY if NO POINT.
	bool bCheckIntersect[2];//True if the point was contained, or a construction volume is involved (meaning we need to check the intersection next)
	//Will store the operation flag of the winning (closest) volume. Initialized here because touchesVolume2 may
	//return early (invalid volume count) and the flags are read right after the call.
	int iFlagOut[2] = {0, 0};
	for (int i = 0; i < 2; i++) { //Do CONTAINED for "forward" (0) and "reverse" (1) directions (the pixels which are exactly 180 degrees apart)
		pixel_id[i] = (row[i] * depthW) + col[i];//This is the pixel index in the FULL image!
		uchar4 depthInfo = srcDepthBuffer[pixel_id[i]];
		float3 localCoords = getDepthBufferLocalCoordinates(depthInfo, col[i], row[i], depthW, depthH);//xyz coordinates of the point relative to the station
		float fLocalDist = length(localCoords); //positive if the point exists, 0 if not (in the scanorama's pixel)
		/*
		 * PINEAPPLE TODO: This is where the mixing of demolitions/constructions breaks down. We need to determine which operation applies to each pixel
		 * but doing that would require refactoring below.
		 * In the case of exclusive demolition, we DON'T want to set fDist[i] = fLocalDist, but rather leave it INFINITY so the tournament can simply ALWAYS overwrite the point record in dest canvas.
		 * An exclusive Construction ALWAYS requires the 2nd step of checking intersection, whereas an exclusive demolition can do an early-out optimization.
		 * 1) If a demolition exists then call contains (and possibly intersect if no data existed).
		 * 2) If a construction exists ALWAYS check for intersection.
		 * 3) If both exist, both must be performed, can't even early-out when checking contains (of all volumes)!
		 */
		//Determine if this point coordinate is contained in ANY of the volumes (only necessary for demolition because a construction only cares about intersection distance with existing source DB)
		float3 globalCoords = transformPoint_arrayVersion(matFrustumToOctree, localCoords); //Convert local to global coordinates
		bCheckIntersect[i] = touchesVolume2(volumeParamBuffer, globalCoords, listMtxs, NULL, NULL, iFlagOut); //determine if contained in list of 3D volumes ("contains" writes its flag to iFlagOut[0])
		if (iFlagOut[0] == 2) //A construction volume was involved with this pixel, MUST check for intersection in NEXT step below
			bCheckIntersect[i] = true;
		if (fLocalDist > 0.f && iFlagOut[0] != 1) //NOT a demolition, and a valid point already exists in the source DB
			fDist[i] = fLocalDist; //We will store that in the record canvas in case the tournament needs it.
	}

	float fMask[2] = {-INFINITY, -INFINITY}; //assume non-existent pixels initially

	if (bCheckIntersect[0] || bCheckIntersect[1]) { //At least 1 pixel point was either contained inside a volume, or construction always checks intersection.
		//So we need to know the distance to the rear face of the (closest) volume in demolition, but in construction we just need the distance to the existing point.
		float3 dirScan  = getLocalDirection_Panoramic((float)col[0]+0.5f, (float)row[0]+0.5f, depthW, depthH); //Unit vector direction from eye
		float3 dirOctree = transformVector_arrayVersion(matFrustumToOctree, dirScan); //Convert scan-local to octree-Data
		float3 zero = (float3)(0.f); //Zero vector: origin in local coordinates
		float3 globalOrigin = transformPoint_arrayVersion(matFrustumToOctree, zero); //Origin of station in global coordinates: station "eye".
		//Will store the intersection distances in 0/180 directions. Defaults to "no intersection" so the loop below
		//never reads indeterminate values, whatever touchesVolume2 does.
		float fIntersect[2] = {INFINITY, INFINITY};
		touchesVolume2(volumeParamBuffer, globalOrigin, listMtxs, &dirOctree, fIntersect, iFlagOut); //Check if the line intersects any of the volumes
		for (int i = 0; i < 2; i++) { //[0,180] directions
			if (!bCheckIntersect[i])
				continue; //Knew beforehand that this didn't need to be checked
			if (fIntersect[i] == INFINITY)
				continue; //No pixel ray intersection occurred
			//TODO (construction, iFlagOut[i] == 2): the mask and canvasRecords[].m_destDistance were meant to take fDist[i]
			//(the depth buffer distance) instead of the rear-volume distance. For now every volume is treated as a demolition:
			//only overwrite pixels that the ray intersected, and negate the pixels that didn't exist at all.
			//NOTE(review): fDist[i] is either a positive source distance or INFINITY, so (fDist[i] > 0.f) is always true
			//and the mask is never negated here - confirm whether "pixel does not exist" was meant to be tested instead.
			fMask[i] = fIntersect[i] * ((fDist[i] > 0.f) ? 1 : -1);
		}
	}

	for (int i = 0; i < 2; i++){ //at 0/180 degree pixel values
		canvasRecords[pixel_id[i]].m_index = -1; //To INIT the canvasRecords, at the very minimum we must set the index to -1.
		canvasRecords[pixel_id[i]].m_destDistance = fDist[i]; //This is the existing source depth buffer distance, in case we need to know later during the tournament. INFINITY for No pixel
		destDepthFlags[pixel_id[i]] = fMask[i];
	}
}

/**
 * Tells whether a mask value marks its pixel as selected.
 * @param maskDist The mask value stored for one pixel.
 * @return false when the value is 0, +INFINITY or -INFINITY; true for every other value.
 */
bool isSelectedMask(float maskDist) {
	if (maskDist == 0.f)
		return false; //Zero is one of the "not selected" sentinels
	if (maskDist == INFINITY || maskDist == -INFINITY)
		return false; //Both infinities are "not selected" sentinels
	return true;
}

/**
 * Removes orphaned pixels from the 2D mask: a pixel is deleted (set to -INFINITY) when too many of the
 * pixels in its square neighborhood are not selected (see isSelectedMask). Only the mask is needed,
 * not the 3D point locations, colors, etc.
 * The neighborhood is a 9x9 stencil (half-range of 4) centered on the pixel, the pixel itself included.
 * Columns wrap around periodically; rows closer than the half-range to the top/bottom are left untouched.
 * NOTE(review): the mask is read and written in the same buffer with no synchronization between
 * work-items, so a neighbor may be seen either before or after its own deletion.
 * @param destDepthFlags [in/out] The mask, one float per pixel, row-major.
 * @param W, H Width and height of the image.
 */
kernel void smudgeMask(global float *destDepthFlags, const int W, const int H) {
	const int halfRange = 4; //Stencil reaches this many pixels in every direction
	const float maxBlankRatio = 0.6f; //Delete the pixel when the unselected share of the stencil exceeds this
	const int index = get_global_id(0);
	const int row = index / W;
	const int col = index % W;
	if (row >= H)
		return; //Excess unused worker, out of bounds.
	if (row < halfRange || row >= H - halfRange)
		return; //Stencil would leave the image vertically, skip these rows entirely.

	const int side = 2*halfRange + 1;
	const int stencilSize = side*side;
	int blankCount = 0; //Number of unselected pixels found in the stencil
	for (int dc = -halfRange; dc <= halfRange; dc++) {
		int wrappedCol = col + dc;
		if (wrappedCol < 0)
			wrappedCol += W; //periodic wrapping
		else if (wrappedCol >= W)
			wrappedCol -= W; //periodic wrapping
		for (int dr = -halfRange; dr <= halfRange; dr++) {
			const float neighbor = destDepthFlags[(row + dr)*W + wrappedCol];
			if (!isSelectedMask(neighbor))
				blankCount++;
		}
	}
	if ((float)blankCount/(float)stencilSize > maxBlankRatio)
		destDepthFlags[index] = -INFINITY; //Delete this point
}

/**
 * Given a single point from the octree, determine whether this scanning station can see it, has a valid different scanner sourceID,
 * and at which pixel coordinate it lies in the current (self) scanorama.
 * @param pointFull A single point-record from the octree, usually stores [x,y,z,color,stationID,...]
 * @param W, H The width and height of the depth buffer.
 * @param destDepthFlags The flag mask that describes which pixels need to be overwritten, ONLY
 * needed for demolition NOT virtual station (could pass-in an empty byte array). It is only read when destStationIndex >= 0.
 * @param allStationPositions A list of station positions {x,y,z}, where the list index is the
 * station's ID. Some indices may be empty storing position {0,0,0}.
 * @param maxStationID The top-most valid station index; points claiming a larger ID (or ID 0) skip the alignment calculation.
 * @param destStationIndex The station ID which is receiving the patch to its depth/color buffers.
 * USE -1 if new virtual station! USE 0 if an external station (NOT in current octree)!
 * USE -2 if a "construction" operation that ignores the mask cropping.
 * @param mtxData2Scan Matrix that can transform octree project data to scan-local coordinate space.
 * @param listMtxs Concatenated matrix transformation list, 1 for each volume FOR EACH project! Each having 33 floats (two 4x4 matrices and a scalar) float values: [inv matrix, fwd matrix, scale]).
 * @param qualityVersion Quality table; only qualityVersion[1].FOREIGN is read here.
 * @param volumeParamBuffer Describes the active demolition volumes.
 * @return The initial record for this point. If m_index is negative the point isn't useful for this station and the other fields must not be relied on:
 *   -1: the pixel's mask value is -INFINITY or 0 (only checked when destStationIndex >= 0),
 *   -2: the point was captured by the destination station itself, or touchesVolume2 reports it inside a volume (only checked when destStationIndex >= 0),
 *   -4: the projection reported coordinates (-1,-1), the eye distance is exactly 0, or the computed pixel index lies outside [0, W*H),
 *   -5: the point carries a negative station index,
 *   -6: the point coincides with its capture station position.
 * Otherwise m_index is the 1D pixel index and the fields m_captureDistance, m_destDistance, m_ARGB and m_align are populated
 * (m_depth is handed to motorLocalPoint2ucharAndCoord to be filled).
 */
struct stPixelRecord pointToRecord(const struct stPoint pointFull, const int W, const int H,
		global const float *destDepthFlags, global const float4 *allStationPositions, const int maxStationID,
		const int destStationIndex, constant float* mtxData2Scan, constant float *listMtxs,
		constant struct stDBPointQuality *qualityVersion, constant float *volumeParamBuffer)
{
	//NOTE: If the station indices are missing, it will return index=0! (original author's note; presumably refers to m_iStation reading as 0 - TODO confirm)
	//NOTE(review): m_numAve is never assigned in this function, the tournament code must set it before using it.
	struct stPixelRecord recordOut; //The output record for this single work item.
	const int captStationIndex = pointFull.m_iStation; //The station index where the point originated from.
	if (captStationIndex < 0) { //Octree is missing station index data for this point
		recordOut.m_index = -5;
		return recordOut;
	}
	if (captStationIndex == destStationIndex) {
		recordOut.m_index = -2;
		return recordOut; //Ignore the 3D point from "self" station.
	}

	float3 pointGlobal = (float3)(pointFull.x, pointFull.y, pointFull.z);//3D GLOBAL coordinates of the point from octree cloud.
	if (destStationIndex >= 0) { //NOT a virtual station
		if (touchesVolume2(volumeParamBuffer, pointGlobal, listMtxs, NULL, NULL, NULL)) { //If the point is contained within a demolition volume, we ignore it entirely.
			recordOut.m_index = -2;
			return recordOut;
		}
	}
	float3 ptScan = transformPoint_arrayVersion(mtxData2Scan, pointGlobal); //Convert 3D coordinate from octree data to scan-local.
	float2 deptBufferCoords; //Store the i,j index (col, row) of this point's pixel in the buffer image.

	float fDistEyeToPoint = motorLocalPoint2ucharAndCoord(ptScan.x, ptScan.y, ptScan.z, W, H, &recordOut.m_depth, &deptBufferCoords, qualityVersion[1].FOREIGN); //Get the pixel pack and index for the scanorama
	if (deptBufferCoords.x == -1 && deptBufferCoords.y == -1) {//Invalid point? local/global issue? non-zero matrix?
		recordOut.m_index = -4;
		return recordOut;
	}
	if (fDistEyeToPoint == 0.f) { //Should never happen
		recordOut.m_index = -4;
		return recordOut;
	}
	recordOut.m_index = (int)deptBufferCoords.y * W + (int)deptBufferCoords.x; //Save the 1D buffer index for this 3D point
	if (recordOut.m_index < 0 || recordOut.m_index >= W*H) {
		//The projected coordinates fell outside the image. Reject the point here, BEFORE the index is used
		//to read the mask below, otherwise that read would go outside of the destDepthFlags buffer.
		recordOut.m_index = -4;
		return recordOut;
	}
	if (destStationIndex >= 0) { //NOT a virtual station NOR a "construction"
		//IF this entire station is virtual, then its ID is -1, and we can ignore the mask, as ALL pixels are empty and need to be filled.
		float maskDist = destDepthFlags[recordOut.m_index]; //Is this pixel within the desired demolition mask?
		if (maskDist == -INFINITY || maskDist == 0.f) { //If you want to show all the points, remove the 2nd filter condition here.
			recordOut.m_index = -1;
			return recordOut; //The pixel exists outside of ALL, or doesn't exist and doesn't intersect ANY, demo volumes. So it's not considered.
		}
	}
	//0-index is a blank placeholder, and the maxStationID is the top-most valid locator index (as indicated by the site *.loc files)
	if (captStationIndex > maxStationID || //This point claims to have an ID that exceeds the known station list, so just assume it's valid to be drawn
			captStationIndex == 0) { //This point has no source locator (may have been directly from another point cloud e57)
		recordOut.m_destDistance = fDistEyeToPoint; //Distance from source point to destination eye
		recordOut.m_captureDistance = 50.f; //Assume far (50 meter) distance
		recordOut.m_align = 0.f;//Just accept the point as having mediocre alignment
	}
	else { //Otherwise calculate the alignment from a pseudo-normal that we can estimate
		float3 captureStationPosition = allStationPositions[captStationIndex].xyz;//global coords
		float3 captureStationPositionLocal = transformPoint_arrayVersion(mtxData2Scan, captureStationPosition);//Local coords of station that capture this pixel
		float3 dirFromCaptureToPoint = ptScan - captureStationPositionLocal; //Direction to the point from the source Station
		float len = length(dirFromCaptureToPoint);
		if (len == 0.f) {//Should never happen
			recordOut.m_index = -6;
			return recordOut;
		}
		recordOut.m_captureDistance = len;//Save this distance to point from capture source.
		dirFromCaptureToPoint /= len; //Convert to unit vector

		//The matrix already describes the dest station as the local system, so the scan-local point
		//itself is the direction from the destination station's eye (the local origin) to the point.
		float3 dirFromDestToPoint = ptScan;
		recordOut.m_destDistance = fDistEyeToPoint; //Save this distance to point from destination station.
		dirFromDestToPoint /= fDistEyeToPoint; //Convert to unit vector (fDistEyeToPoint was checked to be non-zero above)

		recordOut.m_align = dot(dirFromDestToPoint, dirFromCaptureToPoint); //alignment between the 2 stations when viewing the same point (1 is best, -1 is worst)
	}
	//When the mask was consulted, its value is neither -INFINITY nor 0 at this point.
	//In the "construction" case we don't even care about the mask (all points are projected just like for virtual stations).
	//Rejecting badly aligned points and creating pseudo-points is NOT done here, that task has been delegated to a later stage.
	recordOut.m_ARGB = pointFull.m_color;
	return recordOut;
}

/**
 * Copy a single record into the global memory list, one field at a time using atomic exchanges.
 * m_index must be CORRECT beforehand or it will write outside of valid memory!
 * NOTE(review): every field is exchanged with its own atomic call, so the record as a whole is not
 * written in one indivisible step.
 * @param canvasRecords A 2D output image canvas where each pixel contains a point-record struct.
 * @param newRecord Input that will be written to the slot canvasRecords[newRecord.m_index].
 */
void copyRecord(global struct stPixelRecord *canvasRecords, const struct stPixelRecord newRecord) {
	global struct stPixelRecord* slot = &canvasRecords[newRecord.m_index]; //Destination slot selected by the record's own index
	const int packedDepth = *((const int*)(&newRecord.m_depth)); //The 4 packed depth bytes viewed as a single 32-bit value
	atomic_xchg(&slot->m_destDistance, newRecord.m_destDistance);
	atomic_xchg(&slot->m_align, newRecord.m_align);
	atomic_xchg(&slot->m_captureDistance, newRecord.m_captureDistance);
	atomic_xchg(&slot->m_index, newRecord.m_index);
	atomic_xchg((global int*)(&slot->m_depth), packedDepth);
	atomic_xchg(&slot->m_ARGB, newRecord.m_ARGB);
	atomic_xchg(&slot->m_numAve, newRecord.m_numAve); //Stores the incoming sample count as-is (nothing is added here).
}

/**
 * A method to lock a record, turns out it wasn't necessary.
 * Spins until an atomic compare-and-swap succeeds in replacing m_numAve with its negated value.
 * NOTE(review): a value of 0 is swapped with 0, and an already negative value is flipped back to positive.
 * @param canvasRecords Pointer to the record whose m_numAve is negated.
 */
void lockRecord(global struct stPixelRecord *canvasRecords) {
	for (;;) {
		const int seen = canvasRecords->m_numAve; //Snapshot of the current count
		if (atomic_cmpxchg(&canvasRecords->m_numAve, seen, -seen) == seen)
			break; //Nobody changed it in between, the negated value has been stored.
	}
}

/**
 * Updates a running average with one more sample.
 * @param oldAve The existing average value (of the previous n-1 samples).
 * @param newValue The sample being added.
 * @param n The sample count including newValue. Must be greater than 0!
 * @return oldAve weighted by (n-1)/n plus newValue weighted by 1/n.
 */
float runningAverage(float oldAve, float newValue, int n) {
	const float carried = oldAve * (n-1)/(float)n; //Share contributed by the previous samples
	const float added = newValue/n; //Share contributed by the new sample
	return carried + added;
}

/**
 * Updates a 4-component running average with one more sample, component by component.
 * @param oldAve The existing average value.
 * @param newValue The sample being added.
 * @param n The sample count including newValue; it is used as a divisor, so it must not be 0.
 * @return oldAve weighted by (n-1)/n plus newValue weighted by 1/n.
 */
float4 runningAverageFloat4(float4 oldAve, float4 newValue, int n) {
	const float4 carried = oldAve * (n-1)/(float)n; //Share contributed by the previous samples
	const float4 added = newValue/n; //Share contributed by the new sample
	return carried + added;
}

/**
 * @return The half-size of a brush (WxH) to paint the current point as pixel(s).
 * @param iIndex The pixel index.
 * @param fDist The point's distance from the camera eye.
 * @param W Image pixel span horizontally.
 * @param H Image pixel span vertically.
 * @return A half delta WxH that is added to the 1x1 pixel that is always drawn.
 */
float2 calcBrush(const int iIndex, const float fDist, const int W, const int H, const float fBrush)
{
	const int 	iRow = iIndex / W; //The row number from 0 to H-1.
	//Assumes some "focal" point where points should be drawn as 1 pixel large.
	const float fFocal = 10.f;//This is the distance where points will be drawn 1 pixel wide (no smaller farther, but larger closer)
	const float distEffect = fFocal/fDist; //Pixels closer to the station's eye will be drawn with a larger brush size
	const float phi = M_PI*((float)iRow/H - 0.5f); //wil range between +/-PI/2
	const float cosPhi = clamp(cos(phi), .001f, 1.f); //Prevent division by zero in next line
	const float phiEffect = 1.f/cosPhi; //Row zenith effect, closer to top/bottom of image uses a wider pixel brush.
	const float	MAXW = 50.f; //The maximum +/-width of the brush from the zenith-effect at the top/bottom of the spherical scanoramas.
	const float	MAXH = 4.f; //The maximum +/-height of the brush from the distance from camera
	//WARNING!!! OCL WILL CRASH if the area of the brush is too large! Clamp to reasonable levels here! User can specify a reduced 'fBrush' if they are having time-out crashes.
	//For example: GTX960 can't handle more than MAXW=30, MAXH=1.5, but 2060 no problem!
	const float brH = clamp(fBrush*(distEffect-1.f), 0.f, MAXH); //Subtract 1 because it's a delta from the base 1 pixel
	const float brW = clamp(fBrush*((distEffect * phiEffect)-1.f), 0.f, MAXW); //Subtract 1 because it's a delta from the base 1 pixel
	return (float2)(brW, brH);//All pixels will AT LEAST be drawn as 1x1, but this adds an additional +/-width and +/-height to it
}

/**Old method that replaces the old point with a better one with a simple overwrite if passes quality test.
 * Given a point from octree that fits into the demolition mask, distribute it to its respective pixel-record
 * (and to the neighboring pixels covered by its brush) if it's the *closest* distance to the destination station.
 * The winner is written with copyRecord (per-field atomic exchanges).
 * NOTE(review): the existing record is read with a plain load and compared before the write, so the
 * compare-and-write sequence as a whole is not atomic.
 * @param canvasRecords A 2D output image canvas where each pixel contains a point-record struct
 * @param newRecord A new input contending record to compete for its pixel slot in "canvasRecords".
 * Its m_index is the pixel the brush is centered on.
 * @param W, H width and height of the output canvas image.
 * @param drawParams Only m_fBrush (the user's brush scale) is read here.
 */
void pointTournamentL1(global struct stPixelRecord *canvasRecords, struct stPixelRecord newRecord, const int W, const int H,
	constant struct stDrawParams *drawParams)
{
	const int baseInd = newRecord.m_index; //Brush will be centered around this pixel index.
	if (baseInd < 0 || baseInd >= W*H) //Sometimes the indirection can be wrong
		return;
	const float toleranceRatio = 0.02f; //Distances within this relative tolerance are considered "the same" position
	const float closerRatio = 2.f; //Favors the new point if it is this factor closer compared to the old point's capture distance.
	const float2 hBrush = calcBrush(baseInd, newRecord.m_destDistance, W, H, drawParams->m_fBrush);
	//Round the half-brush ONCE and use it for both ends of each loop, so the brush is symmetric around
	//the center pixel (a truncated start with a rounded end would paint a lopsided brush).
	const int halfW = (int)round(hBrush.x);
	const int halfH = (int)round(hBrush.y);
	newRecord.m_numAve = 1; //This method never averages; give the field a defined value before it is copied to global memory.

	for (int i = -halfW; i <= halfW; i++) { //Cycle through neighboring pixels
		for (int j = -halfH; j <= halfH; j++) { //Cycle through neighboring pixels
			int col = baseInd % W + i;
			int row = baseInd / W + j;
			if (row < 0 || row >= H)
				continue; //Rows do not wrap
			else if (col < 0)
				col += W; //Periodic condition
			else if (col >= W) //Periodic condition
				col -= W;
			int ind = col+row*W; //The pixel index to overwrite

			struct stPixelRecord oldRecord = canvasRecords[ind];
			bool bCopy = false;
			if (oldRecord.m_index < 0) //Nothing was at this pixel previously, so add this point immediately
				bCopy = true;
			else if (newRecord.m_destDistance < (1-toleranceRatio)*oldRecord.m_destDistance) //This point is significantly in front of an existing point, so overwrite it entirely
				bCopy = true;
			//If the new point's position is "the same" as the old one:
			else if (newRecord.m_destDistance < (1+toleranceRatio)*oldRecord.m_destDistance) {
				if (i == 0 && j == 0 && newRecord.m_align > oldRecord.m_align-0.05f //Only allow this overwrite if this pixel is centered (not part of extra brush radius).
						&& newRecord.m_captureDistance < oldRecord.m_captureDistance*closerRatio) //Keep the one with better alignment
					bCopy = true;
				else if (newRecord.m_captureDistance < oldRecord.m_captureDistance/closerRatio) //Keep the new one if it has a MUCH closer capture distance.
					bCopy = true;
			}
			if (bCopy) {//Is the new record better than the old existing record?
				newRecord.m_index = ind; //So it knows where to draw itself
				copyRecord(canvasRecords, newRecord);//Copy to global memory atomically (field by field)
			}
		}
	}
}

/**
 * New method that averages all the winners of the tournament together, and uses better brush sizing algorithm.
 * Given a point from octree that fits into the demolition mask, distribute it to every pixel-record covered
 * by its brush. For each covered pixel the candidate either replaces the existing record (pixel was empty, or
 * the candidate is significantly closer to the destination station), is blended into the existing running
 * average (about the same distance and acceptable alignment/capture distance), or is thrown away.
 * NOTE(review): the record is read, modified and written back with plain loads/stores, no atomics are used here.
 * @param canvasRecords A 2D output image canvas where each pixel contains a point-record struct
 * @param newRecord A new input contending record to compete for its pixel slot in "canvasRecords".
 * @param W, H width and height of the output canvas image.
 * @param drawParams Only m_fBrush (the user's brush scale) is read here.
 */
void pointTournamentL2(global struct stPixelRecord *canvasRecords, const struct stPixelRecord newRecord, const int W, const int H,
	constant struct stDrawParams *drawParams)
{
	const int centerInd = newRecord.m_index; //Brush will be centered around this pixel index.
	//TODO: There are incoming points at column zero that shouldn't be there! Problem with converting 3D points to spherical coordinates?
	if (centerInd >= W*H || centerInd % W == 0)
		return;
	//Half-size of the brush, already scaled by the user setting. NOTE that when it returns 0x0 (pixels far away), the drawParams->m_fBrush has NO EFFECT!
	const float2 halfBrush = calcBrush(newRecord.m_index, newRecord.m_destDistance, W, H, drawParams->m_fBrush);
	const int reachX = (int)round(halfBrush.x);
	const int reachY = (int)round(halfBrush.y);
	const int centerCol = centerInd % W;
	const int centerRow = centerInd / W;

	const float closerRatio = 2.f; //The candidate's capture distance may be at most this factor larger than the existing one to be blended.
	const float toleranceRatio = 0.02f; //Distances within this relative tolerance are considered "the same" position
	for (int dx = -reachX; dx <= reachX; dx++) { //Cycle through neighboring pixels
		for (int dy = -reachY; dy <= reachY; dy++) { //Cycle through neighboring pixels
			const int row = centerRow + dy;
			if (row < 0 || row >= H)
				continue; //Rows do not wrap
			int col = centerCol + dx;
			if (col < 0)
				col += W; //Periodic condition
			else if (col >= W)
				col -= W; //Periodic condition
			const int ind = col + row*W; //The pixel index to overwrite
			struct stPixelRecord merged = canvasRecords[ind]; //Private copy of the existing record

			bool bOverwrite = false; //Replace the existing record completely
			bool bAverage = false; //Blend into the existing running average
			if (merged.m_index < 0) //Nothing was at this pixel previously
				bOverwrite = true;
			else if (newRecord.m_destDistance < (1-toleranceRatio)*merged.m_destDistance) //Significantly in front of the existing point
				bOverwrite = true;
			else if (newRecord.m_destDistance < (1+toleranceRatio)*merged.m_destDistance) { //Position is "the same" as the existing one
				//TODO: for some reason, using a very large decrease tolerance on the alignment works really well
				bAverage = newRecord.m_align > merged.m_align-0.8
						&& newRecord.m_captureDistance < merged.m_captureDistance*closerRatio;
			}

			if (bOverwrite) { //Reset it as the first point, overwrite anything already existing
				if (newRecord.m_destDistance > merged.m_destDistance)
					continue; //Point is BEHIND existing scan data!
				merged = newRecord;
				merged.m_numAve = 1;
			}
			else if (bAverage) {
				merged.m_numAve++;
				float4 colorAverage	= convert_float4(*((uchar4*)&merged.m_ARGB));
				float4 colorSample	= convert_float4(*((uchar4*)&newRecord.m_ARGB));
				uchar4 colorBlend	= convert_uchar4(runningAverageFloat4(colorAverage, colorSample, merged.m_numAve));
				merged.m_ARGB	= *((int*)&colorBlend);
				merged.m_align			= runningAverage(merged.m_align, newRecord.m_align, merged.m_numAve);
				merged.m_captureDistance	= runningAverage(merged.m_captureDistance, newRecord.m_captureDistance, merged.m_numAve);
				merged.m_destDistance	= runningAverage(merged.m_destDistance, newRecord.m_destDistance, merged.m_numAve);
			}
			else
				continue; //Throw away this point

			merged.m_index = ind; //So it knows where to draw itself
			canvasRecords[ind] = merged;
		}
	}
}

/**
 * Called for a single station to determine which points from the octree will fill-in the patch.
 * Each work-item converts its chunk of 3D points to pixel records (pointToRecord) and enters every valid
 * record into the pixel tournament selected by drawParams->m_iEnhanceLevel (1 or 2, any other level draws nothing).
 * @param listPoints List of global 3D points from octree [x,y,z,ARGB,stationID].
 * @param numPoints The length of the listPoints vector.
 * @param chunkSize Number of consecutive points that each GPU worker should handle.
 * @param W, H width and height of the 1D scanorama buffers.
 * @param destDepthFlags The mask describing which pixels are relevant to be overwritten by the octree during demolition (see
 * same buffer in identifyImpactedPixels for format). HOWEVER, during construction phase, it will be overwritten as points are projected.
 * @param allStationPositions A list of station positions {x,y,z}, where the list index is the
 * station's ID and the 0'th index is a blank placeholder. Some additional indices may also be empty positions as {0,0,0,0}.
 * @param maxStationID The top-most valid station index, forwarded to pointToRecord.
 * @param destStationIndex The positive station ID which is receiving the patch to its depth/color buffers. (-1 if virtual station, -2 if construction)
 * @param mtxData2Scan Matrix that can transform octree project data to scan-local coordinate space.
 * @param listMtxs Concatenated matrix transformation list, 1 for each volume FOR EACH project! Each having 33 floats (two 4x4 matrices and a scalar) float values: [inv matrix, fwd matrix, scale]).
 * @param canvasPixels The only output from this function, a list of struct "stPixelRecord" containing information about each flattened 3D point onto the 2D buffer canvas.
 * @param drawParams Contains the colors/dimensions/enhancement level needed to project 3D points onto a canvas through tournament.
 * @param qualityVersion Array of versions each index has a quality struct.
 * @param volumeParamBuffer Describes the active demolition volumes.
 * @see stPixelRecord
 */
kernel void flattenPixelRecords(global const struct stPoint *listPoints, const int numPoints, const int chunkSize,
		const int W, const int H, global const float *destDepthFlags, global const float4 *allStationPositions,
		const int maxStationID, const int destStationIndex, constant float* mtxData2Scan, constant float *listMtxs,
		global struct stPixelRecord *canvasPixels, constant struct stDrawParams *drawParams,
		constant struct stDBPointQuality *qualityVersion, constant float *volumeParamBuffer)
{
	//Each work-item handles one chunk of consecutive points, which should reduce atomic delays, assuming neighbors are near each other in the indices.
	const int firstPoint = get_global_id(0) * chunkSize;
	const int pastLastPoint = min(firstPoint + chunkSize, numPoints); //The last chunk may be shorter or empty, CAREFUL FOR BARRIERS!
	for (int pointInd = firstPoint; pointInd < pastLastPoint; pointInd++) {
		const struct stPixelRecord candidate = pointToRecord(listPoints[pointInd], W, H, destDepthFlags,
				allStationPositions, maxStationID, destStationIndex, mtxData2Scan, listMtxs, qualityVersion, volumeParamBuffer);
		if (candidate.m_index >= 0) { //Negative index: not a valid 3D point to consider in the patch
			//This 3D-point record needs to be flattened onto the point-record-canvas.
			if (drawParams->m_iEnhanceLevel == 1)
				pointTournamentL1(canvasPixels, candidate, W, H, drawParams);
			else if (drawParams->m_iEnhanceLevel == 2)
				pointTournamentL2(canvasPixels, candidate, W, H, drawParams);
		}
	}
}

/**
 * Flatten the winning pixel records onto the output images. Worker is now a pixel, comparing the
 * flag mask with the winning canvas-pixel-records. If no suitable replacement pixel was found, a
 * pseudo-point is generated using the drawParams texture color settings and the distance stored
 * in the flag mask (to the rear face of the volume). THIS FUNCTION MUST set the output
 * imbDepth[pixel_id] AND imbImage[pixel_id] before exiting, regardless of how, otherwise it may
 * leave garbage or improperly initialized data. The two exceptions visible in the code are excess
 * workers (pixel_id out of bounds) and a "construction" (iStationIndex == -2) pixel without an
 * accepted winner, which both return without writing anything.
 * @param imbDepth [out] The depth buffer stores info about radial distance, angle offset, and quality (alpha channel).
 * @param imbImage [out] The color image stores RGB color, and the alpha channel stores the intensity (almost like gray-scale).
 * @param canvasPixels The output pixel tournament results.
 * @param depthFlags Will be garbage if (iStationIndex < 0), otherwise it's the mask describing
 * which pixels are relevant to be overwritten by the octree (see destDepthFlags from
 * identifyImpactedPixels for format). NOTE that for a "construction" (NOT a "destruction") this
 * mask pixel should be set to valid anywhere that a valid point is projected onto the canvas records!
 * It is only read when iStationIndex >= 0.
 * @param srcMatrix The local-to-global (frustum-to-octree) transformation matrix.
 * @param W,H width and height of the depth buffer.
 * @param drawParams Contains the colors/dimensions needed to draw the "empty" pattern onto the color image,
 * and the minimum accepted alignment (m_fAlign).
 * @param qualityVersion An array storing the quality used by various Depth buffer versions; only index 1 is read here.
 * @param iStationIndex The destination station/locator index that is having its data
 * overwritten. Or -1 if it's a NEW virtual station, OR -2 if this is a construction projection (should ignore mask)!
 */
kernel void drawPixelRecords(global uchar4 *imbDepth, global uchar4 *imbImage,
		global const struct stPixelRecord *canvasPixels, global const float *depthFlags,
		constant float *srcMatrix, const int W, const int H, constant struct stDrawParams *drawParams,
		constant struct stDBPointQuality *qualityVersion, const int iStationIndex)
{
	uint pixel_id = get_global_id(0);
	if (pixel_id >= W*H) //It's often the case that more workers are allocated than required (the image pixel count here), so they are forced to just idle.
		return; //Skip worker out of bounds
	float maskDist; //Only assigned for iStationIndex >= 0 and == -1; the -2 case returns before it is used.
	int bCreatePseudoVolSurface = 0; //0 = no pseudo depth point, 1 = pixel had no winner, 2 = winner rejected for poor alignment. Only tested for > 0 below.
	struct stPixelRecord winningPixel = canvasPixels[pixel_id]; //Get the winner of the tournament, 0-1 for each pixel.
//	int all = (winningPixel.m_align+1.f)/2*255;//DEBUG: The grayscale color of the alignment

	if (iStationIndex >= 0) { //NOT a NEW virtual station, DEMOLITION ONLY!
		maskDist = fabs(depthFlags[pixel_id]); //At this point, the only non-null pixelRecords are finite mask values (negative for non-existent but intersecting) positive for contained in volume.
		if (maskDist == 0 || fabs(maskDist) == INFINITY) { //Point already exists on scanorama or didn't intersect the demolish volume.
			imbImage[pixel_id] = 0; //Empty ARGB
			imbDepth[pixel_id] = toARGBu(qualityVersion[1].UNDEFINED,0,0,0); //No Point
			return; //This point isn't under consideration; outside of the patch.
		}
	}
	else if (iStationIndex == -1)//NEW Virtual station.
		maskDist = 10.f; //If a point doesn't exist, then assume a 10m sphere from the center ONLY for drawing the colors, the depth buffer will contain NOPOINT.
							//Draw a sphere of this radius in [m] when NO DATA was found. Should always be positive distance by this point
							//NOTE(review): when a winner exists but fails the alignment test below, a PSEUDO depth point at this radius IS written for a virtual station - confirm this is intended.
//	else //"construction", we can directly set the distance (so that pixel will always be drawn)
//		depthFlags[pixel_id] = 2.f*winningPixel.m_destDistance;
	uchar4 iDepth; //Assigned on every path that reaches the final write below.
	if (winningPixel.m_index >= 0) { //A pixel exists, is it good enough to use?
		if (winningPixel.m_align < drawParams->m_fAlign) //This pixel is not a good candidate, Minimum allowable alignment value NOT met. But we will still consider drawing its distance as a pseudo point.
			bCreatePseudoVolSurface = 2; //iDepth will be calculated later to show it as a pseudo point
		else { //The point is good, draw it as-is:
//			imbImage[pixel_id] = toARGBu(255, all, all, all);
			imbImage[pixel_id] = int2UChar4(winningPixel.m_ARGB);//This came from an int, and is being cast back as one, so the endianness swap keeps it ok on the Java host side.
			if (iStationIndex < 0) //Virtual station (this test is also true for a construction, -2)
				winningPixel.m_depth.s3 = qualityVersion[1].FOREIGN; //set the alpha channel as FOREIGN quality
			imbDepth[pixel_id] = winningPixel.m_depth;
			return;
		}
	}
	else { //No octree point has claimed this pixel, it still has no applicable information
		if (iStationIndex < 0) { //A virtual station will create a pseudo surface in the IMAGE only, not in the DEPTH
			iDepth = toARGBu(qualityVersion[1].UNDEFINED, 0, 0, 0);
		}
		else
			bCreatePseudoVolSurface = 1; //iDepth will be calculated later
	}
	//The pixel is empty, and must be filled with some pseudo data, both in color image and depth.
	//A "construction" won't paint any color on missing points, but a virtual station will
	if (iStationIndex == -2) //Construction, should NOT modify pixels that have no points projected from octrees.
		return; //Neither output buffer is written for this pixel.
	int iRow = (int)(pixel_id / W);
	int iCol = (int)(pixel_id % W);
	float3 dirScan  = getLocalDirection_Panoramic((float)iCol+0.5f, (float)iRow+0.5f, W, H);//Unit vector direction from eye, through the pixel center
	float3 localPoint = dirScan * maskDist;//Create a local pseudo-point at the volume's surface.
	float3 pointGlobal = transformPoint_arrayVersion(srcMatrix, localPoint); //Convert to 3D GLOBAL coordinate.
	uchar4 argb = getDemoColor(pointGlobal, drawParams[0]); //Whether creating a PSEUDO point or leaving NOPOINT, we still want to draw image ARGB.
	//argb.s3 = 255; //Set the Intensity (alpha channel) to full
//	uchar4 argb = toARGBu(255, all, all, all);

	if (bCreatePseudoVolSurface > 0) { //Create a pseudo-point in the depth buffer using the surface volume stored in the mask
		iDepth = motorLocalPoint2uchar(localPoint.x, localPoint.y, localPoint.z, W, H, qualityVersion[1].PSEUDO);//Create a depth point with the Pseudo quality
	}
	imbImage[pixel_id] = argb; //Set the ARGB output image color+intensity
	imbDepth[pixel_id] = iDepth; //Either the pseudo depth point created from maskDist, or the UNDEFINED (no point) value.
}

/**Smudge an image using the neighborhood average. Only considers valid points in the boolean mask.
@param imgBuffer [in/out] Only the non-valid points will be altered using valid neighbors.
 */
/*kernel void fillBlankPixelsFromNeighbors(global uchar4 *imgBuffer, global const char *validFlag,
											int W, int H, int blankPixelsSearchHalfRange, float ratioNoInfoNeighborsMaxAllowed, float ratioNoInfoNeighborsDelete) {
	int index = get_global_id(0);
	int row = index / W;
	int col = index % W;
	if (row >= H) return; //Out of bounds

	if ((row < blankPixelsSearchHalfRange) || (row >= H-blankPixelsSearchHalfRange)) //Ignore the upper/lower rows entirely where stencil goes out-of-bounds.
		return;

	if (validFlag[index] == 1) //The point is valid, ignore it
		return;

	int nbNoInfoInNeighborhood = 0;
	int vignetSize = (2*blankPixelsSearchHalfRange+1)*(2*blankPixelsSearchHalfRange+1);
	for (int ck = -blankPixelsSearchHalfRange; ck <= blankPixelsSearchHalfRange; ck++) {
		int ic = col+ck;
		if (ic < 0) ic += W; //periodic wrapping
		else if (ic >= W) ic -= W; //periodic wrapping
		for (int rk=-blankPixelsSearchHalfRange; rk <= blankPixelsSearchHalfRange; rk++) {
			int ind2 = (row + rk)*W + ic;
			if (validFlag[ind2] == 0)//Not a valid source point
				nbNoInfoInNeighborhood++;
		}
	}
	if ((float)nbNoInfoInNeighborhood/(float)vignetSize > ratioNoInfoNeighborsDelete) {
		imgBuffer[index] = 0;//Delete this point
		return;
	}
	if ((float)nbNoInfoInNeighborhood/(float)vignetSize > ratioNoInfoNeighborsMaxAllowed)
		return; //Not enough valid info contained inside the stencil to update the invalid point

	float4 localSums = (float4)(0.f); //The sums of each ARGB component
	int localPtCtr = 0; //Number of valid neighbors
	for (int ck = -blankPixelsSearchHalfRange; ck <= blankPixelsSearchHalfRange; ck++) {
		int ic = col+ck;
		if (ic < 0) ic += W; //periodic wrapping
		else if (ic >= W) ic -= W; //periodic wrapping
		for (int rk = -blankPixelsSearchHalfRange; rk <= blankPixelsSearchHalfRange; rk++) {
			int ir = row + rk;
			int ind2 = ir*W + ic;
			if (validFlag[ind2] == 1) {
				uchar4 neighbor = imgBuffer[ind2];
				localSums.x += neighbor.x;
				localSums.y += neighbor.y;
				localSums.z += neighbor.z;
				localSums.w += neighbor.w;
				localPtCtr++;
			}
		}
	}
	if (localPtCtr == 0)
		return; //No neighbors were considered.

	localSums = localSums / localPtCtr;//compute average ARGB (should all be 0-255 now)
	uchar4 interpolatedValue = (uchar4)(0);
	interpolatedValue.x = (uchar)localSums.x;//cast to A component
	interpolatedValue.y = (uchar)localSums.y;//cast to R component
	interpolatedValue.z = (uchar)localSums.z;//cast to G component
	interpolatedValue.w = (uchar)localSums.w;//cast to B component
	imgBuffer[index] = interpolatedValue;
}*/

/**Flag depth-buffer pixels as CLEANED around each point of an incoming point list.
One work-item handles one record: the point is converted to motor coordinates, projected onto the depth
buffer, and every non-empty pixel inside a square stencil centred on the projected pixel is rewritten
through setChannelFastu(pixel, 0, CLEANED).
@param imgBuffer [in/out] The depth buffer (w*h pixels). Only pixels inside a record's stencil whose three non-alpha bytes are not all zero are rewritten.
@param recordList List of xyzc points to overwrite onto the depth buffer (only x,y,z are read here). Usually comes from an e57 that MUST store them in locator LOCAL, REAL-WORLD coordinates!
//@param srcMtxInv Global to local matrix transformation. THIS PARAMETER WAS REMOVED: the records are used as-is, without any global-to-local transform.
@param qualityVersion A single struct pointer, containing a list of ALL quality types. Element [1] is the one read (FOREIGN and CLEANED).
@param numRecords The length of the recordList points. Values <= 0 make every work-item return immediately.
@param w,h Width and Height of the image.
@param fBrushRadius The brush radius (in meters) describing the brush's "sphere" of influence when applying the overwrite onto the channel.
       It is converted to a half-width in pixels (capped at 10); the 3D containment test is currently disabled, so the footprint is a square in pixel space.
NOTE(review): stencils of different work-items may overlap, so the same pixel can be read and written concurrently.
       Presumably harmless because every writer stores the same CLEANED flag -- confirm against setChannelFastu.
 */
kernel void cleanPoints(global uchar4 *imgBuffer, global const float4 *recordList, /*constant float *srcMtxInv,*/
		constant struct stDBPointQuality *qualityVersion, int numRecords, int w, int h, float fBrushRadius) {
	const uint recordID = get_global_id(0);
	//Compare in the unsigned domain only once numRecords is known to be positive; a negative count would otherwise wrap to a huge uint and defeat the guard.
	if (numRecords <= 0 || recordID >= (uint)numRecords) return; //Beyond the record length

	const float4 pointFull = recordList[recordID];//xyzc
	//The record is already expressed in LOCAL real-world coordinates (no srcMtxInv transform anymore).
	float3 pointLocal = (float3)(pointFull.x, pointFull.y, pointFull.z);
	pointLocal = realCoordinates2MotorCoordinates(pointLocal);

	float2 deptBufferCoords; //The col,row (x,y) of this point's pixel as floats (not rounded) from the buffer image.
	uchar4 packedPoint; //Pixel pack produced by the projection; required by the helper but not used afterwards.
	const uchar alpha = qualityVersion[1].FOREIGN;
	const float dist = motorLocalPoint2ucharAndCoord(pointLocal.x, pointLocal.y, pointLocal.z, w, h, &packedPoint, &deptBufferCoords, alpha); //Get the pixel pack and index for the scanorama, also returns the distance
	if (dist == 0.f) // The x,y,z were all zero, which is reserved for null points
		return;

	//Sector angle (given the point's distance) divided by a single constant pixel angle.
	//Clamp while still in floating point: a tiny dist yields a huge/inf value whose direct conversion to int is out of range.
	const float fPixelRadius = ceil(fBrushRadius/dist*w/2.f/M_PI);
	if (!(fPixelRadius >= 0.f)) //Negative radius (empty stencil) or NaN: nothing to clean
		return;
	const int iBrushRadius = (fPixelRadius < 10.f) ? (int)fPixelRadius : 10;

	const int centerCol = (int)deptBufferCoords.x;
	const int centerRow = (int)deptBufferCoords.y;

	//Search the stencil for points to remove
	for (int row = centerRow-iBrushRadius; row <= centerRow+iBrushRadius; row++) { //row
		if (row < 0 || row >= h) continue; //No wrapping in the vertical direction
		for (int i = centerCol-iBrushRadius; i <= centerCol+iBrushRadius; i++) { //column
			int col = i;
			if (i < 0) col = i + w; //Periodic in x-
			else if (i >= w) col = i - w; //Periodic in x+
			if (col < 0 || col >= w) continue; //A single wrap was not enough (centre off-image or w smaller than the brush): skip instead of touching a wrong pixel
			const int neighbPixelID = col + row*w;
			const uchar4 neighbor = imgBuffer[neighbPixelID]; //Only one channel of this pixel will be replaced
			//No point exists in the destination image? The alpha channel lives in .s3, so test the other three bytes explicitly
			//(same result as the former "(uint) << 8 == 0" on little-endian devices, but independent of byte order).
			//If they are all 0 the point is already invalid, probably flagged as skimmed.
			if ((neighbor.s0 | neighbor.s1 | neighbor.s2) == 0)
				continue;
			//The 3D test (distance of the neighbor's point to pointLocal <= fBrushRadius) is disabled: every non-empty pixel of the stencil is flagged.
			imgBuffer[neighbPixelID] = setChannelFastu(neighbor, 0, qualityVersion[1].CLEANED); //Set the alpha channel to CLEANED
		}
	}
}
