Anonymous avatar Anonymous committed 93836b3

Fixed the problem where A was not being generated on the GPU when the problem size became too large.

Comments (0)

Files changed (6)

src/solvers/NavierStokes/FadlunEtAl/generateA.inl

 	     *LVals = thrust::raw_pointer_cast(&(L.values[0])),
 	     *AVals = thrust::raw_pointer_cast(&(A.values[0]));
 
-	const int blockSize = 256;
-	dim3 dimGrid( int((ASize-0.5)/blockSize) + 1, 1);
+	const int blockSize = 256,
+	          gridSize  = 1000;
+	dim3 dimGrid(gridSize, 1);
 	dim3 dimBlock(blockSize, 1);
 
 	kernels::generateAFadlun <<<dimGrid, dimBlock>>> (ARows, ACols, AVals, MVals, LRows, LCols, LVals, ASize, alpha, tags_r);

src/solvers/NavierStokes/FadlunEtAl/tagPoints.inl

 	{
 		for(int i=0; i<nx-1; i++)
 		{
+			// index of the current point on the u-grid
 			I = j*(nx-1)+i;
+			
+			// tags and coefficients
 			tagsX[I] = -1;
 			tagsY[I] = -1;
 			coeffsX[I] = 0.0;
 			coeffsY[I] = 1.0;
+			
+			// initial indices of the points on the body that define the segment under consideration
+			k = NSWithBody<memoryType>::B.totalPoints - 1;
+			l = 0;
+			
 			outsideX = true;
 			outsideY = true;
-			k = NSWithBody<memoryType>::B.totalPoints - 1;
-			l = 0;
-			flag = false;
 			bdryFlagX = -1;
 			bdryFlagY = -1;
+			flag = false;
 			
-			// CHANGE THIS FOR MULTIPLE BODIES
-			
+			// cycle through all the segments on the body surface
 			while( l < NSWithBody<memoryType>::B.totalPoints)
 			{
+				// figure out which of the two end points of the segment are at the bottom and the left
 				if (by[k] > by[l])
 				{
 					bottom = l;
 					left = k;
 					right = l;
 				}
+				
 				// consider rays along the x-direction
-				/**
-				* if the ray intersects the boundary segment
-				* top endpoint must be strictly above the ray
-				* bottom can be on or below the ray
-				*/
+				// if the ray intersects the boundary segment (top endpoint must be strictly above the ray bottom can be on or below the ray)
 				if (by[bottom]-eps < yu[j] && by[top]-eps > yu[j])
 				{
 					if (fabs(by[l]-by[k]) > eps)

src/solvers/NavierStokes/FadlunEtAl/updateRHS1.inl

 	     ny = domInfo->ny;
 	
 	int  numU  = (nx-1)*ny,
-		 numUV = (nx-1)*ny + nx*(ny-1);
+	     I = 0;
 	
-	for(in j=0; j<ny; j++)
+	for(int j=0; j<ny; j++)
 	{
 		for(int i=0; i<nx-1; i++)
 		{
 		}
 	}
 	
-	for(in j=0; j<ny-1; j++)
+	for(int j=0; j<ny-1; j++)
 	{
 		for(int i=0; i<nx; i++)
 		{

src/solvers/NavierStokes/NavierStokes/generateA.inl

 	     *LVals = thrust::raw_pointer_cast(&(L.values[0])),
 	     *AVals = thrust::raw_pointer_cast(&(A.values[0]));
 
-	const int blockSize = 256;
-	dim3 dimGrid( int((ASize-0.5)/blockSize) + 1, 1);
+	const int gridSize  = 1000,
+	          blockSize = 256;
+	dim3 dimGrid(gridSize, 1);
 	dim3 dimBlock(blockSize, 1);
 	
 	kernels::generateA <<<dimGrid, dimBlock>>> (ARows, ACols, AVals, MVals, LRows, LCols, LVals, ASize, alpha);

src/solvers/NavierStokes/NavierStokesSolver.cu

 #include <solvers/NavierStokes/TairaColoniusSolver.h>
 #include <sys/stat.h>
 
+#include <io/io.h>
+
 //##############################################################################
 //                              INITIALISE
 //##############################################################################
 	
 	q.resize(numQ);
 	qStar.resize(numQ);
-		qOld.resize(numQ);
+	qOld.resize(numQ);
 	rn.resize(numQ);
 	H.resize(numQ);
 	bc1.resize(numQ);
 	generateL();
 	generateA(intgSchm.alphaImplicit[subStep]);
 	PC1 = new preconditioner< cusp::coo_matrix<int, real, memoryType> >(A, (*paramDB)["velocitySolve"]["preconditioner"].get<preconditionerType>());
-	generateBN();
+	generateBN();	
 	
 	logger.stopTimer("assembleMatrices");
 
 
 	cusp::default_monitor<real> sys1Mon(rhs1, maxIters, relTol);
 	cusp::krylov::bicgstab(A, qStar, rhs1, sys1Mon, *PC1);
-	//cusp::krylov::cg(A, qStar, rhs1, sys1Mon);//, PC1);
+	//cusp::krylov::cg(A, qStar, rhs1, sys1Mon, *PC1);
 	iterationCount1 = sys1Mon.iteration_count();
 	if (!sys1Mon.converged())
 	{

src/solvers/NavierStokes/kernels/generateA.cu

 __global__
 void generateA(int *ARows, int *ACols, real *AVals, real *MVals, int *LRows, int *LCols, real *LVals, int ASize, real alpha)
 {
-	int  I = threadIdx.x + blockIdx.x*blockDim.x;
-	
-	if(I >= ASize) return;
-
-	ARows[I] = LRows[I];
-	ACols[I] = LCols[I];
-	AVals[I] = -alpha*LVals[I] + (LRows[I]==LCols[I])*MVals[LRows[I]];
+	for (int I=threadIdx.x + blockIdx.x*blockDim.x; I<ASize; I += blockDim.x*gridDim.x)
+	{
+		ARows[I] = LRows[I];
+		ACols[I] = LCols[I];
+		AVals[I] = -alpha*LVals[I] + (LRows[I]==LCols[I])*MVals[LRows[I]];
+	}
 }
 
 __global__
 void generateAFadlun(int *ARows, int *ACols, real *AVals, real *MVals, int *LRows, int *LCols, real *LVals, int ASize, real alpha, int *tags)
 {
-	int  I = threadIdx.x + blockIdx.x*blockDim.x;
-	
-	if(I >= ASize) return;
-
-	ARows[I] = LRows[I];
-	ACols[I] = LCols[I];
-	AVals[I] = -(tags[LRows[I]] == -1)*alpha*LVals[I] - (tags[LRows[I]] != -1)*LVals[I] + (LRows[I]==LCols[I])*MVals[LRows[I]];
+	for(int I=threadIdx.x + blockIdx.x*blockDim.x; I<ASize; I += blockDim.x*gridDim.x)
+	{
+		ARows[I] = LRows[I];
+		ACols[I] = LCols[I];
+		AVals[I] = -(tags[LRows[I]] == -1)*alpha*LVals[I] - (tags[LRows[I]] != -1)*LVals[I] + (LRows[I]==LCols[I])*MVals[LRows[I]];
+	}
 }
 	
 } // end of namespace kernels
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.