Anonymous avatar Anonymous committed 3b2c812

Routines to update QT and E on the GPU are now included, replacing previous inefficient functions.

Comments (0)

Files changed (17)

 # the implicit rule .cu.o converts src/parameterDB.cu to src/parameterDB.o
 
 # link all the object files and libraries to create the executable bin/cuIBM
-cuibm: $(LIBS) $(EXT_LIBS) src/parameterDB.o src/bodies.o src/cuIBM.o
+cuibm: $(LIBS) $(EXT_LIBS) src/helpers.o src/parameterDB.o src/bodies.o src/cuIBM.o
 	nvcc $^ -o bin/cuIBM
 
 #src/preconditioner.o
 #  1. delete all the .a and .o files
 #  2. cd to the concerned folders
 #  3. run 'make clean' as defined in the Makefiles in those folders
-.PHONY: clean
+.PHONY: clean cleanall
+
 clean:
 	@rm -f lib/*.a bin/cuIBM src/*.o
 	cd src/solvers; $(MAKE) $(MFLAGS) clean
+	cd src/io; $(MAKE) $(MFLAGS) clean
+
+cleanall:
+	@rm -f lib/*.a bin/cuIBM src/*.o
+	cd src/solvers; $(MAKE) $(MFLAGS) clean
 	cd src/cusp; $(MAKE) $(MFLAGS) clean
 	cd src/io; $(MAKE) $(MFLAGS) clean
 	cd external; $(MAKE) $(MFLAGS) clean
 
 * Linker step fails on Mac OS X
 * Cannot choose the Krylov solver.
+* TairaColoniusSolver and FadlunEtAlSolver fail if no body is present.
 * TairaColoniusSolver does not calculate forces if multiple bodies are present.
 * Forces are not calculated for the direct forcing method (FadlunEtAlSolver)
 * FadlunEtAlSolver has not been fully validated
 	std::cout << "DONE!" << std::endl;
 }
 
-/*
-template <typename memoryType>
-void bodies<memoryType>::initialise(flowDescription &F, domain &D)
-{
-}
-*/
-
 template <typename memoryType>
 void bodies<memoryType>::calculateCellIndices(domain &D)
 {
 		J[k] = j;
 	}
 }
-/**
-* \brief This function determines the bounding box around each body.
-*/
+
 template <typename memoryType>
 void bodies<memoryType>::calculateBoundingBoxes(domain &D)
 {
 */
 
 #include <types.h>
+#include <helpers.h>
 #include <domain.h>
 #include <io/io.h>
 #include <solvers/NavierStokes/NavierStokesSolver.h>
+/*
+*  Copyright (C) 2012 by Anush Krishnan, Simon Layton, Lorena Barba
+*
+*  Permission is hereby granted, free of charge, to any person obtaining a copy
+*  of this software and associated documentation files (the "Software"), to deal
+*  in the Software without restriction, including without limitation the rights
+*  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+*  copies of the Software, and to permit persons to whom the Software is
+*  furnished to do so, subject to the following conditions:
+*
+*  The above copyright notice and this permission notice shall be included in
+*  all copies or substantial portions of the Software.
+*
+*  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+*  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+*  THE SOFTWARE.
+*/
+
+#include <helpers.h>
+
+real dhRoma(real x, real h)
+{
+	real r = fabs(x)/h;
+	
+	if(r>1.5)
+		return 0.0;
+	else if(r>0.5 && r<=1.5)
+		return 1.0/(6*h)*( 5.0 - 3.0*r - sqrt(-3.0*(1-r)*(1-r) + 1.0) );
+	else
+		return 1.0/(3*h)*( 1.0 + sqrt(-3.0*r*r + 1.0) );
+}
+
+real delta(real x, real y, real h)
+{
+	return dhRoma(x, h) * dhRoma(y, h);
+}

src/include/helpers.h

+/*
+*  Copyright (C) 2012 by Anush Krishnan, Simon Layton, Lorena Barba
+*
+*  Permission is hereby granted, free of charge, to any person obtaining a copy
+*  of this software and associated documentation files (the "Software"), to deal
+*  in the Software without restriction, including without limitation the rights
+*  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+*  copies of the Software, and to permit persons to whom the Software is
+*  furnished to do so, subject to the following conditions:
+*
+*  The above copyright notice and this permission notice shall be included in
+*  all copies or substantial portions of the Software.
+*
+*  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+*  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+*  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+*  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+*  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+*  THE SOFTWARE.
+*/
+
+#pragma once
+
+#include <types.h>
+
+/**
+* @file  helpers.h
+*/
+
+real dhRoma(real x, real h);
+
+real delta(real x, real y, real h);

src/include/solvers/NavierStokes/NavierStokesSolver.h

 	* Initialises stuff common to all IBM solvers
 	*/
 	void initialiseCommon();
+	
+	/**
+	* @brief Initialises all required arrays
+	* @param numQ Total number velocity variables (u and v)
+	* @param numLambda Number of pressure variables + twice the number of body force variables
+	*/
 	void initialiseArrays(int numQ, int numLambda);
 	void initialiseFluxes();
 	void initialiseBoundaryArrays();
 	void assembleMatrices(); // contains subfunctions to calculate A, QT, BN, QTBNQ
 
-	// functions to generate matrices
-	
-	void generateM();
-	
 	// Methods are defined as virtual when they are redefined in a derived class with the same name.
 	
+	// functions to generate matrices
+	void generateM();
 	virtual void generateL();
 	virtual void generateA(real alpha);
-	void generateBN();
-	
+	void generateBN();	
 	virtual void generateQT();
 	void updateQ(real gamma);
 	

src/include/solvers/NavierStokes/kernels/generateQT.h

 #pragma once
 
 #include <types.h>
+#include <helpers.h>
 
 namespace kernels
 {
 
-__global__
+__global__ \
 void updateQ(int *QRows, int *QCols, real *QVals, int QSize, int *tags);
 
-__global__
+__global__ \
 void updateQ(int *QRows, int *QCols, real *QVals, int QSize, int *tagsX, int *tagsY);
 
 void generateQT(int *QTRows, int *QTCols, real *QTVals, int nx, int ny);
 
+__global__ \
+void updateQT(int *QTRows, int *QTCols, real *QTVals,
+              int *ERows,  int *ECols,  real *EVals,
+              int nx, int ny, real *x, real *y, real *dx,
+              int totalPoints, real *xB, real *yB, int *I, int *J);
 }
 all: objects install force_look
 
 objects: $(OBJ)
-	$(AR) $(ARFLAGS) ruv $(LIBNAME) $^
+	$(AR) $(ARFLAGS) ruv $(LIBNAME) $?
 	$(RANLIB) $(LIBNAME)
 
 install: objects
 	std::string ecc = deviceProp.ECCEnabled ? "yes" : "no";
 	std::cout << "Compute capability = " << deviceProp.major << "." << deviceProp.minor << std::endl;
 	std::cout << "ECC Enabled = " << ecc << std::endl;
-	std::cout << std::endl;
 }
 
 void printTimingInfo(Logger &logger)

src/solvers/NavierStokes/Makefile

 # the implicit rule .cu.o defined in Makefile.inc is used to compile these
 # then ar is used to create a library from the object files
 # $^ lists all dependencies ($(OBJS) in this case)
+# $? lists only those dependencies that have been updated
 objects: $(OBJS)
-	$(AR) $(ARFLAGS) ruv ../$(LIBNAME) $^
+	$(AR) $(ARFLAGS) ruv ../$(LIBNAME) $?
 	$(RANLIB) ../$(LIBNAME)
 
 force_look:

src/solvers/NavierStokes/NavierStokesSolver.cu

 	logger.stopTimer("initialiseCommon");
 }
 
-/**
-* \brief Initialises all required arrays
-* \param numQ Total number velocity variables (u and v)
-* \param numLambda Number of pressure variables + twice the number of body force variables
-*/
 template <typename memoryType>
 void NavierStokesSolver<memoryType>::initialiseArrays(int numQ, int numLambda)
 {	
 	}
 	real dt = (*paramDB)["simulation"]["dt"].get<real>();
 	calculateForce();
-//	io::writeForce(folderName, timeStep*dt, forceX, forceY);
 	forceFile << timeStep*dt << '\t' << forceX << '\t' << forceY << std::endl;
 	iterationsFile << timeStep << '\t' << iterationCount1 << '\t' << iterationCount2 << std::endl;
-//	io::writeIterations(iterationsFile, folderName, timeStep, iterationCount1, iterationCount2);
 	
 	logger.stopTimer("output");
 }
 void NavierStokesSolver<memoryType>::shutDown()
 {
 	io::printTimingInfo(logger);
-//	forceFile.close();
+	forceFile.close();
 	iterationsFile.close();
 }
 

src/solvers/NavierStokes/TairaColonius/calculateForce.inl

 	
 	real dx = NavierStokesSolver<memoryType>::domInfo->dx[ NSWithBody<memoryType>::B.I[0] ],
 	     dy = dx;
-	
+
 	thrust::copy(NavierStokesSolver<memoryType>::lambda.begin() + nx*ny, NavierStokesSolver<memoryType>::lambda.end(), f.begin());
 	cusp::multiply(ET, f, F);
 	NavierStokesSolver<memoryType>::forceX = (dx*dy)/dx*thrust::reduce( F.begin(), F.begin()+(nx-1)*ny );

src/solvers/NavierStokes/TairaColonius/generateQT.inl

 
 #include <solvers/NavierStokes/kernels/generateQT.h>
 
-real dh_roma(real x, real h)
+#define BLOCKSIZE 256
+
+template <>
+void TairaColoniusSolver<host_memory>::updateQT()
 {
-	real r = fabs(x)/h;
-	
-	if(r>1.5)
-		return 0.0;
-	else if(r>0.5 && r<=1.5)
-		return 1.0/(6*h)*( 5.0 - 3.0*r - sqrt(-3.0*(1-r)*(1-r) + 1.0) );
-	else
-		return 1.0/(3*h)*( 1.0 + sqrt(-3.0*r*r + 1.0) );
 }
 
-real delta(real x, real y, real h)
+template <>
+void TairaColoniusSolver<device_memory>::updateQT()
 {
-	return dh_roma(x, h) * dh_roma(y, h);
+	logger.startTimer("updateQT");
+	
+	int  *QTRows = thrust::raw_pointer_cast(&(QT.row_indices[0])),
+	     *QTCols = thrust::raw_pointer_cast(&(QT.column_indices[0]));
+	real *QTVals = thrust::raw_pointer_cast(&(QT.values[0]));
+	
+	int  *ERows = thrust::raw_pointer_cast(&(E.row_indices[0])),
+	     *ECols = thrust::raw_pointer_cast(&(E.column_indices[0]));
+	real *EVals = thrust::raw_pointer_cast(&(E.values[0]));
+	
+	real *x  = thrust::raw_pointer_cast(&(domInfo->xD[0])),
+	     *y  = thrust::raw_pointer_cast(&(domInfo->yD[0])),
+	     *dx = thrust::raw_pointer_cast(&(domInfo->dxD[0]));
+	
+	real *xB = thrust::raw_pointer_cast(&(B.x[0])),
+	     *yB = thrust::raw_pointer_cast(&(B.y[0]));
+	
+	int *I = thrust::raw_pointer_cast(&(B.I[0])),
+	    *J = thrust::raw_pointer_cast(&(B.J[0]));
+	
+	int  nx = domInfo->nx,
+	     ny = domInfo->ny;
+	     
+	dim3 dimGrid(int((B.totalPoints-0.5)/BLOCKSIZE)+1, 1);
+	dim3 dimBlock(BLOCKSIZE, 1);
+
+	kernels::updateQT <<<dimGrid, dimBlock>>>
+	         (QTRows, QTCols, QTVals,
+	          ERows,  ECols,  EVals,
+	          nx, ny, x, y, dx,
+              B.totalPoints, xB, yB, I, J);
+    
+    logger.stopTimer("updateQT");
+    
+	logger.startTimer("transposeQT");
+	cusp::transpose(QT, Q);
+	logger.stopTimer("transposeQT");
+	
+	logger.startTimer("transposeE");
+	cusp::transpose(E, ET);
+	logger.stopTimer("transposeE");
 }
 
 template <>
 	real *QTVals = thrust::raw_pointer_cast(&(QT.values[0]));
 	
 	kernels::generateQT(QTRows, QTCols, QTVals, nx, ny);
-	
-	// Temparawari
-	int row = numP;
-	int num_elements = 4*numP-2*(nx+ny);
-	
-	/// Generate E
-	int  I, J, col;
-	real xB, yB, x, y, alpha, Dx;
-	int cur = 0;
-	for(int k=0; k<B.numBodies; k++)
-	{
-		Dx = domInfo->dx[B.I[B.offsets[k]]];
-		alpha = Dx*Dx;
-		for(int l=0; l<B.numPoints[k]; l++)
-		{	
-			xB = B.x[cur];
-			yB = B.y[cur];
-			I  = B.I[cur];
-			J  = B.J[cur];
-			for(int j=J-1; j<=J+1; j++)
-			{
-				for(int i=I-2; i<=I+1; i++)
-				{
-					col = j*(nx-1) + i;
-					x   = domInfo->x[i+1];
-					y   = 0.5*(domInfo->y[j] + domInfo->y[j+1]);
-					QT.row_indices[num_elements] = row;
-					QT.column_indices[num_elements] = col;
-					QT.values[num_elements] = alpha*delta(x-xB, y-yB, Dx)/Dx; // Dx = Dy near the body
-					num_elements++;
-				}
-			}
-			cur++;
-			row++;
-		}
-	}
-	cur=0;
-	for(int k=0; k<B.numBodies; k++)
-	{
-		Dx = domInfo->dx[B.I[B.offsets[k]]];
-		alpha = Dx*Dx;
-		for(int l=0; l<B.numPoints[k]; l++)
-		{
-			xB	= B.x[cur];
-			yB	= B.y[cur];
-			I	= B.I[cur];
-			J	= B.J[cur];
-			for(int j=J-2; j<=J+1; j++)
-			{
-				for(int i=I-1; i<=I+1; i++)
-				{
-					col	= j*nx + i + numU;
-					x	= 0.5*(domInfo->x[i] + domInfo->x[i+1]);
-					y	= domInfo->y[j+1];
-					QT.row_indices[num_elements] = row;
-					QT.column_indices[num_elements] = col;
-					QT.values[num_elements] = alpha*delta(x-xB, y-yB, Dx)/Dx;
-					num_elements++;
-				}
-			}
-			cur++;
-			row++;
-		}
-	}
-	
-	/// Generate the E part
-/*	for(int k=0; k<B.totalPoints; k++)
-	{
-		Dx = NavierStokesSolver<Matrix, Vector>::domInfo->dx[B.I[k]];
-		alpha = Dx*Dx;
-		xB = B.x[k];
-		yB = B.y[k];
-		I  = B.I[k];
-		J  = B.J[k];
-		for(int j=J-1; j<=J+1; j++)
-		{
-			for(i=I-2; i<=I+1; i++)
-			{
-				col	= j*(nx-1) + i;
-				x	= NavierStokesSolver<Matrix, Vector>::domInfo->x[i+1];
-				y	= 0.5*(NavierStokesSolver<Matrix, Vector>::domInfo->y[j] + NavierStokesSolver<Matrix, Vector>::domInfo->y[j+1]);
-				NavierStokesSolver<Matrix, Vector>::QT.row_indices[num_elements] = row;
-				NavierStokesSolver<Matrix, Vector>::QT.column_indices[num_elements] = col;
-				NavierStokesSolver<Matrix, Vector>::QT.values[num_elements] = alpha*delta(x-xB, y-yB, Dx)/Dx; // Dx = Dy near the body
-				num_elements++;
-			}
-		}
-		row++;
-	}
-	for(int k=0; k<B.totalPoints; k++)
-	{
-		Dx = NavierStokesSolver<Matrix, Vector>::domInfo->dx[B.I[k]];
-		alpha = Dx*Dx;
-		xB	= B.x[k];
-		yB	= B.y[k];
-		I	= B.I[k];
-		J	= B.J[k];
-		for(j=J-2; j<=J+1; j++)
-		{
-			for(i=I-1; i<=I+1; i++)
-			{
-				col	= j*nx + i + numU;
-				x	= 0.5*(NavierStokesSolver<Matrix, Vector>::domInfo->x[i] + NavierStokesSolver<Matrix, Vector>::domInfo->x[i+1]);
-				y	= NavierStokesSolver<Matrix, Vector>::domInfo->y[j+1];
-				NavierStokesSolver<Matrix, Vector>::QT.row_indices[num_elements] = row;
-				NavierStokesSolver<Matrix, Vector>::QT.column_indices[num_elements] = col;
-				NavierStokesSolver<Matrix, Vector>::QT.values[num_elements] = alpha*delta(x-xB, y-yB, Dx)/Dx;
-				num_elements++;
-			}
-		}
-		row++;
-	}
-*/
+
 	logger.stopTimer("generateQT");
 	
-	logger.startTimer("transposeQT");
-	cusp::transpose(QT, Q);
-	logger.stopTimer("transposeQT");
-	
-	
+	updateQT();	
 }
 
 template <>
 	
 	kernels::generateQT(QTRows, QTCols, QTVals, nx, ny);
 
+	QT = QTHost;
+	
+	logger.stopTimer("generateQT");
+	
+	updateQT();
+}
+
+#if 0
+template <>
+void TairaColoniusSolver<device_memory>::generateQT()
+{
+	logger.startTimer("generateQT");
+	
+	int  nx = domInfo->nx,
+	     ny = domInfo->ny;
+	
+	int  numU  = (nx-1)*ny;
+	int  numUV = numU + nx*(ny-1);
+	int  numP  = numU + ny;
+	
+	/// QT is an (np + 2*nb) x nuv matrix
+	cooH QTHost(numP + 2*B.totalPoints, numUV, 4*numP-2*(nx+ny) + 24*B.totalPoints);
+	
+	int  *QTRows = thrust::raw_pointer_cast(&(QTHost.row_indices[0])),
+	     *QTCols = thrust::raw_pointer_cast(&(QTHost.column_indices[0]));
+	real *QTVals = thrust::raw_pointer_cast(&(QTHost.values[0]));
+	
+	kernels::generateQT(QTRows, QTCols, QTVals, nx, ny);
+
 	// Temparawari
 	int row = numP;
 	int num_elements = 4*numP-2*(nx+ny);
 	
+/*	
 	/// Generate E
 	int  I, J, col;
 	real xB, yB, x, y, alpha, Dx;
 	int cur = 0;
 	for(int k=0; k<B.numBodies; k++)
 	{
-		Dx = domInfo->dx[B.I[B.offsets[k]]];
+		Dx = B.h[k];
 		alpha = Dx*Dx;
 		for(int l=0; l<B.numPoints[k]; l++)
 		{	
 	cur=0;
 	for(int k=0; k<B.numBodies; k++)
 	{
-		Dx = domInfo->dx[B.I[B.offsets[k]]];
+		Dx = B.h[k];
 		alpha = Dx*Dx;
 		for(int l=0; l<B.numPoints[k]; l++)
 		{
 			row++;
 		}
 	}
-	
+*/	
 	/// Generate the E part
 /*	for(int k=0; k<B.totalPoints; k++)
 	{
 	cusp::transpose(QT, Q);
 	logger.stopTimer("transposeQT");
 }
-
-template <typename memoryType>
-void TairaColoniusSolver<memoryType>::updateQT()
-{	
-}
-
-template <>
-void TairaColoniusSolver<host_memory>::generateE()
-{
-	logger.startTimer("generateE");
-	
-	int  nx = domInfo->nx,
-	     ny = domInfo->ny;
-	
-	int  numU  = (nx-1)*ny;
-	int  numUV = numU + nx*(ny-1);
-	
-	/// QT is an (np + 2*nb) x nuv matrix
-	E.resize(2*B.totalPoints, numUV, 24*B.totalPoints);
-	
-	int row = 0;
-	int num_elements = 0;
-
-	/// Generate E
-	int  I, J, col;
-	real xB, yB, x, y, alpha, Dx;
-	int cur = 0;
-	for(int k=0; k<B.numBodies; k++)
-	{
-		Dx = domInfo->dx[B.I[B.offsets[k]]];
-		alpha = Dx*Dx;
-		for(int l=0; l<B.numPoints[k]; l++)
-		{	
-			xB = B.x[cur];
-			yB = B.y[cur];
-			I  = B.I[cur];
-			J  = B.J[cur];
-			for(int j=J-1; j<=J+1; j++)
-			{
-				for(int i=I-2; i<=I+1; i++)
-				{
-					col = j*(nx-1) + i;
-					x   = domInfo->x[i+1];
-					y   = 0.5*(domInfo->y[j] + domInfo->y[j+1]);
-					E.row_indices[num_elements] = row;
-					E.column_indices[num_elements] = col;
-					E.values[num_elements] = alpha*delta(x-xB, y-yB, Dx)/Dx; // Dx = Dy near the body
-					num_elements++;
-				}
-			}
-			cur++;
-			row++;
-		}
-	}
-	cur=0;
-	for(int k=0; k<B.numBodies; k++)
-	{
-		Dx = domInfo->dx[B.I[B.offsets[k]]];
-		alpha = Dx*Dx;
-		for(int l=0; l<B.numPoints[k]; l++)
-		{
-			xB	= B.x[cur];
-			yB	= B.y[cur];
-			I	= B.I[cur];
-			J	= B.J[cur];
-			for(int j=J-2; j<=J+1; j++)
-			{
-				for(int i=I-1; i<=I+1; i++)
-				{
-					col	= j*nx + i + numU;
-					x	= 0.5*(domInfo->x[i] + domInfo->x[i+1]);
-					y	= domInfo->y[j+1];
-					E.row_indices[num_elements] = row;
-					E.column_indices[num_elements] = col;
-					E.values[num_elements] = alpha*delta(x-xB, y-yB, Dx)/Dx;
-					num_elements++;
-				}
-			}
-			cur++;
-			row++;
-		}
-	}
-	
-	logger.stopTimer("generateE");
-	
-	logger.startTimer("transposeE");
-	cusp::transpose(E, ET);
-	logger.stopTimer("transposeE");
-}
-
-template <>
-void TairaColoniusSolver<device_memory>::generateE()
-{
-	logger.startTimer("generateE");
-	
-	int  nx = domInfo->nx,
-	     ny = domInfo->ny;
-	
-	int  numU  = (nx-1)*ny;
-	int  numUV = numU + nx*(ny-1);
-	
-	/// QT is an (np + 2*nb) x nuv matrix
-	cooH EHost(2*B.totalPoints, numUV, 24*B.totalPoints);
-	
-	int row = 0;
-	int num_elements = 0;
-
-	/// Generate E
-	int  I, J, col;
-	real xB, yB, x, y, alpha, Dx;
-	int cur = 0;
-	for(int k=0; k<B.numBodies; k++)
-	{
-		Dx = domInfo->dx[B.I[B.offsets[k]]];
-		alpha = Dx*Dx;
-		for(int l=0; l<B.numPoints[k]; l++)
-		{	
-			xB = B.x[cur];
-			yB = B.y[cur];
-			I  = B.I[cur];
-			J  = B.J[cur];
-			for(int j=J-1; j<=J+1; j++)
-			{
-				for(int i=I-2; i<=I+1; i++)
-				{
-					col = j*(nx-1) + i;
-					x   = domInfo->x[i+1];
-					y   = 0.5*(domInfo->y[j] + domInfo->y[j+1]);
-					EHost.row_indices[num_elements] = row;
-					EHost.column_indices[num_elements] = col;
-					EHost.values[num_elements] = alpha*delta(x-xB, y-yB, Dx)/Dx; // Dx = Dy near the body
-					num_elements++;
-				}
-			}
-			cur++;
-			row++;
-		}
-	}
-	cur=0;
-	for(int k=0; k<B.numBodies; k++)
-	{
-		Dx = domInfo->dx[B.I[B.offsets[k]]];
-		alpha = Dx*Dx;
-		for(int l=0; l<B.numPoints[k]; l++)
-		{
-			xB	= B.x[cur];
-			yB	= B.y[cur];
-			I	= B.I[cur];
-			J	= B.J[cur];
-			for(int j=J-2; j<=J+1; j++)
-			{
-				for(int i=I-1; i<=I+1; i++)
-				{
-					col	= j*nx + i + numU;
-					x	= 0.5*(domInfo->x[i] + domInfo->x[i+1]);
-					y	= domInfo->y[j+1];
-					EHost.row_indices[num_elements] = row;
-					EHost.column_indices[num_elements] = col;
-					EHost.values[num_elements] = alpha*delta(x-xB, y-yB, Dx)/Dx;
-					num_elements++;
-				}
-			}
-			cur++;
-			row++;
-		}
-	}
-	E = EHost;
-	
-	logger.stopTimer("generateE");
-	
-	logger.startTimer("transposeE");
-	cusp::transpose(E, ET);
-	logger.stopTimer("transposeE");
-}
+#endif

src/solvers/NavierStokes/TairaColoniusSolver.cu

 	int numP  = nx*ny;
 	
 	NSWithBody<memoryType>::initialiseBodies();
+	int numB  = NSWithBody<memoryType>::B.totalPoints; 
+	
 	NavierStokesSolver<memoryType>::initialiseCommon();
-	NavierStokesSolver<memoryType>::initialiseArrays(numUV, numP+2*NSWithBody<memoryType>::B.totalPoints);
-	NavierStokesSolver<memoryType>::assembleMatrices();
-	if(NSWithBody<memoryType>::B.totalPoints > 0)
+	NavierStokesSolver<memoryType>::initialiseArrays(numUV, numP+2*numB);
+	if(numB > 0)
 	{
-		generateE();
+		E.resize(2*numB, numUV, 24*numB);
 		FxX.resize(NSWithBody<memoryType>::B.numCellsX[0]);
 		FxY.resize(NSWithBody<memoryType>::B.numCellsY[0]);
 		FxU.resize( (NSWithBody<memoryType>::B.numCellsX[0]+1)*NSWithBody<memoryType>::B.numCellsY[0] );
 	}
+	NavierStokesSolver<memoryType>::assembleMatrices();
 }
 
 template <typename memoryType>
 	{
 		
 		NSWithBody<memoryType>::updateBodies();
-		//updateQT();
-		generateQT();
-		generateE();
+		updateQT();
 		NavierStokesSolver<memoryType>::generateC();
 		
 		NavierStokesSolver<memoryType>::logger.startTimer("preconditioner2");

src/solvers/NavierStokes/kernels/Makefile

 all: objects force_look
 
 objects: $(OBJS)
-	$(AR) $(ARFLAGS) ruv ../../$(LIBNAME) $^
+	$(AR) $(ARFLAGS) ruv ../../$(LIBNAME) $?
 	$(RANLIB) ../../$(LIBNAME)
 
 force_look:

src/solvers/NavierStokes/kernels/generateQT.cu

 
 #include <solvers/NavierStokes/kernels/generateQT.h>
 
+__device__ \
+real dhRomaDevice(real x, real h)
+{
+	real r = fabs(x)/h;
+	
+	if(r>1.5)
+		return 0.0;
+	else if(r>0.5 && r<=1.5)
+		return 1.0/(6*h)*( 5.0 - 3.0*r - sqrt(-3.0*(1-r)*(1-r) + 1.0) );
+	else
+		return 1.0/(3*h)*( 1.0 + sqrt(-3.0*r*r + 1.0) );
+}
+
+__device__ \
+real deltaDevice(real x, real y, real h)
+{
+	return dhRomaDevice(x, h) * dhRomaDevice(y, h);
+}
+
 namespace kernels
 {
 
-__global__
+__global__ \
 void updateQ(int *QRows, int *QCols, real *QVals, int QSize, int *tags)
 {
 	int I = threadIdx.x + blockIdx.x*blockDim.x;
 	QVals[I] *= ( tags[QRows[I]] == -1 );
 }
 
-__global__
+__global__ \
 void updateQ(int *QRows, int *QCols, real *QVals, int QSize, int *tagsX, int *tagsY)
 {
 	int I = threadIdx.x + blockIdx.x*blockDim.x;
 	}
 }
 
+__global__ \
+void updateQT(int *QTRows, int *QTCols, real *QTVals,
+              int *ERows,  int *ECols,  real *EVals,
+              int nx, int ny, real *x, real *y, real *dx,
+              int totalPoints, real *xB, real *yB, int *I, int *J)
+{
+	int bodyIdx = threadIdx.x + blockIdx.x*blockDim.x;
+	
+	if(bodyIdx >= totalPoints)
+		return;
+	
+	int  Ib=I[bodyIdx],
+	     Jb=J[bodyIdx],
+	     QTIdx = 4*nx*ny - 2*(nx+ny) + bodyIdx*12,
+	     EIdx  = bodyIdx*12,
+	     i, j;
 
+	real Dx = dx[Ib];
+	
+	// populate x-components
+	for(j=Jb-1; j<=Jb+1; j++)
+	{
+		for(i=Ib-2; i<=Ib+1; i++)
+		{
+			QTRows[QTIdx] = bodyIdx + nx*ny;
+			ERows[EIdx] = bodyIdx;
+			
+			QTCols[QTIdx] = j*(nx-1) + i;
+			ECols[EIdx] = QTCols[QTIdx];
+			
+			QTVals[QTIdx] = Dx*deltaDevice(x[i+1]-xB[bodyIdx], 0.5*(y[j]+y[j+1])-yB[bodyIdx], Dx);
+			EVals[EIdx] = QTVals[QTIdx];
+			
+			QTIdx++;
+			EIdx++;
+		}
+	}
+
+	// populate y-components
+	for(j=Jb-2; j<=Jb+1; j++)
+	{
+		for(i=Ib-1; i<=Ib+1; i++)
+		{
+			QTRows[QTIdx+12*totalPoints-12] = bodyIdx + nx*ny + totalPoints;
+			ERows[EIdx+12*totalPoints-12] = bodyIdx + totalPoints;
+			
+			QTCols[QTIdx+12*totalPoints-12] = j*nx + i + (nx-1)*ny;
+			ECols[EIdx+12*totalPoints-12] = QTCols[QTIdx+12*totalPoints-12];
+			
+			QTVals[QTIdx+12*totalPoints-12] = Dx*deltaDevice(0.5*(x[i]+x[i+1])-xB[bodyIdx], y[j+1]-yB[bodyIdx], Dx);
+			EVals[EIdx+12*totalPoints-12] = QTVals[QTIdx+12*totalPoints-12];
+			
+			QTIdx++;
+			EIdx++;
+		}
+	}
+}
 
 } // end of namespace kernels
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.