Commits

Michael Lange committed eacc412 Draft

Code cleanup:
* MatThreadPartition() now prescribes boundaries for all threads, including the comm thread.
* This allows all MatMult()/MatMultAdd() implementations to simply read their partition boundaries from the matrix object, regardless of execution mode.

Comments (0)

Files changed (3)

include/petsc-private/matimpl.h

 
   PetscFunctionBegin;
 #if defined(PETSC_HAVE_OPENMP)
-  nthread = PetscGetMaxThreads() - 1;
+  nthread = PetscGetMaxThreads();
   PetscMalloc(nthread * sizeof(PetscInt), &A->thread_start);
   PetscMalloc(nthread * sizeof(PetscInt), &A->thread_end);
 #else
-  nthread = 0;
+  nthread = 1;
 #endif
 
-  if (nthread > 0 && nz > 0) {
-    chunk = nz / nthread;
+  if (nthread > 1 ) {
+    // N-1 worker threads
+    chunk = nz / (nthread-1);
     A->thread_start[0] = 0;
-    t = 1;
+    A->thread_end[0] = 0;
+    A->thread_start[1] = 0;
+    t = 2;
     for (i=0; i<m+1; i++){
       if (ii[i] >= (t*chunk) && t < nthread) {
 	A->thread_start[t] = i;
     /* Now apply local diffusion */
     while(improved){
       improved = PETSC_FALSE;
-      for(i=0; i<nthread; i++){
+      for(i=1; i<nthread; i++){
 	if(i < nthread-1){
 	  // Look ahead
 	  my_nz = ii[ A->thread_end[i] ] - ii[ A->thread_start[i] ];
 	    improved = PETSC_TRUE;
 	  }
 	}
-	if(i > 0){
+	if(i > 1){
 	  // Look back
 	  my_nz = ii[ A->thread_end[i] ] - ii[ A->thread_start[i] ];
 	  new_nz = ii[ A->thread_end[i-1] + 1 ] - ii[ A->thread_start[i-1] ];
   } else {
     for (i=0; i<nthread; i++) {
       A->thread_start[i] = 0;
-      A->thread_end[i] = 0;
+      A->thread_end[i] = m;
     }
   }
   PetscFunctionReturn(0);

src/mat/impls/aij/seq/aij.c

   ii  = a->i;
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];
-  } else {
-    start = 0;
-    end = m;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];
 
   if (usecprow){ /* use compressed row format */
     m    = a->compressedrow.nrows;
   ii  = a->i;
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];
-  } else {
-    start = 0;
-    end = m;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];
 
   if (usecprow){ /* use compressed row format */
     if (zz != yy){

src/mat/impls/baij/seq/baij2.c

   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
   
   if(usecprow){
     mbs  = a->compressedrow.nrows;
   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){
     ii   = a->compressedrow.i;
   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){ /* use compressed row format */
     ii   = a->compressedrow.i;
   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){
     ii   = a->compressedrow.i;
   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){
     ii   = a->compressedrow.i;
   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){
     ii   = a->compressedrow.i;
   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){
     ii     = a->compressedrow.i;
   }
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){
     mbs  = a->compressedrow.nrows;
   }
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){
     mbs  = a->compressedrow.nrows;
   }
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){
     mbs  = a->compressedrow.nrows;
   }
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){
     mbs  = a->compressedrow.nrows;
   }
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){
     mbs  = a->compressedrow.nrows;
   }
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){
     mbs  = a->compressedrow.nrows;
   }
 
   thread = PetscGetThreadNum();
-  if (thread > 0){
-    thread -= 1;
-    start = A->thread_start[thread];
-    end = A->thread_end[thread];    
-  } else {
-    start = 0;
-    end = mbs;
-  }
+  start = A->thread_start[thread];
+  end = A->thread_end[thread];    
 
   if (usecprow){
     mbs  = a->compressedrow.nrows;