Commits

Shrirang Abhyankar committed 7202cb7

[petsc-maint # 106790] Removed extra if test in the inner loop of MatMult_SeqSBAIJ_1

Hg-commit: 1d4f6480dece199f4cc7967098e7cbffa741a2b1

  • Participants
  • Parent commits 142808b

Comments (0)

Files changed (1)

src/mat/impls/sbaij/seq/relax.h

   const PetscInt       *ib=a->j;
   PetscInt             ibt;
 #endif
-  PetscInt             nonzerorow = 0;
+  PetscInt             nonzerorow = 0,jmin;
 
   PetscFunctionBegin;
   ierr = VecSet(zz,0.0);CHKERRQ(ierr);
     nonzerorow++;
     x1   = x[i];
     sum = 0;
-    for (j=0; j<nz; j++) {
+    jmin = 0;
+    if(ib[0] == i) { /* (diag of A)*x */
+      sum += v[0]*x1;
+      jmin++;
+    }
+    for (j=jmin; j<nz; j++) {
       ibt  = ib[j];
       vj   = v[j];
-      sum += vj * x[ibt];   /* (Upper triangular part of A)*x  */
-      if(ibt != i) z[ibt] += PetscConj(v[j]) * x1;    /* (strict lower triangular part of A)*x  */
+      sum += vj * x[ibt];   /* (strict upper triangular part of A)*x  */
+      z[ibt] += PetscConj(v[j]) * x1;    /* (strict lower triangular part of A)*x  */
     }
     z[i] += sum;
-    v    += nz;
+    v    +=    nz;
     ib   += nz;
   }
 
   const PetscInt       *ib=a->j;
   PetscInt             ibt;
 #endif
-  PetscInt             nonzerorow=0;
+  PetscInt             nonzerorow=0,jmin;
 
   PetscFunctionBegin;
   ierr = VecSet(zz,0.0);CHKERRQ(ierr);
     if (!nz) continue; /* Move to the next row if the current row is empty */
     nonzerorow++;
     x1   = x[i];
-    sum = 0;
+    sum  = 0;
+    jmin = 0;
+    if(ib[0] == i) { /* (diag of A)*x */
+      sum += v[0]*x1;
+      jmin++;
+    }
     PetscPrefetchBlock(ib+nz,nz,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
     PetscPrefetchBlock(v+nz,nz,0,PETSC_PREFETCH_HINT_NTA);  /* Entries for the next row */
-    for (j=0; j<nz; j++) {
-      ibt = ib[j];
-      vj  = v[j];
-      if(ibt != i) z[ibt] += vj * x1;       /* (strict lower triangular part of A)*x  */
-      sum    += vj * x[ibt]; /* (Upper triangular part of A)*x  */
+    for (j=jmin; j<nz; j++) {
+      ibt     = ib[j];
+      vj      = v[j];
+      z[ibt] += vj * x1;       /* (strict lower triangular part of A)*x  */
+      sum    += vj * x[ibt];   /* (strict upper triangular part of A)*x  */
     }
     z[i] += sum;
     v    += nz;