Commits

Michael Lange committed 4e5148c Draft

Threading VecMDot_Seq

This is essentially a backport from petsc-dev using the
same approach as the threadcomm implementation.

Comments (0)

Files changed (1)

src/vec/vec/impls/seq/dvec2.c

 #include <petsc-private/petscaxpy.h>
 #include <petscthreadcomm.h>
 
+#if defined (PETSC_HAVE_OPENMP)
+/*
+   VecMDot_Seq - OpenMP build of the sequential multi-dot-product kernel.
+
+   For every m in [0,nv) stores in z[m] the sum over j of
+   xx[j]*PetscConj(yy[j]), where xx is the array of xin and yy is the
+   array of yin[m].
+
+   Input Parameters:
+     xin - vector whose array supplies the xx[] factor
+     nv  - number of vectors in yin (and of results written to z)
+     yin - array of nv vectors supplying the conjugated yy[] factor
+
+   Output Parameter:
+     z   - array with room for at least nv scalars; z[m] receives the
+           result for yin[m]
+
+   Returns 0 on success, or the error code propagated through CHKERRQ.
+
+   The vectors are walked in chunks of at most 32.  Inside a chunk the
+   index k is relative to the chunk start i, so it must be bounded by the
+   chunk length nreds and not by nv; otherwise yin[] and z[] are accessed
+   past their ends as soon as nv exceeds 32.
+*/
+#undef __FUNCT__
+#define __FUNCT__ "VecMDot_Seq"
+PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
+{
+  PetscErrorCode ierr;
+  PetscInt       nreds,i,j,k;
+  PetscScalar    *xx,*yy;
+  PetscScalar    sum;
+  const Vec      *yvec;
+
+  PetscFunctionBegin;
+  for (i=0; i<nv; i+=nreds) {
+    /* length of the current chunk: at most 32, fewer for the last one */
+    nreds = PetscMin(nv-i,32);
+    ierr  = VecGetArray(xin,&xx);CHKERRQ(ierr);
+    /* yvec[k] is yin[i+k]; no cast needed, the Vec handles are only read */
+    yvec  = yin+i;
+    for (k=0; k<nreds; k++) {
+      sum  = 0.;
+      ierr = VecGetArray(yvec[k],&yy);CHKERRQ(ierr);
+
+      /* NOTE(review): __start/__end are not declared in this function; presumably
+         VecOMPParallelBegin defines them as the calling thread's index range
+         over xin - confirm against the macro definition. */
+      VecOMPParallelBegin(xin, shared(xx, yy) private(j) default(none) reduction(+:sum));
+      for (j=__start; j<__end; j++) sum += xx[j]*PetscConj(yy[j]);
+      VecOMPParallelEnd();
+      z[i+k] = sum;
+      ierr = VecRestoreArray(yvec[k],&yy);CHKERRQ(ierr);
+    }
+    ierr = VecRestoreArray(xin,&xx);CHKERRQ(ierr);
+  }
+  /* 2n-1 flops per dot product, clamped at zero so an empty local vector logs nothing negative */
+  ierr = PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#else
 #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
 #include <../src/vec/vec/impls/seq/ftn-kernels/fmdot.h>
 #undef __FUNCT__  
   PetscFunctionReturn(0);
 }
 #endif
+#endif
 
 /* ----------------------------------------------------------------------------*/
 #undef __FUNCT__