Commits

Karl Rupp committed e4a0ef1

ViennaCL: Renamed files to fulfill conventions (.cpp -> .cxx, all lowercase), removed restriction of C++ everywhere.

  • Participants
  • Parent commits d67ff14

Comments (0)

Files changed (11)

File src/mat/impls/aij/seq/seqviennacl/aijAssemble.cpp

-#include "petscconf.h"
-
-#include "../src/mat/impls/aij/seq/aij.h"          /*I "petscmat.h" I*/
-#include "petscbt.h"
-#include "../src/vec/vec/impls/dvecimpl.h"
-#include "petsc-private/vecimpl.h"
-
-
-#include "../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h"
-#include "viennacl/linalg/prod.hpp"
-
-
-
-// Ne: Number of elements
-// Nl: Number of dof per element
-#undef __FUNCT__
-#define __FUNCT__ "MatSetValuesBatch_SeqAIJViennaCL"
-PetscErrorCode MatSetValuesBatch_SeqAIJViennaCL(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats)
-{
-  //TODO
-  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Implementation of MatSetValuesBatch_SeqAIJViennaCL() missing.");
-}

File src/mat/impls/aij/seq/seqviennacl/aijassemble.cxx

+#include "petscconf.h"
+
+#include "../src/mat/impls/aij/seq/aij.h"          /*I "petscmat.h" I*/
+#include "petscbt.h"
+#include "../src/vec/vec/impls/dvecimpl.h"
+#include "petsc-private/vecimpl.h"
+
+
+#include "../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h"
+#include "viennacl/linalg/prod.hpp"
+
+
+
+// Ne: Number of elements
+// Nl: Number of dof per element
+#undef __FUNCT__
+#define __FUNCT__ "MatSetValuesBatch_SeqAIJViennaCL"
+PetscErrorCode MatSetValuesBatch_SeqAIJViennaCL(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats)
+{
+  //TODO
+  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Implementation of MatSetValuesBatch_SeqAIJViennaCL() missing.");
+}

File src/mat/impls/aij/seq/seqviennacl/aijviennacl.cpp

-
-
-/*
-    Defines the basic matrix operations for the AIJ (compressed row)
-  matrix storage format.
-*/
-
-#include "petscconf.h"
-#include "../src/mat/impls/aij/seq/aij.h"          /*I "petscmat.h" I*/
-#include "petscbt.h"
-#include "../src/vec/vec/impls/dvecimpl.h"
-#include "petsc-private/vecimpl.h"
-
-#include "../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h"
-
-
-#include <algorithm>
-#include <vector>
-#include <string>
-
-#include "viennacl/linalg/prod.hpp"
-
-#undef __FUNCT__
-#define __FUNCT__ "MatViennaCLCopyToGPU"
-PetscErrorCode MatViennaCLCopyToGPU(Mat A)
-{
-
-  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
-  Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
-  //PetscInt           m               = A->rmap->n,*ii,*ridx;
-  PetscInt           *ii;
-  PetscErrorCode     ierr;
-
-
-  PetscFunctionBegin;
-  if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED || A->valid_GPU_matrix == PETSC_VIENNACL_CPU) {
-    ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
-
-    try {
-      //viennaclstruct->nonzerorow=0;
-      //for (PetscInt j = 0; j<m; j++) viennaclstruct->nonzerorow += (a->i[j+1] > a->i[j]);
-
-      viennaclstruct->mat = new ViennaCLAIJMatrix();
-      if (a->compressedrow.use) {
-        //m    = a->compressedrow.nrows;
-        ii   = a->compressedrow.i;
-        //ridx = a->compressedrow.rindex;
-        
-        viennaclstruct->mat->set(ii, a->j, a->a, A->rmap->n, A->cmap->n, a->nz);
-        
-        // TODO: Either convert to full CSR (inefficient), or hold row indices in temporary vector (requires additional bookkeeping for matrix-vector multiplications)
-        
-        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
-      } else {
-        
-        // Since PetscInt is in general different from cl_uint, we have to convert:
-        viennacl::backend::mem_handle dummy;
-        
-        viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
-        for (PetscInt i=0; i<=A->rmap->n; ++i)
-          row_buffer.set(i, (a->i)[i]);
-        
-        viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
-        for (PetscInt i=0; i<a->nz; ++i)
-          col_buffer.set(i, (a->j)[i]);
-        
-        viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
-      }
-    } catch(char *ex) {
-      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-    }
-
-    A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
-
-    ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
-  }
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "MatViennaCLCopyFromGPU"
-PetscErrorCode MatViennaCLCopyFromGPU(Mat A, ViennaCLAIJMatrix *Agpu)
-{
-  //Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
-  Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
-  PetscInt           m               = A->rmap->n;
-  PetscErrorCode     ierr;
-
-
-  PetscFunctionBegin;
-  if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED) {
-    try {
-      if (a->compressedrow.use) {
-        SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
-      } else {
-
-        if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
-        a->nz           = Agpu->nnz();
-        a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
-        A->preallocated = PETSC_TRUE;
-        if (a->singlemalloc) {
-          if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
-        } else {
-          if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
-          if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
-          if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
-        }
-        ierr = PetscMalloc3(a->nz,PetscScalar,&a->a,a->nz,PetscInt,&a->j,m+1,PetscInt,&a->i);CHKERRQ(ierr);
-        ierr = PetscLogObjectMemory(A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
-
-        a->singlemalloc = PETSC_TRUE;
-
-        /* Setup row lengths */
-        if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
-        ierr = PetscMalloc2(m,PetscInt,&a->imax,m,PetscInt,&a->ilen);CHKERRQ(ierr);
-        ierr = PetscLogObjectMemory(A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
-
-        /* Copy data back from GPU */
-        viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
-        
-        // copy row array
-        viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
-        (a->i)[0] = row_buffer[0];
-        for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
-          (a->i)[i+1] = row_buffer[i+1];
-          a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
-        }
-        
-        // copy column indices
-        viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
-        viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
-        for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
-          (a->j)[i] = col_buffer[i];
-        
-        // copy nonzero entries directly to destination (no conversion required)
-        viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
-        
-        /* TODO: What about a->diag? */
-      }
-    } catch(char *ex) {
-      SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
-    }
-
-    /* This assembly prevents resetting the flag to PETSC_VIENNACL_CPU and recopying */
-    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
-    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
-
-    A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
-  } else {
-    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
-  }
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "MatGetVecs_SeqAIJViennaCL"
-PetscErrorCode MatGetVecs_SeqAIJViennaCL(Mat mat, Vec *right, Vec *left)
-{
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  if (right) {
-    ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr);
-    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
-    ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
-    ierr = VecSetType(*right,VECSEQVIENNACL);CHKERRQ(ierr);
-    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
-  }
-  if (left) {
-    ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr);
-    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
-    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
-    ierr = VecSetType(*left,VECSEQVIENNACL);CHKERRQ(ierr);
-    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
-  }
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "MatMult_SeqAIJViennaCL"
-PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
-{
-  Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
-  PetscErrorCode     ierr;
-  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
-  ViennaCLVector     *xgpu=NULL,*ygpu=NULL;
-
-  PetscFunctionBegin;
-  ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
-  ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
-  try {
-    *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
-  } catch (char *ex) {
-    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-  }
-  ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
-  ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
-  ierr = PetscLogFlops(2.0*a->nz - viennaclstruct->mat->nnz());CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
-
-
-#undef __FUNCT__
-#define __FUNCT__ "MatMultAdd_SeqAIJViennaCL"
-PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
-{
-  Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
-  PetscErrorCode     ierr;
-  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
-  ViennaCLVector     *xgpu=NULL,*ygpu=NULL,*zgpu=NULL;
-
-  PetscFunctionBegin;
-  try {
-    ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
-    ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
-    ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
-
-    if (a->compressedrow.use) {
-        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
-    } else {
-      if (zz == xx || zz == yy) { //temporary required
-        ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
-        *zgpu = *ygpu + temp;
-      } else {
-        *zgpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
-        *zgpu += *ygpu;
-      }
-    }
-
-    ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
-    ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
-    ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
-
-  } catch(char *ex) {
-    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-  }
-  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "MatAssemblyEnd_SeqAIJViennaCL"
-PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
-{
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
-  ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
-  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
-  A->ops->mult    = MatMult_SeqAIJViennaCL;
-  A->ops->multadd = MatMultAdd_SeqAIJViennaCL;
-  PetscFunctionReturn(0);
-}
-
-/* --------------------------------------------------------------------------------*/
-#undef __FUNCT__
-#define __FUNCT__ "MatCreateSeqAIJViennaCL"
-/*@
-   MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
-   (the default parallel PETSc format).  This matrix will ultimately be pushed down
-   to GPUs and use the ViennaCL library for calculations. For good matrix
-   assembly performance the user should preallocate the matrix storage by setting
-   the parameter nz (or the array nnz).  By setting these parameters accurately,
-   performance during matrix assembly can be increased substantially.
-
-
-   Collective on MPI_Comm
-
-   Input Parameters:
-+  comm - MPI communicator, set to PETSC_COMM_SELF
-.  m - number of rows
-.  n - number of columns
-.  nz - number of nonzeros per row (same for all rows)
--  nnz - array containing the number of nonzeros in the various rows
-         (possibly different for each row) or NULL
-
-   Output Parameter:
-.  A - the matrix
-
-   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
-   MatXXXXSetPreallocation() paradigm instead of this routine directly.
-   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
-
-   Notes:
-   If nnz is given then nz is ignored
-
-   The AIJ format (also called the Yale sparse matrix format or
-   compressed row storage), is fully compatible with standard Fortran 77
-   storage.  That is, the stored row and column indices can begin at
-   either one (as in Fortran) or zero.  See the users' manual for details.
-
-   Specify the preallocated storage with either nz or nnz (not both).
-   Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
-   allocation.  For large problems you MUST preallocate memory or you
-   will get TERRIBLE performance, see the users' manual chapter on matrices.
-
-   By default, this format uses inodes (identical nodes) when possible, to
-   improve numerical efficiency of matrix-vector products and solves. We
-   search for consecutive rows with the same nonzero structure, thereby
-   reusing matrix information to achieve increased efficiency.
-
-   Level: intermediate
-
-.seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSP(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
-
-@*/
-PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
-{
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = MatCreate(comm,A);CHKERRQ(ierr);
-  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
-  ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
-  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
-
-#undef __FUNCT__
-#define __FUNCT__ "MatDestroy_SeqAIJViennaCL"
-PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
-{
-  PetscErrorCode ierr;
-  Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
-
-  PetscFunctionBegin;
-  try {
-    if (A->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) delete (ViennaCLAIJMatrix*)(viennaclcontainer->mat);
-    delete viennaclcontainer;
-    A->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
-  } catch(char *ex) {
-    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-  }
-  /*this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
-  A->spptr = 0;
-  ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
-
-extern PetscErrorCode MatSetValuesBatch_SeqAIJViennaCL(Mat, PetscInt, PetscInt, PetscInt*,const PetscScalar*);
-
-
-#undef __FUNCT__
-#define __FUNCT__ "MatCreate_SeqAIJViennaCL"
-PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
-{
-  PetscErrorCode ierr;
-  Mat_SeqAIJ     *aij;
-
-  PetscFunctionBegin;
-  ierr            = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
-  aij             = (Mat_SeqAIJ*)B->data;
-  aij->inode.use  = PETSC_FALSE;
-  B->ops->mult    = MatMult_SeqAIJViennaCL;
-  B->ops->multadd = MatMultAdd_SeqAIJViennaCL;
-  B->spptr        = new Mat_SeqAIJViennaCL();
-
-  ((Mat_SeqAIJViennaCL*)B->spptr)->mat     = 0;
-
-  B->ops->assemblyend    = MatAssemblyEnd_SeqAIJViennaCL;
-  B->ops->destroy        = MatDestroy_SeqAIJViennaCL;
-  B->ops->getvecs        = MatGetVecs_SeqAIJViennaCL;
-  B->ops->setvaluesbatch = MatSetValuesBatch_SeqAIJViennaCL;
-
-  //ierr = PetscObjectComposeFunction((PetscObject)B,"MatViennaCLSetFormat_C", "MatViennaCLSetFormat_SeqAIJViennaCL", MatViennaCLSetFormat_SeqAIJViennaCL);CHKERRQ(ierr);
-  ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
-
-  B->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
-  PetscFunctionReturn(0);
-}
-
-
-/*M
-   MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
-
-   A matrix type whose data resides on GPUs. These matrices are in CSR format by
-   default. All matrix calculations are performed using the ViennaCL library.
-
-   Options Database Keys:
-+  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
-.  -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
--  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
-
-  Level: beginner
-
-.seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
-M*/
-

File src/mat/impls/aij/seq/seqviennacl/aijviennacl.cxx

+
+
+/*
+    Defines the basic matrix operations for the AIJ (compressed row)
+  matrix storage format.
+*/
+
+#include "petscconf.h"
+#include "../src/mat/impls/aij/seq/aij.h"          /*I "petscmat.h" I*/
+#include "petscbt.h"
+#include "../src/vec/vec/impls/dvecimpl.h"
+#include "petsc-private/vecimpl.h"
+
+#include "../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h"
+
+
+#include <algorithm>
+#include <vector>
+#include <string>
+
+#include "viennacl/linalg/prod.hpp"
+
+#undef __FUNCT__
+#define __FUNCT__ "MatViennaCLCopyToGPU"
+PetscErrorCode MatViennaCLCopyToGPU(Mat A)
+{
+
+  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
+  Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
+  //PetscInt           m               = A->rmap->n,*ii,*ridx;
+  PetscInt           *ii;
+  PetscErrorCode     ierr;
+
+
+  PetscFunctionBegin;
+  if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED || A->valid_GPU_matrix == PETSC_VIENNACL_CPU) {
+    ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
+
+    try {
+      //viennaclstruct->nonzerorow=0;
+      //for (PetscInt j = 0; j<m; j++) viennaclstruct->nonzerorow += (a->i[j+1] > a->i[j]);
+
+      viennaclstruct->mat = new ViennaCLAIJMatrix();
+      if (a->compressedrow.use) {
+        //m    = a->compressedrow.nrows;
+        ii   = a->compressedrow.i;
+        //ridx = a->compressedrow.rindex;
+        
+        viennaclstruct->mat->set(ii, a->j, a->a, A->rmap->n, A->cmap->n, a->nz);
+        
+        // TODO: Either convert to full CSR (inefficient), or hold row indices in temporary vector (requires additional bookkeeping for matrix-vector multiplications)
+        
+        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
+      } else {
+        
+        // Since PetscInt is in general different from cl_uint, we have to convert:
+        viennacl::backend::mem_handle dummy;
+        
+        viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1);
+        for (PetscInt i=0; i<=A->rmap->n; ++i)
+          row_buffer.set(i, (a->i)[i]);
+        
+        viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz);
+        for (PetscInt i=0; i<a->nz; ++i)
+          col_buffer.set(i, (a->j)[i]);
+        
+        viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz);
+      }
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+
+    A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
+
+    ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "MatViennaCLCopyFromGPU"
+PetscErrorCode MatViennaCLCopyFromGPU(Mat A, ViennaCLAIJMatrix *Agpu)
+{
+  //Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
+  Mat_SeqAIJ         *a              = (Mat_SeqAIJ*)A->data;
+  PetscInt           m               = A->rmap->n;
+  PetscErrorCode     ierr;
+
+
+  PetscFunctionBegin;
+  if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED) {
+    try {
+      if (a->compressedrow.use) {
+        SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices");
+      } else {
+
+        if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m);
+        a->nz           = Agpu->nnz();
+        a->maxnz        = a->nz; /* Since we allocate exactly the right amount */
+        A->preallocated = PETSC_TRUE;
+        if (a->singlemalloc) {
+          if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);}
+        } else {
+          if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);}
+          if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);}
+          if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);}
+        }
+        ierr = PetscMalloc3(a->nz,PetscScalar,&a->a,a->nz,PetscInt,&a->j,m+1,PetscInt,&a->i);CHKERRQ(ierr);
+        ierr = PetscLogObjectMemory(A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
+
+        a->singlemalloc = PETSC_TRUE;
+
+        /* Setup row lengths */
+        if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
+        ierr = PetscMalloc2(m,PetscInt,&a->imax,m,PetscInt,&a->ilen);CHKERRQ(ierr);
+        ierr = PetscLogObjectMemory(A, 2*m*sizeof(PetscInt));CHKERRQ(ierr);
+
+        /* Copy data back from GPU */
+        viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1);
+        
+        // copy row array
+        viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get());
+        (a->i)[0] = row_buffer[0];
+        for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) {
+          (a->i)[i+1] = row_buffer[i+1];
+          a->imax[i]  = a->ilen[i] = a->i[i+1] - a->i[i];  //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse
+        }
+        
+        // copy column indices
+        viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz());
+        viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get());
+        for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i)
+          (a->j)[i] = col_buffer[i];
+        
+        // copy nonzero entries directly to destination (no conversion required)
+        viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a);
+        
+        /* TODO: What about a->diag? */
+      }
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
+    }
+
+    /* This assembly prevents resetting the flag to PETSC_VIENNACL_CPU and recopying */
+    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
+    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
+
+    A->valid_GPU_matrix = PETSC_VIENNACL_BOTH;
+  } else {
+    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices");
+  }
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "MatGetVecs_SeqAIJViennaCL"
+PetscErrorCode MatGetVecs_SeqAIJViennaCL(Mat mat, Vec *right, Vec *left)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  if (right) {
+    ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr);
+    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
+    ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
+    ierr = VecSetType(*right,VECSEQVIENNACL);CHKERRQ(ierr);
+    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
+  }
+  if (left) {
+    ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr);
+    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
+    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
+    ierr = VecSetType(*left,VECSEQVIENNACL);CHKERRQ(ierr);
+    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "MatMult_SeqAIJViennaCL"
+PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy)
+{
+  Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
+  PetscErrorCode     ierr;
+  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
+  ViennaCLVector     *xgpu=NULL,*ygpu=NULL;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
+  ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr);
+  try {
+    *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu);
+  } catch (char *ex) {
+    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+  }
+  ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
+  ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr);
+  ierr = PetscLogFlops(2.0*a->nz - viennaclstruct->mat->nnz());CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+
+#undef __FUNCT__
+#define __FUNCT__ "MatMultAdd_SeqAIJViennaCL"
+PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz)
+{
+  Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
+  PetscErrorCode     ierr;
+  Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr;
+  ViennaCLVector     *xgpu=NULL,*ygpu=NULL,*zgpu=NULL;
+
+  PetscFunctionBegin;
+  try {
+    ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr);
+    ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr);
+
+    if (a->compressedrow.use) {
+        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported");
+    } else {
+      if (zz == xx || zz == yy) { //temporary required
+        ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
+        *zgpu = *ygpu + temp;
+      } else {
+        *zgpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu);
+        *zgpu += *ygpu;
+      }
+    }
+
+    ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr);
+    ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr);
+
+  } catch(char *ex) {
+    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+  }
+  ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "MatAssemblyEnd_SeqAIJViennaCL"
+PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr);
+  ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr);
+  if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
+  A->ops->mult    = MatMult_SeqAIJViennaCL;
+  A->ops->multadd = MatMultAdd_SeqAIJViennaCL;
+  PetscFunctionReturn(0);
+}
+
+/* --------------------------------------------------------------------------------*/
+#undef __FUNCT__
+#define __FUNCT__ "MatCreateSeqAIJViennaCL"
+/*@
+   MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
+   (the default parallel PETSc format).  This matrix will ultimately be pushed down
+   to GPUs and use the ViennaCL library for calculations. For good matrix
+   assembly performance the user should preallocate the matrix storage by setting
+   the parameter nz (or the array nnz).  By setting these parameters accurately,
+   performance during matrix assembly can be increased substantially.
+
+
+   Collective on MPI_Comm
+
+   Input Parameters:
++  comm - MPI communicator, set to PETSC_COMM_SELF
+.  m - number of rows
+.  n - number of columns
+.  nz - number of nonzeros per row (same for all rows)
+-  nnz - array containing the number of nonzeros in the various rows
+         (possibly different for each row) or NULL
+
+   Output Parameter:
+.  A - the matrix
+
+   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
+   MatXXXXSetPreallocation() paradigm instead of this routine directly.
+   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
+
+   Notes:
+   If nnz is given then nz is ignored
+
+   The AIJ format (also called the Yale sparse matrix format or
+   compressed row storage), is fully compatible with standard Fortran 77
+   storage.  That is, the stored row and column indices can begin at
+   either one (as in Fortran) or zero.  See the users' manual for details.
+
+   Specify the preallocated storage with either nz or nnz (not both).
+   Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
+   allocation.  For large problems you MUST preallocate memory or you
+   will get TERRIBLE performance, see the users' manual chapter on matrices.
+
+   By default, this format uses inodes (identical nodes) when possible, to
+   improve numerical efficiency of matrix-vector products and solves. We
+   search for consecutive rows with the same nonzero structure, thereby
+   reusing matrix information to achieve increased efficiency.
+
+   Level: intermediate
+
+.seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSP(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ()
+
+@*/
+PetscErrorCode  MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = MatCreate(comm,A);CHKERRQ(ierr);
+  ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
+  ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
+  ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "MatDestroy_SeqAIJViennaCL"
+PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A)
+{
+  PetscErrorCode ierr;
+  Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr;
+
+  PetscFunctionBegin;
+  try {
+    if (A->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) delete (ViennaCLAIJMatrix*)(viennaclcontainer->mat);
+    delete viennaclcontainer;
+    A->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
+  } catch(char *ex) {
+    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+  }
+  /*this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */
+  A->spptr = 0;
+  ierr     = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+extern PetscErrorCode MatSetValuesBatch_SeqAIJViennaCL(Mat, PetscInt, PetscInt, PetscInt*,const PetscScalar*);
+
+
+#undef __FUNCT__
+#define __FUNCT__ "MatCreate_SeqAIJViennaCL"
+PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B)
+{
+  PetscErrorCode ierr;
+  Mat_SeqAIJ     *aij;
+
+  PetscFunctionBegin;
+  ierr            = MatCreate_SeqAIJ(B);CHKERRQ(ierr);
+  aij             = (Mat_SeqAIJ*)B->data;
+  aij->inode.use  = PETSC_FALSE;
+  B->ops->mult    = MatMult_SeqAIJViennaCL;
+  B->ops->multadd = MatMultAdd_SeqAIJViennaCL;
+  B->spptr        = new Mat_SeqAIJViennaCL();
+
+  ((Mat_SeqAIJViennaCL*)B->spptr)->mat     = 0;
+
+  B->ops->assemblyend    = MatAssemblyEnd_SeqAIJViennaCL;
+  B->ops->destroy        = MatDestroy_SeqAIJViennaCL;
+  B->ops->getvecs        = MatGetVecs_SeqAIJViennaCL;
+  B->ops->setvaluesbatch = MatSetValuesBatch_SeqAIJViennaCL;
+
+  //ierr = PetscObjectComposeFunction((PetscObject)B,"MatViennaCLSetFormat_C", "MatViennaCLSetFormat_SeqAIJViennaCL", MatViennaCLSetFormat_SeqAIJViennaCL);CHKERRQ(ierr);
+  ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
+
+  B->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED;
+  PetscFunctionReturn(0);
+}
+
+
+/*M
+   MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices.
+
+   A matrix type whose data resides on GPUs. These matrices are in CSR format by
+   default. All matrix calculations are performed using the ViennaCL library.
+
+   Options Database Keys:
++  -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions()
+.  -mat_viennacl_storage_format csr - sets the storage format of matrices during a call to MatSetFromOptions().
+-  -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions().
+
+  Level: beginner
+
+.seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL()
+M*/
+

File src/mat/impls/aij/seq/seqviennacl/makefile

 #requirespackage 'PETSC_HAVE_VIENNACL'
-#requireslanguage CXXONLY
 
 ALL: lib
 
 CFLAGS    =
 FFLAGS    =
-SOURCEC   = aijviennacl.cpp aijAssemble.cpp
+SOURCEC   = aijviennacl.cxx aijassemble.cxx
 SOURCEF   =
 SOURCEH   = viennaclmatimpl.h
 LIBBASE   = libpetscmat

File src/vec/vec/impls/mpi/mpiviennacl/makefile

 #requirespackage 'PETSC_HAVE_VIENNACL'
-#requireslanguage CXXONLY
+
 ALL: lib
 
 CFLAGS   = ${PNETCDF_INCLUDE}
 FFLAGS   =
-SOURCEC  = mpiviennacl.cpp
+SOURCEC  = mpiviennacl.cxx
 SOURCEF  =
 SOURCEH  =
 LIBBASE  = libpetscvec

File src/vec/vec/impls/mpi/mpiviennacl/mpiviennacl.cpp

-
-/*
-   This file contains routines for Parallel vector operations.
- */
-#include <petscconf.h>
-#include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
-#include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
-
-#undef __FUNCT__
-#define __FUNCT__ "VecDestroy_MPIViennaCL"
-PetscErrorCode VecDestroy_MPIViennaCL(Vec v)
-{
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  try {
-    if (v->spptr) {
-      delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
-      delete (Vec_ViennaCL*) v->spptr;
-    }
-  } catch(char * ex) {
-    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-  }
-  ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "VecNorm_MPIViennaCL"
-PetscErrorCode VecNorm_MPIViennaCL(Vec xin,NormType type,PetscReal *z)
-{
-  PetscReal      sum,work = 0.0;
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  if (type == NORM_2 || type == NORM_FROBENIUS) {
-    ierr  = VecNorm_SeqViennaCL(xin,NORM_2,&work);
-    work *= work;
-    ierr  = MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
-    *z    = PetscSqrtReal(sum);
-  } else if (type == NORM_1) {
-    /* Find the local part */
-    ierr = VecNorm_SeqViennaCL(xin,NORM_1,&work);CHKERRQ(ierr);
-    /* Find the global max */
-    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
-  } else if (type == NORM_INFINITY) {
-    /* Find the local max */
-    ierr = VecNorm_SeqViennaCL(xin,NORM_INFINITY,&work);CHKERRQ(ierr);
-    /* Find the global max */
-    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
-  } else if (type == NORM_1_AND_2) {
-    PetscReal temp[2];
-    ierr = VecNorm_SeqViennaCL(xin,NORM_1,temp);CHKERRQ(ierr);
-    ierr = VecNorm_SeqViennaCL(xin,NORM_2,temp+1);CHKERRQ(ierr);
-    temp[1] = temp[1]*temp[1];
-    ierr = MPI_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
-    z[1] = PetscSqrtReal(z[1]);
-  }
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "VecDot_MPIViennaCL"
-PetscErrorCode VecDot_MPIViennaCL(Vec xin,Vec yin,PetscScalar *z)
-{
-  PetscScalar    sum,work;
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = VecDot_SeqViennaCL(xin,yin,&work);CHKERRQ(ierr);
-  ierr = MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
-  *z   = sum;
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "VecTDot_MPIViennaCL"
-PetscErrorCode VecTDot_MPIViennaCL(Vec xin,Vec yin,PetscScalar *z)
-{
-  PetscScalar    sum,work;
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = VecTDot_SeqViennaCL(xin,yin,&work);CHKERRQ(ierr);
-  ierr = MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
-  *z   = sum;
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "VecMDot_MPIViennaCL"
-PetscErrorCode VecMDot_MPIViennaCL(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
-{
-  PetscScalar    awork[128],*work = awork;
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  if (nv > 128) {
-    ierr = PetscMalloc(nv*sizeof(PetscScalar),&work);CHKERRQ(ierr);
-  }
-  ierr = VecMDot_SeqViennaCL(xin,nv,y,work);CHKERRQ(ierr);
-  ierr = MPI_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
-  if (nv > 128) {
-    ierr = PetscFree(work);CHKERRQ(ierr);
-  }
-  PetscFunctionReturn(0);
-}
-
-/*MC
-   VECMPIVIENNACL - VECMPIVIENNACL = "mpiviennacl" - The basic parallel vector, modified to use ViennaCL
-
-   Options Database Keys:
-. -vec_type mpiviennacl - sets the vector type to VECMPIVIENNACL during a call to VecSetFromOptions()
-
-  Level: beginner
-
-.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMpiWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateMpi()
-M*/
-
-
-#undef __FUNCT__
-#define __FUNCT__ "VecDuplicate_MPIViennaCL"
-PetscErrorCode VecDuplicate_MPIViennaCL(Vec win,Vec *v)
-{
-  PetscErrorCode ierr;
-  Vec_MPI        *vw,*w = (Vec_MPI*)win->data;
-  PetscScalar    *array;
-
-  PetscFunctionBegin;
-  ierr = VecCreate(PetscObjectComm((PetscObject)win),v);CHKERRQ(ierr);
-  ierr = PetscLayoutReference(win->map,&(*v)->map);CHKERRQ(ierr);
-
-  ierr = VecCreate_MPI_Private(*v,PETSC_FALSE,w->nghost,0);CHKERRQ(ierr);
-  vw   = (Vec_MPI*)(*v)->data;
-  ierr = PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));CHKERRQ(ierr);
-
-  /* save local representation of the parallel vector (and scatter) if it exists */
-  if (w->localrep) {
-    ierr = VecGetArray(*v,&array);CHKERRQ(ierr);
-    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,win->map->n+w->nghost,array,&vw->localrep);CHKERRQ(ierr);
-    ierr = PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));CHKERRQ(ierr);
-    ierr = VecRestoreArray(*v,&array);CHKERRQ(ierr);
-    ierr = PetscLogObjectParent(*v,vw->localrep);CHKERRQ(ierr);
-    vw->localupdate = w->localupdate;
-    if (vw->localupdate) {
-      ierr = PetscObjectReference((PetscObject)vw->localupdate);CHKERRQ(ierr);
-    }
-  }
-
-  /* New vector should inherit stashing property of parent */
-  (*v)->stash.donotstash   = win->stash.donotstash;
-  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;
-
-  /* change type_name appropriately */
-  ierr = PetscObjectChangeTypeName((PetscObject)(*v),VECMPIVIENNACL);CHKERRQ(ierr);
-
-  ierr = PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);CHKERRQ(ierr);
-  ierr = PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);CHKERRQ(ierr);
-  (*v)->map->bs   = win->map->bs;
-  (*v)->bstash.bs = win->bstash.bs;
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "VecDotNorm2_MPIViennaCL"
-PetscErrorCode VecDotNorm2_MPIViennaCL(Vec s,Vec t,PetscScalar *dp,PetscScalar *nm)
-{
-  PetscErrorCode ierr;
-  PetscScalar    work[2],sum[2];
-
-  PetscFunctionBegin;
-  ierr = VecDotNorm2_SeqViennaCL(s,t,work,work+1);CHKERRQ(ierr);
-  ierr = MPI_Allreduce(&work,&sum,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)s));CHKERRQ(ierr);
-  *dp  = sum[0];
-  *nm  = sum[1];
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "VecCreate_MPIViennaCL"
-PETSC_EXTERN PetscErrorCode VecCreate_MPIViennaCL(Vec vv)
-{
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = VecCreate_MPI_Private(vv,PETSC_FALSE,0,0);CHKERRQ(ierr);
-  ierr = PetscObjectChangeTypeName((PetscObject)vv,VECMPIVIENNACL);CHKERRQ(ierr);
-
-  vv->ops->dotnorm2        = VecDotNorm2_MPIViennaCL;
-  vv->ops->waxpy           = VecWAXPY_SeqViennaCL;
-  vv->ops->duplicate       = VecDuplicate_MPIViennaCL;
-  vv->ops->dot             = VecDot_MPIViennaCL;
-  vv->ops->mdot            = VecMDot_MPIViennaCL;
-  vv->ops->tdot            = VecTDot_MPIViennaCL;
-  vv->ops->norm            = VecNorm_MPIViennaCL;
-  vv->ops->scale           = VecScale_SeqViennaCL;
-  vv->ops->copy            = VecCopy_SeqViennaCL;
-  vv->ops->set             = VecSet_SeqViennaCL;
-  vv->ops->swap            = VecSwap_SeqViennaCL;
-  vv->ops->axpy            = VecAXPY_SeqViennaCL;
-  vv->ops->axpby           = VecAXPBY_SeqViennaCL;
-  vv->ops->maxpy           = VecMAXPY_SeqViennaCL;
-  vv->ops->aypx            = VecAYPX_SeqViennaCL;
-  vv->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
-  vv->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
-  vv->ops->setrandom       = VecSetRandom_SeqViennaCL;
-  vv->ops->replacearray    = VecReplaceArray_SeqViennaCL;
-  vv->ops->dot_local       = VecDot_SeqViennaCL;
-  vv->ops->tdot_local      = VecTDot_SeqViennaCL;
-  vv->ops->norm_local      = VecNorm_SeqViennaCL;
-  vv->ops->mdot_local      = VecMDot_SeqViennaCL;
-  vv->ops->destroy         = VecDestroy_MPIViennaCL;
-  vv->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
-  /* place array?
-     reset array?
-     get values?
-  */
-  ierr = VecViennaCLAllocateCheck(vv);CHKERRQ(ierr);
-  vv->valid_GPU_array      = PETSC_VIENNACL_GPU;
-  ierr = VecSet(vv,0.0);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
-
-#undef __FUNCT__
-#define __FUNCT__ "VecCreate_ViennaCL"
-PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec v)
-{
-  PetscErrorCode ierr;
-  PetscMPIInt    size;
-
-  PetscFunctionBegin;
-  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)v),&size);CHKERRQ(ierr);
-  if (size == 1) {
-    ierr = VecSetType(v,VECSEQVIENNACL);CHKERRQ(ierr);
-  } else {
-    ierr = VecSetType(v,VECMPIVIENNACL);CHKERRQ(ierr);
-  }
-  PetscFunctionReturn(0);
-}
-

File src/vec/vec/impls/mpi/mpiviennacl/mpiviennacl.cxx

+
+/*
+   This file contains routines for Parallel vector operations.
+ */
+#include <petscconf.h>
+#include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
+#include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
+
+#undef __FUNCT__
+#define __FUNCT__ "VecDestroy_MPIViennaCL"
+PetscErrorCode VecDestroy_MPIViennaCL(Vec v)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  try {
+    if (v->spptr) {
+      delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
+      delete (Vec_ViennaCL*) v->spptr;
+    }
+  } catch(char * ex) {
+    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+  }
+  ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecNorm_MPIViennaCL"
+PetscErrorCode VecNorm_MPIViennaCL(Vec xin,NormType type,PetscReal *z)
+{
+  PetscReal      sum,work = 0.0;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  if (type == NORM_2 || type == NORM_FROBENIUS) {
+    ierr  = VecNorm_SeqViennaCL(xin,NORM_2,&work);
+    work *= work;
+    ierr  = MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
+    *z    = PetscSqrtReal(sum);
+  } else if (type == NORM_1) {
+    /* Find the local part */
+    ierr = VecNorm_SeqViennaCL(xin,NORM_1,&work);CHKERRQ(ierr);
+    /* Find the global sum */
+    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
+  } else if (type == NORM_INFINITY) {
+    /* Find the local max */
+    ierr = VecNorm_SeqViennaCL(xin,NORM_INFINITY,&work);CHKERRQ(ierr);
+    /* Find the global max */
+    ierr = MPI_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
+  } else if (type == NORM_1_AND_2) {
+    PetscReal temp[2];
+    ierr = VecNorm_SeqViennaCL(xin,NORM_1,temp);CHKERRQ(ierr);
+    ierr = VecNorm_SeqViennaCL(xin,NORM_2,temp+1);CHKERRQ(ierr);
+    temp[1] = temp[1]*temp[1];
+    ierr = MPI_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
+    z[1] = PetscSqrtReal(z[1]);
+  }
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecDot_MPIViennaCL"
+PetscErrorCode VecDot_MPIViennaCL(Vec xin,Vec yin,PetscScalar *z)
+{
+  PetscScalar    sum,work;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecDot_SeqViennaCL(xin,yin,&work);CHKERRQ(ierr);
+  ierr = MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
+  *z   = sum;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecTDot_MPIViennaCL"
+PetscErrorCode VecTDot_MPIViennaCL(Vec xin,Vec yin,PetscScalar *z)
+{
+  PetscScalar    sum,work;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecTDot_SeqViennaCL(xin,yin,&work);CHKERRQ(ierr);
+  ierr = MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
+  *z   = sum;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecMDot_MPIViennaCL"
+PetscErrorCode VecMDot_MPIViennaCL(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
+{
+  PetscScalar    awork[128],*work = awork;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  if (nv > 128) {
+    ierr = PetscMalloc(nv*sizeof(PetscScalar),&work);CHKERRQ(ierr);
+  }
+  ierr = VecMDot_SeqViennaCL(xin,nv,y,work);CHKERRQ(ierr);
+  ierr = MPI_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
+  if (nv > 128) {
+    ierr = PetscFree(work);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
+
+/*MC
+   VECMPIVIENNACL - VECMPIVIENNACL = "mpiviennacl" - The basic parallel vector, modified to use ViennaCL
+
+   Options Database Keys:
+. -vec_type mpiviennacl - sets the vector type to VECMPIVIENNACL during a call to VecSetFromOptions()
+
+  Level: beginner
+
+.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECMPI, VecType, VecCreateMPI()
+M*/
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecDuplicate_MPIViennaCL"
+PetscErrorCode VecDuplicate_MPIViennaCL(Vec win,Vec *v)
+{
+  PetscErrorCode ierr;
+  Vec_MPI        *vw,*w = (Vec_MPI*)win->data;
+  PetscScalar    *array;
+
+  PetscFunctionBegin;
+  ierr = VecCreate(PetscObjectComm((PetscObject)win),v);CHKERRQ(ierr);
+  ierr = PetscLayoutReference(win->map,&(*v)->map);CHKERRQ(ierr);
+
+  ierr = VecCreate_MPI_Private(*v,PETSC_FALSE,w->nghost,0);CHKERRQ(ierr);
+  vw   = (Vec_MPI*)(*v)->data;
+  ierr = PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));CHKERRQ(ierr);
+
+  /* save local representation of the parallel vector (and scatter) if it exists */
+  if (w->localrep) {
+    ierr = VecGetArray(*v,&array);CHKERRQ(ierr);
+    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,win->map->n+w->nghost,array,&vw->localrep);CHKERRQ(ierr);
+    ierr = PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));CHKERRQ(ierr);
+    ierr = VecRestoreArray(*v,&array);CHKERRQ(ierr);
+    ierr = PetscLogObjectParent(*v,vw->localrep);CHKERRQ(ierr);
+    vw->localupdate = w->localupdate;
+    if (vw->localupdate) {
+      ierr = PetscObjectReference((PetscObject)vw->localupdate);CHKERRQ(ierr);
+    }
+  }
+
+  /* New vector should inherit stashing property of parent */
+  (*v)->stash.donotstash   = win->stash.donotstash;
+  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;
+
+  /* change type_name appropriately */
+  ierr = PetscObjectChangeTypeName((PetscObject)(*v),VECMPIVIENNACL);CHKERRQ(ierr);
+
+  ierr = PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);CHKERRQ(ierr);
+  ierr = PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);CHKERRQ(ierr);
+  (*v)->map->bs   = win->map->bs;
+  (*v)->bstash.bs = win->bstash.bs;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecDotNorm2_MPIViennaCL"
+PetscErrorCode VecDotNorm2_MPIViennaCL(Vec s,Vec t,PetscScalar *dp,PetscScalar *nm)
+{
+  PetscErrorCode ierr;
+  PetscScalar    work[2],sum[2];
+
+  PetscFunctionBegin;
+  ierr = VecDotNorm2_SeqViennaCL(s,t,work,work+1);CHKERRQ(ierr);
+  ierr = MPI_Allreduce(&work,&sum,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)s));CHKERRQ(ierr);
+  *dp  = sum[0];
+  *nm  = sum[1];
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecCreate_MPIViennaCL"
+PETSC_EXTERN PetscErrorCode VecCreate_MPIViennaCL(Vec vv)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecCreate_MPI_Private(vv,PETSC_FALSE,0,0);CHKERRQ(ierr);
+  ierr = PetscObjectChangeTypeName((PetscObject)vv,VECMPIVIENNACL);CHKERRQ(ierr);
+
+  vv->ops->dotnorm2        = VecDotNorm2_MPIViennaCL;
+  vv->ops->waxpy           = VecWAXPY_SeqViennaCL;
+  vv->ops->duplicate       = VecDuplicate_MPIViennaCL;
+  vv->ops->dot             = VecDot_MPIViennaCL;
+  vv->ops->mdot            = VecMDot_MPIViennaCL;
+  vv->ops->tdot            = VecTDot_MPIViennaCL;
+  vv->ops->norm            = VecNorm_MPIViennaCL;
+  vv->ops->scale           = VecScale_SeqViennaCL;
+  vv->ops->copy            = VecCopy_SeqViennaCL;
+  vv->ops->set             = VecSet_SeqViennaCL;
+  vv->ops->swap            = VecSwap_SeqViennaCL;
+  vv->ops->axpy            = VecAXPY_SeqViennaCL;
+  vv->ops->axpby           = VecAXPBY_SeqViennaCL;
+  vv->ops->maxpy           = VecMAXPY_SeqViennaCL;
+  vv->ops->aypx            = VecAYPX_SeqViennaCL;
+  vv->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
+  vv->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
+  vv->ops->setrandom       = VecSetRandom_SeqViennaCL;
+  vv->ops->replacearray    = VecReplaceArray_SeqViennaCL;
+  vv->ops->dot_local       = VecDot_SeqViennaCL;
+  vv->ops->tdot_local      = VecTDot_SeqViennaCL;
+  vv->ops->norm_local      = VecNorm_SeqViennaCL;
+  vv->ops->mdot_local      = VecMDot_SeqViennaCL;
+  vv->ops->destroy         = VecDestroy_MPIViennaCL;
+  vv->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
+  /* place array?
+     reset array?
+     get values?
+  */
+  ierr = VecViennaCLAllocateCheck(vv);CHKERRQ(ierr);
+  vv->valid_GPU_array      = PETSC_VIENNACL_GPU;
+  ierr = VecSet(vv,0.0);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecCreate_ViennaCL"
+PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec v)
+{
+  PetscErrorCode ierr;
+  PetscMPIInt    size;
+
+  PetscFunctionBegin;
+  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)v),&size);CHKERRQ(ierr);
+  if (size == 1) {
+    ierr = VecSetType(v,VECSEQVIENNACL);CHKERRQ(ierr);
+  } else {
+    ierr = VecSetType(v,VECMPIVIENNACL);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
+

File src/vec/vec/impls/seq/seqviennacl/makefile

 #requirespackage 'PETSC_HAVE_VIENNACL'
-#requireslanguage CXXONLY
+
 ALL: lib
 
 CFLAGS   =
 FFLAGS   =
-SOURCEC  = vecviennacl.cpp
+SOURCEC  = vecviennacl.cxx
 SOURCEF  =
 SOURCEH  = viennaclvecimpl.h
 LIBBASE  = libpetscvec

File src/vec/vec/impls/seq/seqviennacl/vecviennacl.cpp

-/*
-   Implements the sequential ViennaCL vectors.
-*/
-
-#include <petscconf.h>
-#include <petsc-private/vecimpl.h>          /*I "petscvec.h" I*/
-#include <../src/vec/vec/impls/dvecimpl.h>
-#include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
-
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/linalg/norm_1.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/linalg/norm_inf.hpp"
-
-#undef __FUNCT__
-#define __FUNCT__ "VecViennaCLAllocateCheckHost"
-/*
-    Allocates space for the vector array on the Host if it does not exist.
-    Does NOT change the PetscViennaCLFlag for the vector
-    Does NOT zero the ViennaCL array
- */
-PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
-{
-  PetscErrorCode ierr;
-  PetscScalar    *array;
-  Vec_Seq        *s;
-  PetscInt       n = v->map->n;
-
-  PetscFunctionBegin;
-  s    = (Vec_Seq*)v->data;
-  ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
-  if (s->array == 0) {
-    ierr               = PetscMalloc(n*sizeof(PetscScalar),&array);CHKERRQ(ierr);
-    ierr               = PetscLogObjectMemory(v,n*sizeof(PetscScalar));CHKERRQ(ierr);
-    s->array           = array;
-    s->array_allocated = array;
-  }
-  PetscFunctionReturn(0);
-}
-
-
-#undef __FUNCT__
-#define __FUNCT__ "VecViennaCLAllocateCheck"
-/*
-    Allocates space for the vector array on the GPU if it does not exist.
-    Does NOT change the PetscViennaCLFlag for the vector
-    Does NOT zero the ViennaCL array
-
- */
-PetscErrorCode VecViennaCLAllocateCheck(Vec v)
-{
-  PetscErrorCode ierr;
-  int            rank;
-
-  PetscFunctionBegin;
-  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
-  // First allocate memory on the GPU if needed
-  if (!v->spptr) {
-    try {
-      v->spptr                            = new Vec_ViennaCL;
-      ((Vec_ViennaCL*)v->spptr)->GPUarray = new ViennaCLVector((PetscBLASInt)v->map->n);
-
-    } catch(char *ex) {
-      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-    }
-  }
-  PetscFunctionReturn(0);
-}
-
-
-#undef __FUNCT__
-#define __FUNCT__ "VecViennaCLCopyToGPU"
-/* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
-PetscErrorCode VecViennaCLCopyToGPU(Vec v)
-{
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
-  if (v->valid_GPU_array == PETSC_VIENNACL_CPU) {
-    ierr = PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);CHKERRQ(ierr);
-    try {
-      ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
-      viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
-      //ierr = WaitForGPU();CHKERRViennaCL(ierr);  //copy does not return before data is safe. No need to wait.
-    } catch(char *ex) {
-      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-    }
-    ierr = PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);CHKERRQ(ierr);
-    v->valid_GPU_array = PETSC_VIENNACL_BOTH;
-  }
-  PetscFunctionReturn(0);
-}
-
-
-/* TODO
-#undef __FUNCT__
-#define __FUNCT__ "VecViennaCLCopyToGPUSome"
-static PetscErrorCode VecViennaCLCopyToGPUSome(Vec v, PetscViennaCLIndices ci)
-{
-  PetscErrorCode ierr;
-  ViennaCLARRAY      *varray;
-
-  PetscFunctionBegin;
-  ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
-  if (v->valid_GPU_array == PETSC_ViennaCL_CPU) {
-    ierr = PetscLogEventBegin(VEC_ViennaCLCopyToGPUSome,v,0,0,0);CHKERRQ(ierr);
-    
-    ViennaCLVector & vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
-    Vec_Seq *s           = (Vec_Seq*)v->data;
-
-    ViennaCLINTARRAYCPU *indicesCPU=&ci->recvIndicesCPU;
-    ViennaCLINTARRAYGPU *indicesGPU=&ci->recvIndicesGPU;
-
-    viennacl::fast_copy(
-    thrust::copy(thrust::make_permutation_iterator(s->array,indicesCPU->begin()),
-                 thrust::make_permutation_iterator(s->array,indicesCPU->end()),
-                 thrust::make_permutation_iterator(varray->begin(),indicesGPU->begin()));
-
-    // Set the buffer states
-    v->valid_GPU_array = PETSC_VIENNACL_BOTH;
-    ierr = PetscLogEventEnd(VEC_ViennaCLCopyToGPUSome,v,0,0,0);CHKERRQ(ierr);
-  }
-  PetscFunctionReturn(0);
-}
-*/
-
-
-#undef __FUNCT__
-#define __FUNCT__ "VecViennaCLCopyFromGPU"
-/*
-     VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
-*/
-PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
-{
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = VecViennaCLAllocateCheckHost(v);CHKERRQ(ierr);
-  if (v->valid_GPU_array == PETSC_VIENNACL_GPU) {
-    ierr = PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);CHKERRQ(ierr);
-    try {
-      ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
-      viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
-      //ierr = WaitForGPU();CHKERRViennaCL(ierr); //Reads in ViennaCL are blocking
-    } catch(char *ex) {
-      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-    }
-    ierr = PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);CHKERRQ(ierr);
-    v->valid_GPU_array = PETSC_VIENNACL_BOTH;
-  }
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "VecViennaCLCopyFromGPUSome"
-/* Note that this function only copies *some* of the values up from the GPU to CPU,
-   which means that we need recombine the data at some point before using any of the standard functions.
-   We could add another few flag-types to keep track of this, or treat things like VecGetArray VecRestoreArray
-   where you have to always call in pairs
-*/
-/* TODO
-PetscErrorCode VecViennaCLCopyFromGPUSome(Vec v, PetscViennaCLIndices ci)
-{
-  ViennaCLARRAY      *varray;
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
-  ierr = VecViennaCLAllocateCheckHost(v);CHKERRQ(ierr);
-  if (v->valid_GPU_array == PETSC_ViennaCL_GPU) {
-    ierr   = PetscLogEventBegin(VEC_ViennaCLCopyFromGPUSome,v,0,0,0);CHKERRQ(ierr);
-    varray = ((Vec_ViennaCL*)v->spptr)->GPUarray;
-    Vec_Seq *s;
-    s = (Vec_Seq*)v->data;
-    ViennaCLINTARRAYCPU *indicesCPU=&ci->sendIndicesCPU;
-    ViennaCLINTARRAYGPU *indicesGPU=&ci->sendIndicesGPU;
-
-    thrust::copy(thrust::make_permutation_iterator(varray->begin(),indicesGPU->begin()),
-                 thrust::make_permutation_iterator(varray->begin(),indicesGPU->end()),
-                 thrust::make_permutation_iterator(s->array,indicesCPU->begin()));
-    
-    ierr = VecViennaCLRestoreArrayRead(v,&varray);CHKERRQ(ierr);
-    ierr = PetscLogEventEnd(VEC_ViennaCLCopyFromGPUSome,v,0,0,0);CHKERRQ(ierr);
-    v->valid_GPU_array = PETSC_ViennaCL_BOTH;
-  }
-  PetscFunctionReturn(0);
-} */
-
-
-/* Copy on CPU */
-#undef __FUNCT__
-#define __FUNCT__ "VecCopy_SeqViennaCL_Private"
-static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
-{
-  PetscScalar       *ya;
-  const PetscScalar *xa;
-  PetscErrorCode    ierr;
-
-  PetscFunctionBegin;
-  if (xin != yin) {
-    ierr = VecGetArrayRead(xin,&xa);CHKERRQ(ierr);
-    ierr = VecGetArray(yin,&ya);CHKERRQ(ierr);
-    ierr = PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));CHKERRQ(ierr);
-    ierr = VecRestoreArrayRead(xin,&xa);CHKERRQ(ierr);
-    ierr = VecRestoreArray(yin,&ya);CHKERRQ(ierr);
-  }
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "VecSetRandom_SeqViennaCL_Private"
-static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
-{
-  PetscErrorCode ierr;
-  PetscInt       n = xin->map->n,i;
-  PetscScalar    *xx;
-
-  PetscFunctionBegin;
-  ierr = VecGetArray(xin,&xx);CHKERRQ(ierr);
-  for (i=0; i<n; i++) {ierr = PetscRandomGetValue(r,&xx[i]);CHKERRQ(ierr);}
-  ierr = VecRestoreArray(xin,&xx);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "VecDestroy_SeqViennaCL_Private"
-static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
-{
-  Vec_Seq        *vs = (Vec_Seq*)v->data;
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = PetscObjectDepublish(v);CHKERRQ(ierr);
-#if defined(PETSC_USE_LOG)
-  PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
-#endif
-  if (vs->array_allocated) ierr = PetscFree(vs->array_allocated);CHKERRQ(ierr);
-  ierr = PetscFree(vs);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "VecResetArray_SeqViennaCL_Private"
-static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
-{
-  Vec_Seq *v = (Vec_Seq*)vin->data;
-
-  PetscFunctionBegin;
-  v->array         = v->unplacedarray;
-  v->unplacedarray = 0;
-  PetscFunctionReturn(0);
-}
-
-/* these following 3 public versions are necessary because we use ViennaCL in the regular PETSc code and these need to be called from plain C code. */
-/*
-#undef __FUNCT__
-#define __FUNCT__ "VecViennaCLAllocateCheck_Public"
-PetscErrorCode VecViennaCLAllocateCheck_Public(Vec v)
-{
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
-#undef __FUNCT__
-#define __FUNCT__ "VecViennaCLCopyToGPU_Public"
-PetscErrorCode VecViennaCLCopyToGPU_Public(Vec v)
-{
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = VecViennaCLCopyToGPU(v);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-} */
-
-#undef __FUNCT__
-#define __FUNCT__ "PetscViennaCLIndicesCreate"
-/*
-    PetscViennaCLIndicesCreate - creates the data structure needed by VecViennaCLCopyToGPUSome_Public()
-
-   Input Parameters:
-+    n - the number of indices
--    indices - integer list of indices
-
-   Output Parameter:
-.    ci - the ViennaCLIndices object suitable to pass to VecViennaCLCopyToGPUSome_Public()
-
-.seealso: PetscViennaCLIndicesDestroy(), VecViennaCLCopyToGPUSome_Public()
-*/
-/*PetscErrorCode PetscViennaCLIndicesCreate(PetscInt ns,PetscInt *sendIndices,PetscInt nr,PetscInt *recvIndices,PetscViennaCLIndices *ci)
-{
-  PetscViennaCLIndices cci;
-
-  PetscFunctionBegin;
-  cci = new struct _p_PetscViennaCLIndices;
-
-  cci->sendIndicesCPU.assign(sendIndices,sendIndices+ns);
-  cci->sendIndicesGPU.assign(sendIndices,sendIndices+ns);
-
-  cci->recvIndicesCPU.assign(recvIndices,recvIndices+nr);
-  cci->recvIndicesGPU.assign(recvIndices,recvIndices+nr);
-  
-  *ci = cci;
-  PetscFunctionReturn(0);
-}*/
-
-#undef __FUNCT__
-#define __FUNCT__ "PetscViennaCLIndicesDestroy"
-/*
-    PetscViennaCLIndicesDestroy - destroys the data structure needed by VecViennaCLCopyToGPUSome_Public()
-
-   Input Parameters:
-.    ci - the ViennaCLIndices object suitable to pass to VecViennaCLCopyToGPUSome_Public()
-
-.seealso: PetscViennaCLIndicesCreate(), VecViennaCLCopyToGPUSome_Public()
-*/
-/*PetscErrorCode PetscViennaCLIndicesDestroy(PetscViennaCLIndices *ci)
-{
-  PetscFunctionBegin;
-  if (!(*ci)) PetscFunctionReturn(0);
-  try {
-    if (ci) delete *ci;
-  } catch(char *ex) {
-    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-  }
-  *ci = 0;
-  PetscFunctionReturn(0);
-} */
-
-
-
-#undef __FUNCT__
-#define __FUNCT__ "VecViennaCLCopyToGPUSome_Public"
-/*
-    VecViennaCLCopyToGPUSome_Public - Copies certain entries down to the GPU from the CPU of a vector
-
-   Input Parameters:
-+    v - the vector
--    indices - the requested indices, this should be created with ViennaCLIndicesCreate()
-
-*/
-/*PetscErrorCode VecViennaCLCopyToGPUSome_Public(Vec v, PetscViennaCLIndices ci)
-{
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = VecViennaCLCopyToGPUSome(v,ci);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-} */
-
-#undef __FUNCT__
-#define __FUNCT__ "VecViennaCLCopyFromGPUSome_Public"
-/*
-  VecViennaCLCopyFromGPUSome_Public - Copies certain entries up to the CPU from the GPU of a vector
-
-  Input Parameters:
- +    v - the vector
- -    indices - the requested indices, this should be created with ViennaCLIndicesCreate()
-*/
-/*PetscErrorCode VecViennaCLCopyFromGPUSome_Public(Vec v, PetscViennaCLIndices ci)
-{
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = VecViennaCLCopyFromGPUSome(v,ci);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-} */
-
-
-
-/*MC
-   VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
-
-   Options Database Keys:
-. -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
-
-  Level: beginner
-
-.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
-M*/
-
-
-#undef __FUNCT__
-#define __FUNCT__ "VecAYPX_SeqViennaCL"
-PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
-{
-  ViennaCLVector *xgpu,*ygpu;
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  if (alpha != 0.0) {
-    ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
-    ierr = VecViennaCLGetArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
-    try {
-      *ygpu = *xgpu + alpha * *ygpu;
-    } catch(char *ex) {
-      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-    }
-    ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
-    ierr = VecViennaCLRestoreArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
-    ierr = PetscLogFlops(2.0*yin->map->n);CHKERRQ(ierr);
-  }
-  PetscFunctionReturn(0);
-}
-
-
-#undef __FUNCT__
-#define __FUNCT__ "VecAXPY_SeqViennaCL"
-PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
-{
-  ViennaCLVector *xgpu,*ygpu;
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  if (alpha != 0.0) {
-    ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
-    ierr = VecViennaCLGetArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
-    try {
-      *ygpu += alpha * *xgpu;
-    } catch(char *ex) {
-      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-    }
-    ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
-    ierr = VecViennaCLRestoreArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
-    ierr = PetscLogFlops(2.0*yin->map->n);CHKERRQ(ierr);
-  }
-  PetscFunctionReturn(0);
-}
-
-
-#undef __FUNCT__
-#define __FUNCT__ "VecPointwiseDivide_SeqViennaCL"
-PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
-{
-  ViennaCLVector *xgpu,*ygpu,*wgpu;
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
-  ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
-  ierr = VecViennaCLGetArrayWrite(win,&wgpu);CHKERRQ(ierr);
-  try {
-    *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
-  } catch(char *ex) {
-    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-  }
-  ierr = PetscLogFlops(win->map->n);CHKERRQ(ierr);
-  ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
-  ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
-  ierr = VecViennaCLRestoreArrayWrite(win,&wgpu);CHKERRQ(ierr);
-  PetscFunctionReturn(0);
-}
-
-
-#undef __FUNCT__
-#define __FUNCT__ "VecWAXPY_SeqViennaCL"
-PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
-{
-  ViennaCLVector *xgpu,*ygpu,*wgpu;
-  PetscErrorCode ierr;
-
-  PetscFunctionBegin;
-  if (alpha == 0.0) {
-    ierr = VecCopy_SeqViennaCL(yin,win);CHKERRQ(ierr);
-  } else {
-    ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
-    ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
-    ierr = VecViennaCLGetArrayWrite(win,&wgpu);CHKERRQ(ierr);
-    if (alpha == 1.0) {
-      try {
-        *wgpu = *ygpu + *xgpu;
-      } catch(char *ex) {
-        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-      }
-      ierr = PetscLogFlops(win->map->n);CHKERRQ(ierr);
-    } else if (alpha == -1.0) {
-      try {
-        *wgpu = *ygpu - *xgpu;
-      } catch(char *ex) {
-        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-      }
-      ierr = PetscLogFlops(win->map->n);CHKERRQ(ierr);
-    } else {
-      try {
-        *wgpu = *ygpu + alpha * *xgpu;
-      } catch(char *ex) {
-        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
-      }
-      ierr = PetscLogFlops(2*win->map->n);CHKERRQ(ierr);
-    }
-    ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
-    ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
-    ierr = VecViennaCLRestoreArrayWrite(win,&wgpu);CHKERRQ(ierr);
-  }
-  PetscFunctionReturn(0);
-}
-
-
-/*
- * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
- *
- * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z.
- * Since there is no WAXPBY-function in PETSc, we use a fallback to an iterated application of x += alpha_i y_i for now.
- */
-#undef __FUNCT__
-#define __FUNCT__ "VecMAXPY_SeqViennaCL"
-PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
-{
-  PetscErrorCode ierr;
-  PetscInt       j;
-
-  PetscFunctionBegin;
-  for (j = 0; j < nv; ++j) {
-    ierr = VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);CHKERRQ(ierr);   /* Note: The extra GetArray/RestoreArray-overhead is acceptable because of the kernel launch overhead */
-  }
-  PetscFunctionReturn(0);
-}
-
-
-#undef __FUNCT__
-#define __FUNCT__ "VecDot_SeqViennaCL"
-PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
-{
-  ViennaCLVector