1. petsc
  2. PETSc
  3. petsc

Commits

Karl Rupp  committed b17c682

ViennaCL: First draft, including all operations for sequential vectors.

Mostly derived from existing CUSP interface.
viennacl.py package for BuildSystem included, detected correctly by BuildSystem, but download option untested.
Everything compiles and the standard 'make test' passes. In-depth tests still required.

  • Participants
  • Parent commits 0f0f11e
  • Branches master

Comments (0)

Files changed (13)

File config/PETSc/packages/viennacl.py

View file
  • Ignore whitespace
+from __future__ import generators
+import config.package
+
+class Configure(config.package.Package):
+  def __init__(self, framework):
+    config.package.Package.__init__(self, framework)
+    self.download        = ['http://downloads.sourceforge.net/project/viennacl/1.4.x/ViennaCL-1.4.1-src.tar.gz']
+    self.includes        = ['viennacl/forwards.h']
+    self.cxx             = 1
+    self.archIndependent = 1
+    self.worksonWindows  = 1
+    self.downloadonWindows = 1
+    return
+
+  def Install(self):
+    import shutil
+    import os
+    self.framework.log.write('ViennaCLDir = '+self.packageDir+' installDir '+self.installDir+'\n')
+    srcdir = os.path.join(self.packageDir,'viennacl')
+    destdir = os.path.join(self.installDir,'include','viennacl')
+    try:
+      if os.path.isdir(destdir): shutil.rmtree(destdir)
+      shutil.copytree(srcdir,destdir)
+    except RuntimeError,e:
+      raise RuntimeError('Error installing ViennaCL include files: '+str(e))
+    return self.installDir
+
+  def getSearchDirectories(self):
+    yield ''
+    return

File include/finclude/petscvecdef.h

View file
  • Ignore whitespace
 #define VECSEQCUSP 'seqcusp'
 #define VECMPICUSP 'mpicusp'
 #define VECCUSP 'cusp'
+#define VECSEQVIENNACL 'seqviennacl'
+#define VECMPIVIENNACL 'mpiviennacl'
+#define VECVIENNACL    'viennacl'
 #define VECSHARED 'shared'
 #define VECESI 'esi'
 #define VECPETSCESI 'petscesi'

File include/petsc-private/petscimpl.h

View file
  • Ignore whitespace
 typedef enum {PETSC_CUSP_UNALLOCATED,PETSC_CUSP_GPU,PETSC_CUSP_CPU,PETSC_CUSP_BOTH} PetscCUSPFlag;
 #endif
 
+#if defined(PETSC_HAVE_VIENNACL)
+/*E
+    PetscViennaCLFlag - indicates which memory (CPU, GPU, or none) contains the valid vector data
+
+   PETSC_VIENNACL_UNALLOCATED  - no memory contains valid vector entries; this state never occurs for vectors
+   PETSC_VIENNACL_GPU - GPU has valid vector/matrix entries
+   PETSC_VIENNACL_CPU - CPU has valid vector/matrix entries
+   PETSC_VIENNACL_BOTH - Both GPU and CPU have valid vector/matrix entries and they match
+
+   Level: developer
+E*/
+typedef enum {PETSC_VIENNACL_UNALLOCATED,PETSC_VIENNACL_GPU,PETSC_VIENNACL_CPU,PETSC_VIENNACL_BOTH} PetscViennaCLFlag;
+#endif
+
 #endif /* _PETSCHEAD_H */

File include/petsc-private/vecimpl.h

View file
  • Ignore whitespace
   PetscCUSPFlag          valid_GPU_array;    /* indicates where the most recently modified vector data is (GPU or CPU) */
   void                   *spptr; /* if we're using CUSP, then this is the special pointer to the array on the GPU */
 #endif
+#if defined(PETSC_HAVE_VIENNACL)
+  PetscViennaCLFlag      valid_GPU_array;    /* indicates where the most recently modified vector data is (GPU or CPU) */
+  void                   *spptr; /* if we're using ViennaCL, then this is the special pointer to the array on the GPU */
+#endif
 };
 
 PETSC_EXTERN PetscLogEvent VEC_View, VEC_Max, VEC_Min, VEC_DotBarrier, VEC_Dot, VEC_MDotBarrier, VEC_MDot, VEC_TDot, VEC_MTDot;
 PETSC_EXTERN PetscLogEvent VEC_Swap, VEC_AssemblyBegin, VEC_NormBarrier, VEC_DotNormBarrier, VEC_DotNorm, VEC_AXPBYPCZ, VEC_Ops;
 PETSC_EXTERN PetscLogEvent VEC_CUSPCopyToGPU, VEC_CUSPCopyFromGPU;
 PETSC_EXTERN PetscLogEvent VEC_CUSPCopyToGPUSome, VEC_CUSPCopyFromGPUSome;
+PETSC_EXTERN PetscLogEvent VEC_ViennaCLCopyToGPU,     VEC_ViennaCLCopyFromGPU;
+/*PETSC_EXTERN PetscLogEvent VEC_ViennaCLCopyToGPUSome, VEC_ViennaCLCopyFromGPUSome;*/
 
 #if defined(PETSC_HAVE_CUSP)
 PETSC_EXTERN PetscErrorCode VecCUSPAllocateCheckHost(Vec v);
 PETSC_EXTERN PetscErrorCode VecCUSPCopyFromGPU(Vec v);
 #endif
 
+#if defined(PETSC_HAVE_VIENNACL)
+PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v);
+PETSC_EXTERN PetscErrorCode VecViennaCLCopyFromGPU(Vec v);
+#endif
+
 
 /*
      Common header shared by array based vectors,

File include/petscvec.h

View file
  • Ignore whitespace
 #define VECSEQCUSP     "seqcusp"
 #define VECMPICUSP     "mpicusp"
 #define VECCUSP        "cusp"       /* seqcusp on one process and mpicusp on several */
+#define VECSEQVIENNACL "seqviennacl"
+#define VECMPIVIENNACL "mpiviennacl"
+#define VECVIENNACL    "viennacl"   /* seqviennacl on one process and mpiviennacl on several */
 #define VECNEST        "nest"
 #define VECSEQPTHREAD  "seqpthread"
 #define VECMPIPTHREAD  "mpipthread"
 PETSC_EXTERN PetscErrorCode VecCreateMPICUSP(MPI_Comm,PetscInt,PetscInt,Vec*);
 #endif
 
+#if defined(PETSC_HAVE_VIENNACL)
+typedef struct _p_PetscViennaCLIndices* PetscViennaCLIndices;
+PETSC_EXTERN PetscErrorCode PetscViennaCLIndicesCreate(PetscInt, PetscInt*,PetscInt, PetscInt*,PetscViennaCLIndices*);
+PETSC_EXTERN PetscErrorCode PetscViennaCLIndicesDestroy(PetscViennaCLIndices*);
+PETSC_EXTERN PetscErrorCode VecViennaCLCopyToGPUSome_Public(Vec,PetscViennaCLIndices);
+PETSC_EXTERN PetscErrorCode VecViennaCLCopyFromGPUSome_Public(Vec,PetscViennaCLIndices);
+PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCL(MPI_Comm,PetscInt,Vec*);
+PETSC_EXTERN PetscErrorCode VecCreateMPIViennaCL(MPI_Comm,PetscInt,PetscInt,Vec*);
+#endif
+
 PETSC_EXTERN PetscErrorCode VecNestGetSubVecs(Vec,PetscInt*,Vec**);
 PETSC_EXTERN PetscErrorCode VecNestGetSubVec(Vec,PetscInt,Vec*);
 PETSC_EXTERN PetscErrorCode VecNestSetSubVecs(Vec,PetscInt,PetscInt*,Vec*);

File src/vec/vec/impls/seq/makefile

View file
  • Ignore whitespace
 LIBBASE  = libpetscvec
 MANSEC   = Vec
 LOCDIR   = src/vec/vec/impls/seq/
-DIRS     = ftn-kernels seqcusp
+DIRS     = ftn-kernels seqcusp seqviennacl
 
 include ${PETSC_DIR}/conf/variables
 include ${PETSC_DIR}/conf/rules

File src/vec/vec/impls/seq/seqviennacl/makefile

View file
  • Ignore whitespace
+#requirespackage 'PETSC_HAVE_VIENNACL'
+#requireslanguage CXXONLY
+ALL: lib
+
+CFLAGS   =
+FFLAGS   =
+SOURCEC  = vecviennacl.cpp
+SOURCEF  =
+SOURCEH  = viennaclvecimpl.h
+LIBBASE  = libpetscvec
+MANSEC   = Vec
+LOCDIR   = src/vec/vec/impls/seq/seqviennacl/
+DIRS     =
+
+include ${PETSC_DIR}/conf/variables
+include ${PETSC_DIR}/conf/rules
+include ${PETSC_DIR}/conf/test
+

File src/vec/vec/impls/seq/seqviennacl/vecviennacl.cpp

View file
  • Ignore whitespace
+/*
+   Implements the sequential ViennaCL vectors.
+*/
+
+#include <petscconf.h>
+#include <petsc-private/vecimpl.h>          /*I "petscvec.h" I*/
+#include <../src/vec/vec/impls/dvecimpl.h>
+#include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
+
+#include "viennacl/linalg/inner_prod.hpp"
+#include "viennacl/linalg/norm_1.hpp"
+#include "viennacl/linalg/norm_2.hpp"
+#include "viennacl/linalg/norm_inf.hpp"
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLAllocateCheckHost"
+/*
+    Allocates space for the vector array on the Host if it does not exist.
+    Does NOT change the PetscViennaCLFlag for the vector
+    Does NOT zero the ViennaCL array
+
+    Also ensures the GPU-side Vec_ViennaCL structure exists (via VecViennaCLAllocateCheck).
+ */
+PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
+{
+  PetscErrorCode ierr;
+  PetscScalar    *array;
+  Vec_Seq        *s;
+  PetscInt       n = v->map->n;
+
+  PetscFunctionBegin;
+  s    = (Vec_Seq*)v->data;
+  ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
+  if (s->array == 0) {
+    /* host buffer missing: allocate it and record ownership so it is freed in VecDestroy */
+    ierr               = PetscMalloc(n*sizeof(PetscScalar),&array);CHKERRQ(ierr);
+    ierr               = PetscLogObjectMemory(v,n*sizeof(PetscScalar));CHKERRQ(ierr);
+    s->array           = array;
+    s->array_allocated = array;
+  }
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLAllocateCheck"
+/*
+    Allocates space for the vector array on the GPU if it does not exist.
+    Does NOT change the PetscViennaCLFlag for the vector
+    Does NOT zero the ViennaCL array
+ */
+PetscErrorCode VecViennaCLAllocateCheck(Vec v)
+{
+  PetscFunctionBegin;
+  /* Allocate GPU-side storage on first use.  The ViennaCLVector constructor
+     already sizes the buffer to map->n, so no extra resize() is needed.
+     (Removed an unused MPI_Comm_rank call and the redundant resize.) */
+  if (!v->spptr) {
+    try {
+      v->spptr                            = new Vec_ViennaCL;
+      ((Vec_ViennaCL*)v->spptr)->GPUarray = new ViennaCLVector((PetscBLASInt)v->map->n);
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+  }
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLCopyToGPU"
+/* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
+PetscErrorCode VecViennaCLCopyToGPU(Vec v)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
+  if (v->valid_GPU_array == PETSC_VIENNACL_CPU) {
+    ierr = PetscLogEventBegin(VEC_ViennaCLCopyToGPU,v,0,0,0);CHKERRQ(ierr);
+    try {
+      ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
+      /* NOTE(review): the cast assumes the host array pointer is the first
+         member of Vec_Seq (see dvecimpl.h) -- confirm if that struct changes */
+      viennacl::fast_copy(*(PetscScalar**)v->data, *(PetscScalar**)v->data + v->map->n, vec->begin());
+      //ierr = WaitForGPU();CHKERRViennaCL(ierr);  //copy does not return before data is safe. No need to wait.
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = PetscLogEventEnd(VEC_ViennaCLCopyToGPU,v,0,0,0);CHKERRQ(ierr);
+    /* both copies now agree */
+    v->valid_GPU_array = PETSC_VIENNACL_BOTH;
+  }
+  PetscFunctionReturn(0);
+}
+
+
+/* TODO
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLCopyToGPUSome"
+static PetscErrorCode VecViennaCLCopyToGPUSome(Vec v, PetscViennaCLIndices ci)
+{
+  PetscErrorCode ierr;
+  ViennaCLARRAY      *varray;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
+  if (v->valid_GPU_array == PETSC_ViennaCL_CPU) {
+    ierr = PetscLogEventBegin(VEC_ViennaCLCopyToGPUSome,v,0,0,0);CHKERRQ(ierr);
+    
+    ViennaCLVector & vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
+    Vec_Seq *s           = (Vec_Seq*)v->data;
+
+    ViennaCLINTARRAYCPU *indicesCPU=&ci->recvIndicesCPU;
+    ViennaCLINTARRAYGPU *indicesGPU=&ci->recvIndicesGPU;
+
+    viennacl::fast_copy(
+    thrust::copy(thrust::make_permutation_iterator(s->array,indicesCPU->begin()),
+                 thrust::make_permutation_iterator(s->array,indicesCPU->end()),
+                 thrust::make_permutation_iterator(varray->begin(),indicesGPU->begin()));
+
+    // Set the buffer states
+    v->valid_GPU_array = PETSC_VIENNACL_BOTH;
+    ierr = PetscLogEventEnd(VEC_ViennaCLCopyToGPUSome,v,0,0,0);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
+*/
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLCopyFromGPU"
+/*
+     VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
+*/
+PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  /* ensure a host buffer exists before copying into it */
+  ierr = VecViennaCLAllocateCheckHost(v);CHKERRQ(ierr);
+  if (v->valid_GPU_array == PETSC_VIENNACL_GPU) {
+    ierr = PetscLogEventBegin(VEC_ViennaCLCopyFromGPU,v,0,0,0);CHKERRQ(ierr);
+    try {
+      ViennaCLVector *vec = ((Vec_ViennaCL*)v->spptr)->GPUarray;
+      viennacl::fast_copy(vec->begin(),vec->end(),*(PetscScalar**)v->data);
+      //ierr = WaitForGPU();CHKERRViennaCL(ierr); //Reads in ViennaCL are blocking
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = PetscLogEventEnd(VEC_ViennaCLCopyFromGPU,v,0,0,0);CHKERRQ(ierr);
+    /* both copies now agree */
+    v->valid_GPU_array = PETSC_VIENNACL_BOTH;
+  }
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLCopyFromGPUSome"
+/* Note that this function only copies *some* of the values up from the GPU to CPU,
+   which means that we need to recombine the data at some point before using any of the standard functions.
+   We could add another few flag-types to keep track of this, or treat things like VecGetArray VecRestoreArray
+   where you have to always call in pairs
+*/
+/* TODO
+PetscErrorCode VecViennaCLCopyFromGPUSome(Vec v, PetscViennaCLIndices ci)
+{
+  ViennaCLARRAY      *varray;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
+  ierr = VecViennaCLAllocateCheckHost(v);CHKERRQ(ierr);
+  if (v->valid_GPU_array == PETSC_ViennaCL_GPU) {
+    ierr   = PetscLogEventBegin(VEC_ViennaCLCopyFromGPUSome,v,0,0,0);CHKERRQ(ierr);
+    varray = ((Vec_ViennaCL*)v->spptr)->GPUarray;
+    Vec_Seq *s;
+    s = (Vec_Seq*)v->data;
+    ViennaCLINTARRAYCPU *indicesCPU=&ci->sendIndicesCPU;
+    ViennaCLINTARRAYGPU *indicesGPU=&ci->sendIndicesGPU;
+
+    thrust::copy(thrust::make_permutation_iterator(varray->begin(),indicesGPU->begin()),
+                 thrust::make_permutation_iterator(varray->begin(),indicesGPU->end()),
+                 thrust::make_permutation_iterator(s->array,indicesCPU->begin()));
+    
+    ierr = VecViennaCLRestoreArrayRead(v,&varray);CHKERRQ(ierr);
+    ierr = PetscLogEventEnd(VEC_ViennaCLCopyFromGPUSome,v,0,0,0);CHKERRQ(ierr);
+    v->valid_GPU_array = PETSC_ViennaCL_BOTH;
+  }
+  PetscFunctionReturn(0);
+} */
+
+
+/* Copy on CPU */
+#undef __FUNCT__
+#define __FUNCT__ "VecCopy_SeqViennaCL_Private"
+static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)
+{
+  PetscScalar       *ya;
+  const PetscScalar *xa;
+  PetscErrorCode    ierr;
+
+  PetscFunctionBegin;
+  if (xin != yin) {
+    ierr = VecGetArrayRead(xin,&xa);CHKERRQ(ierr);
+    ierr = VecGetArray(yin,&ya);CHKERRQ(ierr);
+    ierr = PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));CHKERRQ(ierr);
+    ierr = VecRestoreArrayRead(xin,&xa);CHKERRQ(ierr);
+    ierr = VecRestoreArray(yin,&ya);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecSetRandom_SeqViennaCL_Private"
+/* Fills xin with values from the random context r, one entry at a time on the host. */
+static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)
+{
+  PetscErrorCode ierr;
+  PetscInt       n = xin->map->n,i;
+  PetscScalar    *xx;
+
+  PetscFunctionBegin;
+  ierr = VecGetArray(xin,&xx);CHKERRQ(ierr);
+  for (i=0; i<n; i++) {ierr = PetscRandomGetValue(r,&xx[i]);CHKERRQ(ierr);}
+  ierr = VecRestoreArray(xin,&xx);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecDestroy_SeqViennaCL_Private"
+/* Releases the host-side (Vec_Seq) storage of the vector; GPU data is handled by the caller. */
+static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
+{
+  Vec_Seq        *vs = (Vec_Seq*)v->data;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = PetscObjectDepublish(v);CHKERRQ(ierr);
+#if defined(PETSC_USE_LOG)
+  PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
+#endif
+  /* braces keep CHKERRQ inside the conditional; previously it ran on a stale ierr when array_allocated was NULL */
+  if (vs->array_allocated) {ierr = PetscFree(vs->array_allocated);CHKERRQ(ierr);}
+  ierr = PetscFree(vs);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecResetArray_SeqViennaCL_Private"
+/* Restores the original host array pointer (presumably displaced by VecPlaceArray -- confirm against callers). */
+static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
+{
+  Vec_Seq *v = (Vec_Seq*)vin->data;
+
+  PetscFunctionBegin;
+  v->array         = v->unplacedarray;
+  v->unplacedarray = 0;
+  PetscFunctionReturn(0);
+}
+
+/* these following 3 public versions are necessary because we use ViennaCL in the regular PETSc code and these need to be called from plain C code. */
+/*
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLAllocateCheck_Public"
+PetscErrorCode VecViennaCLAllocateCheck_Public(Vec v)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLCopyToGPU_Public"
+PetscErrorCode VecViennaCLCopyToGPU_Public(Vec v)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLCopyToGPU(v);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+} */
+
+#undef __FUNCT__
+#define __FUNCT__ "PetscViennaCLIndicesCreate"
+/*
+    PetscViennaCLIndicesCreate - creates the data structure needed by VecViennaCLCopyToGPUSome_Public()
+
+   Input Parameters:
++    n - the number of indices
+-    indices - integer list of indices
+
+   Output Parameter:
+.    ci - the ViennaCLIndices object suitable to pass to VecViennaCLCopyToGPUSome_Public()
+
+.seealso: PetscViennaCLIndicesDestroy(), VecViennaCLCopyToGPUSome_Public()
+*/
+/*PetscErrorCode PetscViennaCLIndicesCreate(PetscInt ns,PetscInt *sendIndices,PetscInt nr,PetscInt *recvIndices,PetscViennaCLIndices *ci)
+{
+  PetscViennaCLIndices cci;
+
+  PetscFunctionBegin;
+  cci = new struct _p_PetscViennaCLIndices;
+
+  cci->sendIndicesCPU.assign(sendIndices,sendIndices+ns);
+  cci->sendIndicesGPU.assign(sendIndices,sendIndices+ns);
+
+  cci->recvIndicesCPU.assign(recvIndices,recvIndices+nr);
+  cci->recvIndicesGPU.assign(recvIndices,recvIndices+nr);
+  
+  *ci = cci;
+  PetscFunctionReturn(0);
+}*/
+
+#undef __FUNCT__
+#define __FUNCT__ "PetscViennaCLIndicesDestroy"
+/*
+    PetscViennaCLIndicesDestroy - destroys the data structure needed by VecViennaCLCopyToGPUSome_Public()
+
+   Input Parameters:
+.    ci - the ViennaCLIndices object suitable to pass to VecViennaCLCopyToGPUSome_Public()
+
+.seealso: PetscViennaCLIndicesCreate(), VecViennaCLCopyToGPUSome_Public()
+*/
+/*PetscErrorCode PetscViennaCLIndicesDestroy(PetscViennaCLIndices *ci)
+{
+  PetscFunctionBegin;
+  if (!(*ci)) PetscFunctionReturn(0);
+  try {
+    if (ci) delete *ci;
+  } catch(char *ex) {
+    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+  }
+  *ci = 0;
+  PetscFunctionReturn(0);
+} */
+
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLCopyToGPUSome_Public"
+/*
+    VecViennaCLCopyToGPUSome_Public - Copies certain entries down to the GPU from the CPU of a vector
+
+   Input Parameters:
++    v - the vector
+-    indices - the requested indices, this should be created with ViennaCLIndicesCreate()
+
+*/
+/*PetscErrorCode VecViennaCLCopyToGPUSome_Public(Vec v, PetscViennaCLIndices ci)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLCopyToGPUSome(v,ci);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+} */
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLCopyFromGPUSome_Public"
+/*
+  VecViennaCLCopyFromGPUSome_Public - Copies certain entries up to the CPU from the GPU of a vector
+
+  Input Parameters:
+ +    v - the vector
+ -    indices - the requested indices, this should be created with ViennaCLIndicesCreate()
+*/
+/*PetscErrorCode VecViennaCLCopyFromGPUSome_Public(Vec v, PetscViennaCLIndices ci)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLCopyFromGPUSome(v,ci);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+} */
+
+
+
+/*MC
+   VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
+
+   Options Database Keys:
+. -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
+
+  Level: beginner
+
+.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateSeqWithArray(), VECMPI, VecType, VecCreateMPI(), VecCreateSeq()
+M*/
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecAYPX_SeqViennaCL"
+/* Computes y = x + alpha*y.  Note alpha == 0 must still overwrite y with x;
+   the previous code incorrectly left y untouched in that case (also reachable
+   from VecAXPBY with a == 1, b == 0). */
+PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
+{
+  ViennaCLVector *xgpu,*ygpu;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  if (alpha == 0.0) {
+    /* y = x + 0*y degenerates to a plain device copy */
+    ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
+    try {
+      *ygpu = *xgpu;
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLRestoreArrayWrite(yin,&ygpu);CHKERRQ(ierr);
+  } else {
+    ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLGetArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
+    try {
+      *ygpu = *xgpu + alpha * *ygpu;
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLRestoreArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
+    ierr = PetscLogFlops(2.0*yin->map->n);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecAXPY_SeqViennaCL"
+/* Computes y = y + alpha*x on the GPU; alpha == 0 is a genuine no-op since y is unchanged. */
+PetscErrorCode VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)
+{
+  ViennaCLVector *xgpu,*ygpu;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  if (alpha != 0.0) {
+    ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLGetArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
+    try {
+      *ygpu += alpha * *xgpu;
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLRestoreArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
+    ierr = PetscLogFlops(2.0*yin->map->n);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecPointwiseDivide_SeqViennaCL"
+/* Computes w[i] = x[i] / y[i] elementwise on the GPU via viennacl::linalg::element_div. */
+PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
+{
+  ViennaCLVector *xgpu,*ygpu,*wgpu;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+  ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
+  ierr = VecViennaCLGetArrayWrite(win,&wgpu);CHKERRQ(ierr);
+  try {
+    *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
+  } catch(char *ex) {
+    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+  }
+  ierr = PetscLogFlops(win->map->n);CHKERRQ(ierr);
+  ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+  ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
+  ierr = VecViennaCLRestoreArrayWrite(win,&wgpu);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecWAXPY_SeqViennaCL"
+/* Computes w = alpha*x + y on the GPU.  alpha == 0 reduces to a copy of y;
+   alpha == +1/-1 are special-cased to avoid the scalar multiplication. */
+PetscErrorCode VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin, Vec yin)
+{
+  ViennaCLVector *xgpu,*ygpu,*wgpu;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  if (alpha == 0.0) {
+    ierr = VecCopy_SeqViennaCL(yin,win);CHKERRQ(ierr);
+  } else {
+    ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
+    ierr = VecViennaCLGetArrayWrite(win,&wgpu);CHKERRQ(ierr);
+    if (alpha == 1.0) {
+      try {
+        *wgpu = *ygpu + *xgpu;
+      } catch(char *ex) {
+        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+      }
+      ierr = PetscLogFlops(win->map->n);CHKERRQ(ierr);
+    } else if (alpha == -1.0) {
+      try {
+        *wgpu = *ygpu - *xgpu;
+      } catch(char *ex) {
+        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+      }
+      ierr = PetscLogFlops(win->map->n);CHKERRQ(ierr);
+    } else {
+      try {
+        *wgpu = *ygpu + alpha * *xgpu;
+      } catch(char *ex) {
+        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+      }
+      ierr = PetscLogFlops(2*win->map->n);CHKERRQ(ierr);
+    }
+    ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
+    ierr = VecViennaCLRestoreArrayWrite(win,&wgpu);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
+
+
+/*
+ * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
+ *
+ * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z.
+ * Since there is no WAXPBY-function in PETSc, we use a fallback to an iterated application of x += alpha_i y_i for now.
+ */
+#undef __FUNCT__
+#define __FUNCT__ "VecMAXPY_SeqViennaCL"
+/* Computes x = x + sum_j alpha[j]*y[j] by repeated AXPY calls (see rationale above). */
+PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv,const PetscScalar *alpha,Vec *y)
+{
+  PetscErrorCode ierr;
+  PetscInt       j;
+
+  PetscFunctionBegin;
+  for (j = 0; j < nv; ++j) {
+    ierr = VecAXPY_SeqViennaCL(xin,alpha[j],y[j]);CHKERRQ(ierr);   /* Note: The extra GetArray/RestoreArray-overhead is acceptable because of the kernel launch overhead */
+  }
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecDot_SeqViennaCL"
+/* Computes z = x . y via viennacl::linalg::inner_prod.
+   NOTE(review): inner_prod does not conjugate; only real scalars are supported here (cf. VecTDot). */
+PetscErrorCode VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
+{
+  ViennaCLVector *xgpu,*ygpu;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+  ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
+  try {
+    *z = viennacl::linalg::inner_prod(*xgpu,*ygpu);
+  } catch(char *ex) {
+    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+  }
+  /* n multiplies and n-1 adds; skip for empty vectors to avoid negative flop counts */
+  if (xin->map->n >0) {
+    ierr = PetscLogFlops(2.0*xin->map->n-1);CHKERRQ(ierr);
+  }
+  ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+  ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+
+/*
+ * Operation z[j] = dot(x, y[j])
+ * 
+ * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
+ */
+#undef __FUNCT__
+#define __FUNCT__ "VecMDot_SeqViennaCL"
+/* Computes z[i] = x . y[i] for i = 0..nv-1, one inner_prod per vector (see note above). */
+PetscErrorCode VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
+{
+  PetscErrorCode ierr;
+  PetscInt       n = xin->map->n,i;
+  ViennaCLVector *xgpu,*ygpu;
+  Vec            *yyin = (Vec*)yin;  /* cast away array const-ness for the GetArray interface */
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+  for (i=0; i<nv; i++) {
+    ierr = VecViennaCLGetArrayRead(yyin[i],&ygpu);CHKERRQ(ierr);
+    try {
+      z[i] = viennacl::linalg::inner_prod(*xgpu,*ygpu);
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = VecViennaCLRestoreArrayRead(yyin[i],&ygpu);CHKERRQ(ierr);
+  }
+  
+  ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+  ierr = PetscLogFlops(PetscMax(nv*(2.0*n-1),0.0));CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecSet_SeqViennaCL"
+/* Sets every entry of x to alpha by assigning a scalar_vector on the GPU. */
+PetscErrorCode VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)
+{
+  ViennaCLVector *xgpu;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLGetArrayWrite(xin,&xgpu);CHKERRQ(ierr);
+  try {
+    *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
+  } catch(char *ex) {
+    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+  }
+  /* error code was previously dropped here; check it like every other call site */
+  ierr = VecViennaCLRestoreArrayWrite(xin,&xgpu);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecScale_SeqViennaCL"
+/* Computes x = alpha*x.  alpha == 0 delegates to VecSet; alpha == 1 is a no-op. */
+PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
+{
+  ViennaCLVector *xgpu;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  if (alpha == 0.0) {
+    ierr = VecSet_SeqViennaCL(xin,alpha);CHKERRQ(ierr);
+    ierr = PetscLogFlops(xin->map->n);CHKERRQ(ierr);
+  } else if (alpha != 1.0) {
+    ierr = VecViennaCLGetArrayReadWrite(xin,&xgpu);CHKERRQ(ierr);
+    try {
+      *xgpu *= alpha;
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = VecViennaCLRestoreArrayReadWrite(xin,&xgpu);CHKERRQ(ierr);
+    ierr = PetscLogFlops(xin->map->n);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecTDot_SeqViennaCL"
+/* Unconjugated dot product z = x^T y. */
+PetscErrorCode VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar *z)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
+  ierr = VecDot_SeqViennaCL(xin, yin, z);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecCopy_SeqViennaCL"
+/* Copies xin into yin, choosing GPU or CPU transfer based on where xin's valid data lives
+   (and, when xin is valid in both, based on where yin's data lives). */
+PetscErrorCode VecCopy_SeqViennaCL(Vec xin,Vec yin)
+{
+  ViennaCLVector *xgpu,*ygpu;
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  if (xin != yin) {
+    if (xin->valid_GPU_array == PETSC_VIENNACL_GPU) {
+      ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+      ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
+      try {
+        *ygpu = *xgpu;
+      } catch(char *ex) {
+        SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+      }
+      ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+      ierr = VecViennaCLRestoreArrayWrite(yin,&ygpu);CHKERRQ(ierr);
+
+    } else if (xin->valid_GPU_array == PETSC_VIENNACL_CPU) {
+      /* copy in CPU if we are on the CPU*/
+      ierr = VecCopy_SeqViennaCL_Private(xin,yin);CHKERRQ(ierr);
+    } else if (xin->valid_GPU_array == PETSC_VIENNACL_BOTH) {
+      /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
+      if (yin->valid_GPU_array == PETSC_VIENNACL_CPU) {
+        /* copy in CPU */
+        ierr = VecCopy_SeqViennaCL_Private(xin,yin);CHKERRQ(ierr);
+
+      } else if (yin->valid_GPU_array == PETSC_VIENNACL_GPU) {
+        /* copy in GPU */
+        ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+        ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
+        try {
+          *ygpu = *xgpu;
+        } catch(char *ex) {
+          SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+        }
+        ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+        ierr = VecViennaCLRestoreArrayWrite(yin,&ygpu);CHKERRQ(ierr);
+      } else if (yin->valid_GPU_array == PETSC_VIENNACL_BOTH) {
+        /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck);
+           default to copy in GPU (this is an arbitrary choice) */
+        ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+        ierr = VecViennaCLGetArrayWrite(yin,&ygpu);CHKERRQ(ierr);
+        try {
+          *ygpu = *xgpu;
+        } catch(char *ex) {
+          SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+        }
+        ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+        ierr = VecViennaCLRestoreArrayWrite(yin,&ygpu);CHKERRQ(ierr);
+      } else {
+        /* yin in the UNALLOCATED state: fall back to the host copy */
+        ierr = VecCopy_SeqViennaCL_Private(xin,yin);CHKERRQ(ierr);
+      }
+    }
+  }
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecSwap_SeqViennaCL"
+/* Exchanges the contents of x and y on the GPU via viennacl::swap. */
+PetscErrorCode VecSwap_SeqViennaCL(Vec xin,Vec yin)
+{
+  PetscErrorCode ierr;
+  ViennaCLVector *xgpu,*ygpu;
+
+  PetscFunctionBegin;
+  if (xin != yin) {
+    ierr = VecViennaCLGetArrayReadWrite(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLGetArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
+
+    try {
+      viennacl::swap(*xgpu, *ygpu);
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = VecViennaCLRestoreArrayReadWrite(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLRestoreArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
+
+
+// y = alpha * x + beta * y
+#undef __FUNCT__
+#define __FUNCT__ "VecAXPBY_SeqViennaCL"
+/* Special cases: a==0 -> scale y by beta; b==1 -> AXPY; a==1 -> AYPX;
+   b==0 -> y = alpha*x; otherwise the general two-scalar update. */
+PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)
+{
+  PetscErrorCode ierr;
+  PetscScalar    a = alpha,b = beta;
+  ViennaCLVector *xgpu,*ygpu;
+
+  PetscFunctionBegin;
+  if (a == 0.0) {
+    ierr = VecScale_SeqViennaCL(yin,beta);CHKERRQ(ierr);
+  } else if (b == 1.0) {
+    ierr = VecAXPY_SeqViennaCL(yin,alpha,xin);CHKERRQ(ierr);
+  } else if (a == 1.0) {
+    ierr = VecAYPX_SeqViennaCL(yin,beta,xin);CHKERRQ(ierr);
+  } else if (b == 0.0) {
+    ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLGetArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
+    try {
+      *ygpu = *xgpu * alpha;
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = PetscLogFlops(xin->map->n);CHKERRQ(ierr);
+    ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLRestoreArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
+  } else {
+    ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLGetArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
+    try {
+      *ygpu = *xgpu * alpha + *ygpu * beta;
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+    ierr = VecViennaCLRestoreArrayReadWrite(yin,&ygpu);CHKERRQ(ierr);
+    ierr = PetscLogFlops(3.0*xin->map->n);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}
+
+
+/* operation  z = alpha * x + beta *y + gamma *z*/
+#undef __FUNCT__
+#define __FUNCT__ "VecAXPBYPCZ_SeqViennaCL"
+/* Computes z = alpha*x + beta*y + gamma*z on the GPU. Zero-valued scalars
+   select cheaper expressions; the fully general case is split into two
+   statements to avoid temporaries. */
+PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)
+{
+  PetscErrorCode ierr;
+  PetscInt       n = zin->map->n;
+  ViennaCLVector *xgpu,*ygpu,*zgpu;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+  ierr = VecViennaCLGetArrayRead(yin,&ygpu);CHKERRQ(ierr);
+  ierr = VecViennaCLGetArrayReadWrite(zin,&zgpu);CHKERRQ(ierr);
+  if (alpha == 0.0) {
+    try {
+      if (beta == 0.0) {
+        *zgpu = gamma * *zgpu;
+        ierr = PetscLogFlops(1.0*n);CHKERRQ(ierr);
+      } else if (gamma == 0.0) {
+        *zgpu = beta * *ygpu;
+        ierr = PetscLogFlops(1.0*n);CHKERRQ(ierr);
+      } else {
+        *zgpu = beta * *ygpu + gamma * *zgpu;
+        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
+      }
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+  } else if (beta == 0.0) {
+    try {
+      if (gamma == 0.0) {
+        *zgpu = alpha * *xgpu;
+        ierr = PetscLogFlops(1.0*n);CHKERRQ(ierr);
+      } else {
+        *zgpu = alpha * *xgpu + gamma * *zgpu;
+        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
+      }
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+  } else if (gamma == 0.0) {
+    try {
+      *zgpu = alpha * *xgpu + beta * *ygpu;
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
+  } else {
+    try {
+      /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
+      *zgpu *= gamma;
+      *zgpu += alpha * *xgpu + beta * *ygpu;
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = PetscLogFlops(5.0*n);CHKERRQ(ierr);
+  }
+  /* Restore on every code path (previously only the general case restored):
+     the ReadWrite restore marks the GPU copy of z as the valid one and
+     increases the object state. Also drops the duplicate flop count the
+     alpha==0 branch used to log a second time. */
+  ierr = VecViennaCLRestoreArrayReadWrite(zin,&zgpu);CHKERRQ(ierr);
+  ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+  ierr = VecViennaCLRestoreArrayRead(yin,&ygpu);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecPointwiseMult_SeqViennaCL"
+/* w = x .* y : componentwise product computed on the GPU via ViennaCL's
+   element_prod kernel. */
+PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)
+{
+  PetscErrorCode ierr;
+  PetscInt       nelem = win->map->n;
+  ViennaCLVector *vx,*vy,*vw;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLGetArrayRead(xin,&vx);CHKERRQ(ierr);
+  ierr = VecViennaCLGetArrayRead(yin,&vy);CHKERRQ(ierr);
+  ierr = VecViennaCLGetArrayReadWrite(win,&vw);CHKERRQ(ierr);
+  try {
+    *vw = viennacl::linalg::element_prod(*vx, *vy);
+  } catch(char *ex) {
+    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+  }
+  ierr = VecViennaCLRestoreArrayRead(xin,&vx);CHKERRQ(ierr);
+  ierr = VecViennaCLRestoreArrayRead(yin,&vy);CHKERRQ(ierr);
+  ierr = VecViennaCLRestoreArrayReadWrite(win,&vw);CHKERRQ(ierr);
+  ierr = PetscLogFlops(nelem);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecNorm_SeqViennaCL"
+/* Computes the requested norm of x on the GPU using ViennaCL's norm kernels
+   (norm_inf included, so no host fallback is needed). For NORM_1_AND_2 the
+   1-norm is written to z[0] and the 2-norm to z[1]. */
+PetscErrorCode VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal *z)
+{
+  PetscErrorCode    ierr;
+  PetscInt          n = xin->map->n;
+  ViennaCLVector    *xgpu;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLGetArrayRead(xin,&xgpu);CHKERRQ(ierr);
+  if (type == NORM_2 || type == NORM_FROBENIUS) {
+    try {
+      *z = viennacl::linalg::norm_2(*xgpu);
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = PetscLogFlops(PetscMax(2.0*n-1,0.0));CHKERRQ(ierr);
+  } else if (type == NORM_INFINITY) {
+    /* No extra Get/Restore pair here: the array was already acquired above
+       and is restored once, for all branches, at the end. */
+    try {
+      *z = viennacl::linalg::norm_inf(*xgpu);
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+  } else if (type == NORM_1) {
+    try {
+      *z = viennacl::linalg::norm_1(*xgpu);
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = PetscLogFlops(PetscMax(n-1.0,0.0));CHKERRQ(ierr);
+  } else if (type == NORM_1_AND_2) {
+    try {
+      *z     = viennacl::linalg::norm_1(*xgpu);
+      *(z+1) = viennacl::linalg::norm_2(*xgpu);
+    } catch(char *ex) {
+      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+    }
+    ierr = PetscLogFlops(PetscMax(2.0*n-1,0.0));CHKERRQ(ierr);
+    ierr = PetscLogFlops(PetscMax(n-1.0,0.0));CHKERRQ(ierr);
+  }
+  ierr = VecViennaCLRestoreArrayRead(xin,&xgpu);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+/* The following few functions should be modified to actually work with the GPU so they don't force unnecessary allocation of CPU memory */
+
+#undef __FUNCT__
+#define __FUNCT__ "VecSetRandom_SeqViennaCL"
+/* Fills x with random numbers via the host-side helper, then marks the host
+   copy as the valid one so the data migrates to the GPU on next device use. */
+PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecSetRandom_SeqViennaCL_Private(xin,r);CHKERRQ(ierr);
+  xin->valid_GPU_array = PETSC_VIENNACL_CPU;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecResetArray_SeqViennaCL"
+/* Restores the vector's original array after a VecPlaceArray(). GPU data is
+   copied back to the host first so pending device results are not lost. */
+PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLCopyFromGPU(vin);CHKERRQ(ierr);
+  ierr = VecResetArray_SeqViennaCL_Private(vin);CHKERRQ(ierr);
+  /* The host buffer just changed, so the host copy is now authoritative. */
+  vin->valid_GPU_array = PETSC_VIENNACL_CPU;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecPlaceArray_SeqViennaCL"
+/* Temporarily replaces the vector's array with a user-provided one. The GPU
+   copy is flushed to the host first so nothing is lost when the host array
+   is swapped out. */
+PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLCopyFromGPU(vin);CHKERRQ(ierr);
+  ierr = VecPlaceArray_Seq(vin,a);CHKERRQ(ierr);
+  /* The placed host array is now the valid data. */
+  vin->valid_GPU_array = PETSC_VIENNACL_CPU;
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecReplaceArray_SeqViennaCL"
+/* Permanently replaces the vector's array with a user-provided one. As with
+   VecPlaceArray, the GPU copy is flushed to the host first. */
+PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar *a)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecViennaCLCopyFromGPU(vin);CHKERRQ(ierr);
+  ierr = VecReplaceArray_Seq(vin,a);CHKERRQ(ierr);
+  /* The replacement host array is now the valid data. */
+  vin->valid_GPU_array = PETSC_VIENNACL_CPU;
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecCreateSeqViennaCL"
+/*@
+   VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
+
+   Collective on MPI_Comm
+
+   Input Parameter:
++  comm - the communicator, should be PETSC_COMM_SELF
+-  n - the vector length
+
+   Output Parameter:
+.  V - the vector
+
+   Notes:
+   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
+   same type as an existing vector.
+
+   Level: intermediate
+
+   Concepts: vectors^creating sequential
+
+.seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
+@*/
+PetscErrorCode  VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec *v)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecCreate(comm,v);CHKERRQ(ierr);
+  /* local size == global size for a sequential vector */
+  ierr = VecSetSizes(*v,n,n);CHKERRQ(ierr);
+  ierr = VecSetType(*v,VECSEQVIENNACL);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+/*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector.
+ *
+ *  Simply reuses VecDot() and VecNorm(). A performance improvement through a fused custom kernel
+ *  (kernel generator) is possible.
+ */
+#undef __FUNCT__
+#define __FUNCT__ "VecDotNorm2_SeqViennaCL"
+PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
+{
+  PetscErrorCode                         ierr;
+
+  PetscFunctionBegin;
+  ierr = VecDot_SeqViennaCL(s,t,dp);CHKERRQ(ierr);
+  ierr = VecNorm_SeqViennaCL(t,NORM_2,nm);CHKERRQ(ierr);
+  *nm *= *nm; /* callers expect the squared norm */
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecDuplicate_SeqViennaCL"
+/* Creates a new ViennaCL vector with the same layout as win; copies the
+   attached objects, composed functions, and stash option — but not the
+   numerical values. */
+PetscErrorCode VecDuplicate_SeqViennaCL(Vec win,Vec *V)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  ierr = VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win),win->map->n,V);CHKERRQ(ierr);
+  /* share the layout description rather than rebuilding it */
+  ierr = PetscLayoutReference(win->map,&(*V)->map);CHKERRQ(ierr);
+  ierr = PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);CHKERRQ(ierr);
+  ierr = PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);CHKERRQ(ierr);
+  (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecDestroy_SeqViennaCL"
+/* Releases the GPU-side storage (if any), then hands off to the common
+   sequential-vector destruction path for the host-side data. */
+PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  try {
+    if (v->spptr) {
+      delete ((Vec_ViennaCL*)v->spptr)->GPUarray;
+      delete (Vec_ViennaCL*) v->spptr;
+    }
+  } catch(char *ex) {
+    /* map ViennaCL/OpenCL failures onto a PETSc library error */
+    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex);
+  }
+  ierr = VecDestroy_SeqViennaCL_Private(v);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecCreate_SeqViennaCL"
+/* Type constructor for VECSEQVIENNACL: builds on the plain sequential vector
+   and overrides the operations that have GPU implementations. */
+PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
+{
+  PetscErrorCode ierr;
+  PetscMPIInt    size;
+
+  PetscFunctionBegin;
+  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);CHKERRQ(ierr);
+  if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQVIENNACL on more than one process");
+  ierr = VecCreate_Seq_Private(V,0);CHKERRQ(ierr);
+  ierr = PetscObjectChangeTypeName((PetscObject)V,VECSEQVIENNACL);CHKERRQ(ierr);
+
+  /* install the GPU-backed operations; anything not listed falls back to the
+     host implementation inherited from VecCreate_Seq_Private() */
+  V->ops->dot             = VecDot_SeqViennaCL;
+  V->ops->norm            = VecNorm_SeqViennaCL;
+  V->ops->tdot            = VecTDot_SeqViennaCL;
+  V->ops->scale           = VecScale_SeqViennaCL;
+  V->ops->copy            = VecCopy_SeqViennaCL;
+  V->ops->set             = VecSet_SeqViennaCL;
+  V->ops->swap            = VecSwap_SeqViennaCL;
+  V->ops->axpy            = VecAXPY_SeqViennaCL;
+  V->ops->axpby           = VecAXPBY_SeqViennaCL;
+  V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
+  V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
+  V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
+  V->ops->setrandom       = VecSetRandom_SeqViennaCL;
+  V->ops->dot_local       = VecDot_SeqViennaCL;
+  V->ops->tdot_local      = VecTDot_SeqViennaCL;
+  V->ops->norm_local      = VecNorm_SeqViennaCL;
+  V->ops->mdot_local      = VecMDot_SeqViennaCL;
+  V->ops->maxpy           = VecMAXPY_SeqViennaCL;
+  V->ops->mdot            = VecMDot_SeqViennaCL;
+  V->ops->aypx            = VecAYPX_SeqViennaCL;
+  V->ops->waxpy           = VecWAXPY_SeqViennaCL;
+  V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
+  V->ops->placearray      = VecPlaceArray_SeqViennaCL;
+  V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
+  V->ops->resetarray      = VecResetArray_SeqViennaCL;
+  V->ops->destroy         = VecDestroy_SeqViennaCL;
+  V->ops->duplicate       = VecDuplicate_SeqViennaCL;
+  /* conjugate not hooked up yet (relevant for complex scalars) — TODO */
+  //V->ops->conjugate       = VecConjugate_SeqViennaCL;
+
+  ierr = VecViennaCLAllocateCheck(V);CHKERRQ(ierr);
+  /* vector starts life on the GPU, zero-initialized */
+  V->valid_GPU_array      = PETSC_VIENNACL_GPU;
+  ierr = VecSet(V,0.0);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecCreate_ViennaCL"
+/* Generic VECVIENNACL constructor: picks the sequential or the parallel
+   ViennaCL vector type depending on the communicator size. */
+PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec v)
+{
+  PetscErrorCode ierr;
+  PetscMPIInt    nproc;
+
+  PetscFunctionBegin;
+  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)v),&nproc);CHKERRQ(ierr);
+  if (nproc > 1) {
+    ierr = VecSetType(v,VECMPIVIENNACL);CHKERRQ(ierr);
+  } else {
+    ierr = VecSetType(v,VECSEQVIENNACL);CHKERRQ(ierr);
+  }
+  PetscFunctionReturn(0);
+}

File src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h

View file
  • Ignore whitespace
+#if !defined(__VIENNACLVECIMPL)
+#define __VIENNACLVECIMPL
+
+/* Private declarations for the sequential ViennaCL vector implementation. */
+
+#include <petsc-private/vecimpl.h>
+
+#include <algorithm>
+#include <vector>
+#include <string>
+
+#include "viennacl/vector.hpp"
+
+/* GPU-side storage type used throughout the ViennaCL vector code */
+typedef viennacl::vector<PetscScalar>    ViennaCLVector;
+
+
+/* Prototypes of the VECSEQVIENNACL operation implementations (defined in vecviennacl.cxx) */
+PETSC_INTERN PetscErrorCode VecDotNorm2_SeqViennaCL(Vec,Vec,PetscScalar*, PetscScalar*);
+PETSC_INTERN PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec,Vec,Vec);
+PETSC_INTERN PetscErrorCode VecWAXPY_SeqViennaCL(Vec,PetscScalar,Vec,Vec);
+PETSC_INTERN PetscErrorCode VecMDot_SeqViennaCL(Vec,PetscInt,const Vec[],PetscScalar*);
+PETSC_INTERN PetscErrorCode VecSet_SeqViennaCL(Vec,PetscScalar);
+PETSC_INTERN PetscErrorCode VecMAXPY_SeqViennaCL(Vec,PetscInt,const PetscScalar*,Vec*);
+PETSC_INTERN PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec);
+PETSC_INTERN PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec,Vec,Vec);
+PETSC_INTERN PetscErrorCode VecPlaceArray_SeqViennaCL(Vec,const PetscScalar*);
+PETSC_INTERN PetscErrorCode VecResetArray_SeqViennaCL(Vec);
+PETSC_INTERN PetscErrorCode VecReplaceArray_SeqViennaCL(Vec,const PetscScalar*);
+PETSC_INTERN PetscErrorCode VecDot_SeqViennaCL(Vec,Vec,PetscScalar*);
+PETSC_INTERN PetscErrorCode VecTDot_SeqViennaCL(Vec,Vec,PetscScalar*);
+PETSC_INTERN PetscErrorCode VecScale_SeqViennaCL(Vec,PetscScalar);
+PETSC_INTERN PetscErrorCode VecCopy_SeqViennaCL(Vec,Vec);
+PETSC_INTERN PetscErrorCode VecSwap_SeqViennaCL(Vec,Vec);
+PETSC_INTERN PetscErrorCode VecAXPY_SeqViennaCL(Vec,PetscScalar,Vec);
+PETSC_INTERN PetscErrorCode VecAXPBY_SeqViennaCL(Vec,PetscScalar,PetscScalar,Vec);
+PETSC_INTERN PetscErrorCode VecDuplicate_SeqViennaCL(Vec,Vec*);
+PETSC_INTERN PetscErrorCode VecNorm_SeqViennaCL(Vec,NormType,PetscReal*);
+PETSC_INTERN PetscErrorCode VecViennaCLCopyToGPU(Vec);
+PETSC_INTERN PetscErrorCode VecViennaCLAllocateCheck(Vec);
+PETSC_INTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec);
+PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec);
+PETSC_INTERN PetscErrorCode VecView_Seq(Vec,PetscViewer);
+PETSC_INTERN PetscErrorCode VecDestroy_SeqViennaCL(Vec);
+PETSC_INTERN PetscErrorCode VecAYPX_SeqViennaCL(Vec,PetscScalar,Vec);
+PETSC_INTERN PetscErrorCode VecSetRandom_SeqViennaCL(Vec,PetscRandom);
+
+PETSC_INTERN PetscErrorCode VecViennaCLCopyToGPU_Public(Vec);
+PETSC_INTERN PetscErrorCode VecViennaCLAllocateCheck_Public(Vec);
+
+/* Per-vector GPU payload, attached via Vec's spptr */
+struct Vec_ViennaCL {
+  viennacl::vector<PetscScalar> *GPUarray;        // this always holds the GPU data
+};
+
+
+/* NOTE(review): leftover from the CUSP implementation this file was derived
+   from; kept disabled pending the MPI/scatter port. */
+/*
+#define CUSPARRAY cusp::array1d<PetscScalar,cusp::device_memory>
+#define CUSPARRAYCPU cusp::array1d<PetscScalar,cusp::host_memory>
+#define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory>
+#define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory>
+
+struct  _p_PetscViennaCLIndices {
+  CUSPINTARRAYCPU sendIndicesCPU;
+  CUSPINTARRAYGPU sendIndicesGPU;
+
+  CUSPINTARRAYCPU recvIndicesCPU;
+  CUSPINTARRAYGPU recvIndicesGPU;
+}; */
+
+
+
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLGetArrayReadWrite"
+/* Gives read/write access to v's GPU data, ensuring the device copy is
+   up to date first. Pair with VecViennaCLRestoreArrayReadWrite(). */
+PETSC_STATIC_INLINE PetscErrorCode VecViennaCLGetArrayReadWrite(Vec v, ViennaCLVector **a)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  *a   = 0;
+  ierr = VecViennaCLCopyToGPU(v);CHKERRQ(ierr);
+  *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLRestoreArrayReadWrite"
+/* Ends read/write access: marks the GPU copy as the valid one (the caller
+   may have modified it) and bumps the object state so cached quantities are
+   invalidated. The pointer argument is not used. */
+PETSC_STATIC_INLINE PetscErrorCode VecViennaCLRestoreArrayReadWrite(Vec v, ViennaCLVector **a)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  v->valid_GPU_array = PETSC_VIENNACL_GPU;
+
+  ierr = PetscObjectStateIncrease((PetscObject)v);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLGetArrayRead"
+/* Gives read-only access to v's GPU data. Same body as the ReadWrite
+   variant — reading also requires an up-to-date device copy — but callers
+   promise not to modify the vector. */
+PETSC_STATIC_INLINE PetscErrorCode VecViennaCLGetArrayRead(Vec v, ViennaCLVector **a)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  *a   = 0;
+  ierr = VecViennaCLCopyToGPU(v);CHKERRQ(ierr);
+  *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLRestoreArrayRead"
+/* Intentionally a no-op: read access changes neither the data validity
+   flags nor the object state. */
+PETSC_STATIC_INLINE PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, ViennaCLVector **a)
+{
+  PetscFunctionBegin;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLGetArrayWrite"
+/* Gives write-only access to v's GPU data: only ensures the device buffer
+   exists (no host-to-device copy, unlike the Read variants), so the caller
+   must overwrite the entire vector. */
+PETSC_STATIC_INLINE PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  *a   = 0;
+  ierr = VecViennaCLAllocateCheck(v);CHKERRQ(ierr);
+  *a   = ((Vec_ViennaCL*)v->spptr)->GPUarray;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "VecViennaCLRestoreArrayWrite"
+/* Ends write access: the GPU copy is now the valid one, and the object state
+   is increased to invalidate cached quantities. The pointer argument is not
+   used. */
+PETSC_STATIC_INLINE PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBegin;
+  v->valid_GPU_array = PETSC_VIENNACL_GPU;
+
+  ierr = PetscObjectStateIncrease((PetscObject)v);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+#endif

File src/vec/vec/interface/dlregisvec.c

View file
  • Ignore whitespace
   ierr = PetscLogEventRegister("VecCopyToSome",     VEC_CLASSID,&VEC_CUSPCopyToGPUSome);CHKERRQ(ierr);
   ierr = PetscLogEventRegister("VecCopyFromSome",   VEC_CLASSID,&VEC_CUSPCopyFromGPUSome);CHKERRQ(ierr);
 #endif
+#if defined(PETSC_HAVE_VIENNACL)
+  ierr = PetscLogEventRegister("VecViennaCLCopyTo",     VEC_CLASSID,&VEC_ViennaCLCopyToGPU);CHKERRQ(ierr);
+  ierr = PetscLogEventRegister("VecViennaCLCopyFrom",   VEC_CLASSID,&VEC_ViennaCLCopyFromGPU);CHKERRQ(ierr);
+/*  ierr = PetscLogEventRegister("VecCopyToSome",     VEC_CLASSID,&VEC_ViennaCLCopyToGPUSome);CHKERRQ(ierr); */
+/*  ierr = PetscLogEventRegister("VecCopyFromSome",   VEC_CLASSID,&VEC_ViennaCLCopyFromGPUSome);CHKERRQ(ierr); */
+#endif
   /* Turn off high traffic events by default */
   ierr = PetscLogEventSetActiveAll(VEC_DotBarrier, PETSC_FALSE);CHKERRQ(ierr);
   ierr = PetscLogEventSetActiveAll(VEC_DotNormBarrier, PETSC_FALSE);CHKERRQ(ierr);

File src/vec/vec/interface/rvector.c

View file
  • Ignore whitespace
       ierr = VecCUSPCopyFromGPU(x);CHKERRQ(ierr);
     }
 #endif
+#if defined(PETSC_HAVE_VIENNACL)
+    if (x->valid_GPU_array == PETSC_VIENNACL_GPU || !*((PetscScalar**)x->data)) {
+      ierr = VecViennaCLCopyFromGPU(x);CHKERRQ(ierr);
+    }
+#endif
     *a = *((PetscScalar **)x->data);
   } else {
     ierr = (*x->ops->getarray)(x,a);CHKERRQ(ierr);
       ierr = VecCUSPCopyFromGPU(x);CHKERRQ(ierr);
     }
 #endif
+#if defined(PETSC_HAVE_VIENNACL)
+    if (x->valid_GPU_array == PETSC_VIENNACL_GPU || !*((PetscScalar**)x->data)) {
+      ierr = VecViennaCLCopyFromGPU(x);CHKERRQ(ierr);
+    }
+#endif
     *a = *((PetscScalar **)x->data);
   } else if (x->ops->getarrayread) {
     ierr = (*x->ops->getarrayread)(x,a);CHKERRQ(ierr);
 #if defined(PETSC_HAVE_CUSP)
     x->valid_GPU_array = PETSC_CUSP_CPU;
 #endif
+#if defined(PETSC_HAVE_VIENNACL)
+    x->valid_GPU_array = PETSC_VIENNACL_CPU;
+#endif
   } else {
     ierr = (*x->ops->restorearray)(x,a);CHKERRQ(ierr);
   }
   PetscFunctionBegin;
   PetscValidHeaderSpecific(x,VEC_CLASSID,1);
   if (x->petscnative) {
-#if defined(PETSC_HAVE_CUSP)
-    x->valid_GPU_array = PETSC_CUSP_CPU;
+#if defined(PETSC_HAVE_VIENNACL)
+    x->valid_GPU_array = PETSC_VIENNACL_CPU;
 #endif
   } else if (x->ops->restorearrayread) {
     ierr = (*x->ops->restorearrayread)(x,a);CHKERRQ(ierr);

File src/vec/vec/interface/vecregall.c

View file
  • Ignore whitespace
 PETSC_EXTERN PetscErrorCode VecCreate_MPICUSP(Vec);
 PETSC_EXTERN PetscErrorCode VecCreate_CUSP(Vec);
 #endif
+#if defined(PETSC_HAVE_VIENNACL)
+PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec);
+/* PETSC_EXTERN PetscErrorCode VecCreate_MPIViennaCL(Vec); */
+PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec);
+#endif
 #if 0
 #if defined(PETSC_HAVE_SIEVE)
 PETSC_EXTERN PetscErrorCode VecCreate_Sieve(Vec);
   ierr = VecRegister(VECMPICUSP,    VecCreate_MPICUSP);CHKERRQ(ierr);
   ierr = VecRegister(VECCUSP,       VecCreate_CUSP);CHKERRQ(ierr);
 #endif
+#if defined PETSC_HAVE_VIENNACL
+  ierr = VecRegisterDynamic(VECSEQVIENNACL,  path, "VecCreate_SeqViennaCL",  VecCreate_SeqViennaCL);CHKERRQ(ierr);
+/*  ierr = VecRegisterDynamic(VECMPIVIENNACL,  path, "VecCreate_MPIViennaCL",  VecCreate_MPIViennaCL);CHKERRQ(ierr); */
+  ierr = VecRegisterDynamic(VECVIENNACL,     path, "VecCreate_ViennaCL",     VecCreate_ViennaCL);CHKERRQ(ierr);
+#endif
 #if 0
 #if defined(PETSC_HAVE_SIEVE)
   ierr = VecRegister(VECSIEVE,      VecCreate_Sieve);CHKERRQ(ierr);

File src/vec/vec/interface/vector.c

View file
  • Ignore whitespace
 PetscLogEvent VEC_SetRandom, VEC_ReduceArithmetic, VEC_ReduceBarrier, VEC_ReduceCommunication,VEC_ReduceBegin,VEC_ReduceEnd,VEC_Ops;
 PetscLogEvent VEC_DotNormBarrier, VEC_DotNorm, VEC_AXPBYPCZ, VEC_CUSPCopyFromGPU, VEC_CUSPCopyToGPU;
 PetscLogEvent VEC_CUSPCopyFromGPUSome, VEC_CUSPCopyToGPUSome;
+PetscLogEvent VEC_ViennaCLCopyFromGPU, VEC_ViennaCLCopyToGPU;
 
 extern PetscErrorCode VecStashGetInfo_Private(VecStash*,PetscInt*,PetscInt*);
 #undef __FUNCT__