Jed Brown avatar Jed Brown committed dd00b65 Merge

Merge branch 'jed/matmatmult-sort'

New version provides in the neighborhood of 20% speedup for MatMatMult
as used in PCGAMG.

* jed/matmatmult-sort:
MatMatMultSymbolic_SeqAIJ_SeqAIJ: switch to "sort" version as default
segbuffer: g++ does not like casting int*restrict* to void*, void** okay
MatMatMult_SeqAIJ_SeqAIJ_Sort: new implementation using char flags and sorting
segbuffer: add PetscSegBufferGetInts() to encourage use of PETSC_RESTRICT
segbuffer: create wrapper structure
segbuffer: add PetscSegBufferGetSize() and PetscSegBufferUnuse()
segbuffer: fix bug in which seg->used was not reset

Comments (0)

Files changed (5)

include/petscsys.h

 typedef struct _n_PetscSegBuffer *PetscSegBuffer;
 PETSC_EXTERN PetscErrorCode PetscSegBufferCreate(PetscInt,PetscInt,PetscSegBuffer*);
 PETSC_EXTERN PetscErrorCode PetscSegBufferDestroy(PetscSegBuffer*);
-PETSC_EXTERN PetscErrorCode PetscSegBufferGet(PetscSegBuffer*,PetscInt,void*);
-PETSC_EXTERN PetscErrorCode PetscSegBufferExtractAlloc(PetscSegBuffer*,void*);
-PETSC_EXTERN PetscErrorCode PetscSegBufferExtractTo(PetscSegBuffer*,void*);
-PETSC_EXTERN PetscErrorCode PetscSegBufferExtractInPlace(PetscSegBuffer*,void*);
+PETSC_EXTERN PetscErrorCode PetscSegBufferGet(PetscSegBuffer,PetscInt,void*);
+PETSC_EXTERN PetscErrorCode PetscSegBufferExtractAlloc(PetscSegBuffer,void*);
+PETSC_EXTERN PetscErrorCode PetscSegBufferExtractTo(PetscSegBuffer,void*);
+PETSC_EXTERN PetscErrorCode PetscSegBufferExtractInPlace(PetscSegBuffer,void*);
+PETSC_EXTERN PetscErrorCode PetscSegBufferGetSize(PetscSegBuffer,PetscInt*);
+PETSC_EXTERN PetscErrorCode PetscSegBufferUnuse(PetscSegBuffer,PetscInt);
+
+/* Type-safe wrapper to encourage use of PETSC_RESTRICT. Does not use PetscFunctionBegin because the error handling
+ * prevents the compiler from completely erasing the stub. This is called in inner loops so it has to be as fast as
+ * possible. */
+PETSC_STATIC_INLINE PetscErrorCode PetscSegBufferGetInts(PetscSegBuffer seg,PetscInt count,PetscInt *PETSC_RESTRICT *slot) {return PetscSegBufferGet(seg,count,(void**)slot);}
 
 /* Reset __FUNCT__ in case the user does not define it themselves */
 #undef __FUNCT__

src/dm/impls/plex/plex.c

   ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&segpack);CHKERRQ(ierr);
   ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&segpart);CHKERRQ(ierr);
   for (rank = rStart; rank < rEnd; ++rank) {
-    PetscInt partSize = 0;
-    PetscInt numPoints, offset, p;
+    PetscInt partSize = 0, numPoints, offset, p, *PETSC_RESTRICT placePoints;
 
     ierr = PetscSectionGetDof(pointSection, rank, &numPoints);CHKERRQ(ierr);
     ierr = PetscSectionGetOffset(pointSection, rank, &offset);CHKERRQ(ierr);
       for (c=0; c<closureSize; c++) {
         PetscInt cpoint = closure[c*2];
         if (!PetscBTLookupSet(bt,cpoint-pStart)) {
-          PetscInt *pt;
+          PetscInt *PETSC_RESTRICT pt;
           partSize++;
-          ierr = PetscSegBufferGet(&segpart,1,&pt);CHKERRQ(ierr);
+          ierr = PetscSegBufferGetInts(segpart,1,&pt);CHKERRQ(ierr);
           *pt = cpoint;
         }
       }
       ierr = DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &closureSize, &closure);CHKERRQ(ierr);
     }
     ierr = PetscSectionSetDof(*section, rank, partSize);CHKERRQ(ierr);
-    ierr = PetscSegBufferGet(&segpack,partSize,&packPoints);CHKERRQ(ierr);
-    ierr = PetscSegBufferExtractTo(&segpart,packPoints);CHKERRQ(ierr);
-    ierr = PetscSortInt(partSize,packPoints);CHKERRQ(ierr);
-    for (p=0; p<partSize; p++) {ierr = PetscBTClear(bt,packPoints[p]-pStart);CHKERRQ(ierr);}
+    ierr = PetscSegBufferGetInts(segpack,partSize,&placePoints);CHKERRQ(ierr);
+    ierr = PetscSegBufferExtractTo(segpart,placePoints);CHKERRQ(ierr);
+    ierr = PetscSortInt(partSize,placePoints);CHKERRQ(ierr);
+    for (p=0; p<partSize; p++) {ierr = PetscBTClear(bt,placePoints[p]-pStart);CHKERRQ(ierr);}
   }
   ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
   ierr = PetscSegBufferDestroy(&segpart);CHKERRQ(ierr);
   ierr = PetscSectionGetStorageSize(*section, &newSize);CHKERRQ(ierr);
   ierr = PetscMalloc(newSize * sizeof(PetscInt), &allPoints);CHKERRQ(ierr);
 
-  ierr = PetscSegBufferExtractInPlace(&segpack,&packPoints);CHKERRQ(ierr);
+  ierr = PetscSegBufferExtractInPlace(segpack,&packPoints);CHKERRQ(ierr);
   for (rank = rStart; rank < rEnd; ++rank) {
     PetscInt numPoints, offset;
 

src/mat/impls/aij/seq/matmatmult.c

 #include <petscbt.h>
 #include <../src/mat/impls/dense/seq/dense.h>
 
+static PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat,Mat,PetscReal,Mat*);
+
 #undef __FUNCT__
 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
 {
   PetscErrorCode ierr;
-  PetscBool      scalable=PETSC_FALSE,scalable_fast=PETSC_FALSE,heap = PETSC_FALSE,btheap = PETSC_FALSE;
+  PetscBool      scalable=PETSC_FALSE,scalable_fast=PETSC_FALSE,heap = PETSC_FALSE,btheap = PETSC_FALSE,llcondensed = PETSC_FALSE;
 
   PetscFunctionBegin;
   if (scall == MAT_INITIAL_MATRIX) {
     ierr = PetscOptionsBool("-matmatmult_scalable_fast","Use a scalable but slower C=A*B","",scalable_fast,&scalable_fast,NULL);CHKERRQ(ierr);
     ierr = PetscOptionsBool("-matmatmult_heap","Use heap implementation of symbolic factorization C=A*B","",heap,&heap,NULL);CHKERRQ(ierr);
     ierr = PetscOptionsBool("-matmatmult_btheap","Use btheap implementation of symbolic factorization C=A*B","",btheap,&btheap,NULL);CHKERRQ(ierr);
+    ierr = PetscOptionsBool("-matmatmult_llcondensed","Use LLCondensed to for symbolic C=A*B","",llcondensed,&llcondensed,NULL);CHKERRQ(ierr);
     ierr = PetscOptionsEnd();CHKERRQ(ierr);
     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
     if (scalable_fast) {
       ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_Heap(A,B,fill,C);CHKERRQ(ierr);
     } else if (btheap) {
       ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_BTHeap(A,B,fill,C);CHKERRQ(ierr);
+    } else if (llcondensed) {
+      ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(A,B,fill,C);CHKERRQ(ierr);
     } else {
       ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
     }
 }
 
 #undef __FUNCT__
-#define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
-PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
+#define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed"
+static PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ_LLCondensed(Mat A,Mat B,PetscReal fill,Mat *C)
 {
   PetscErrorCode     ierr;
   Mat_SeqAIJ         *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
   PetscHeap          h;
 
   PetscFunctionBegin;
-  /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */
+  /* Get ci and cj - by merging sorted rows using a heap */
   /*---------------------------------------------------------------------------------------------*/
   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
   ierr  = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
   PetscBT            bt;
 
   PetscFunctionBegin;
-  /* Get ci and cj - same as MatMatMultSymbolic_SeqAIJ_SeqAIJ except using PetscLLxxx_Scalalbe() */
+  /* Get ci and cj - using a heap for the sorted rows, but use BT so that each index is only added once */
   /*---------------------------------------------------------------------------------------------*/
   /* Allocate arrays for fill computation and free space for accumulating nonzero column */
   ierr  = PetscMalloc(((am+1)+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
   PetscFunctionReturn(0);
 }
 
+#undef __FUNCT__
+#define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
+/* concatenate unique entries and then sort */
+PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
+{
+  PetscErrorCode     ierr;
+  Mat_SeqAIJ         *a  = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
+  const PetscInt     *ai = a->i,*bi=b->i,*aj=a->j,*bj=b->j;
+  PetscInt           *ci,*cj;
+  PetscInt           am=A->rmap->N,bn=B->cmap->N,bm=B->rmap->N;
+  PetscReal          afill;
+  PetscInt           i,j,ndouble = 0;
+  PetscSegBuffer     seg,segrow;
+  char               *seen;
+
+  PetscFunctionBegin;
+  ierr  = PetscMalloc((am+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
+  ci[0] = 0;
+
+  /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
+  ierr = PetscSegBufferCreate(sizeof(PetscInt),(PetscInt)(fill*(ai[am]+bi[bm])),&seg);CHKERRQ(ierr);
+  ierr = PetscSegBufferCreate(sizeof(PetscInt),100,&segrow);CHKERRQ(ierr);
+  ierr = PetscMalloc(bn*sizeof(char),&seen);CHKERRQ(ierr);
+  ierr = PetscMemzero(seen,bn*sizeof(char));CHKERRQ(ierr);
+
+  /* Determine ci and cj */
+  for (i=0; i<am; i++) {
+    const PetscInt anzi  = ai[i+1] - ai[i]; /* number of nonzeros in this row of A, this is the number of rows of B that we merge */
+    const PetscInt *acol = aj + ai[i]; /* column indices of nonzero entries in this row */
+    PetscInt packlen = 0,*PETSC_RESTRICT crow;
+    /* Pack segrow */
+    for (j=0; j<anzi; j++) {
+      PetscInt brow = acol[j],bjstart = bi[brow],bjend = bi[brow+1],k;
+      for (k=bjstart; k<bjend; k++) {
+        PetscInt bcol = bj[k];
+        if (!seen[bcol]) { /* new entry */
+          PetscInt *PETSC_RESTRICT slot;
+          ierr = PetscSegBufferGetInts(segrow,1,&slot);CHKERRQ(ierr);
+          *slot = bcol;
+          seen[bcol] = 1;
+          packlen++;
+        }
+      }
+    }
+    ierr = PetscSegBufferGetInts(seg,packlen,&crow);CHKERRQ(ierr);
+    ierr = PetscSegBufferExtractTo(segrow,crow);CHKERRQ(ierr);
+    ierr = PetscSortInt(packlen,crow);CHKERRQ(ierr);
+    ci[i+1] = ci[i] + packlen;
+    for (j=0; j<packlen; j++) seen[crow[j]] = 0;
+  }
+  ierr = PetscSegBufferDestroy(&segrow);CHKERRQ(ierr);
+  ierr = PetscFree(seen);CHKERRQ(ierr);
+
+  /* Column indices are in the segmented buffer */
+  ierr = PetscSegBufferExtractAlloc(seg,&cj);CHKERRQ(ierr);
+  ierr = PetscSegBufferDestroy(&seg);CHKERRQ(ierr);
+
+  /* put together the new symbolic matrix */
+  ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),am,bn,ci,cj,NULL,C);CHKERRQ(ierr);
+
+  (*C)->rmap->bs = A->rmap->bs;
+  (*C)->cmap->bs = B->cmap->bs;
+
+  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
+  /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
+  c          = (Mat_SeqAIJ*)((*C)->data);
+  c->free_a  = PETSC_TRUE;
+  c->free_ij = PETSC_TRUE;
+  c->nonew   = 0;
+
+  (*C)->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;
+
+  /* set MatInfo */
+  afill = (PetscReal)ci[am]/(ai[am]+bi[bm]) + 1.e-5;
+  if (afill < 1.0) afill = 1.0;
+  c->maxnz                     = ci[am];
+  c->nz                        = ci[am];
+  (*C)->info.mallocs           = ndouble;
+  (*C)->info.fill_ratio_given  = fill;
+  (*C)->info.fill_ratio_needed = afill;
+
+#if defined(PETSC_USE_INFO)
+  if (ci[am]) {
+    ierr = PetscInfo3((*C),"Reallocs %D; Fill ratio: given %G needed %G.\n",ndouble,fill,afill);CHKERRQ(ierr);
+    ierr = PetscInfo1((*C),"Use MatMatMult(A,B,MatReuse,%G,&C) for best performance.;\n",afill);CHKERRQ(ierr);
+  } else {
+    ierr = PetscInfo((*C),"Empty matrix product\n");CHKERRQ(ierr);
+  }
+#endif
+  PetscFunctionReturn(0);
+}
+
 /* This routine is not used. Should be removed! */
 #undef __FUNCT__
 #define __FUNCT__ "MatMatTransposeMult_SeqAIJ_SeqAIJ"

src/sys/utils/mpits.c

     if (flag) {                 /* incoming message */
       PetscMPIInt *recvrank;
       void        *buf;
-      ierr      = PetscSegBufferGet(&segrank,1,&recvrank);CHKERRQ(ierr);
-      ierr      = PetscSegBufferGet(&segdata,count,&buf);CHKERRQ(ierr);
+      ierr      = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
+      ierr      = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
       *recvrank = status.MPI_SOURCE;
       ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
       nrecvs++;
     }
   }
   *nfrom = nrecvs;
-  ierr   = PetscSegBufferExtractAlloc(&segrank,fromranks);CHKERRQ(ierr);
+  ierr   = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
   ierr   = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
-  ierr   = PetscSegBufferExtractAlloc(&segdata,fromdata);CHKERRQ(ierr);
+  ierr   = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
   ierr   = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
   PetscFunctionReturn(0);
 }

src/sys/utils/segbuffer.c

 #include <petscsys.h>
 
-/* Segmented (extendable) array implementation */
-struct _n_PetscSegBuffer {
-  PetscInt unitbytes;
+struct _PetscSegBufferLink {
+  struct _PetscSegBufferLink *tail;
   PetscInt alloc;
   PetscInt used;
   PetscInt tailused;
-  PetscSegBuffer tail;
   union {                       /* Dummy types to ensure alignment */
     PetscReal dummy_real;
     PetscInt  dummy_int;
-    char      array[1];
+    char      array[1];         /* This array is over-allocated for the size of the link */
   } u;
 };
 
+/* Segmented (extendable) array implementation */
+struct _n_PetscSegBuffer {
+  struct _PetscSegBufferLink *head;
+  PetscInt unitbytes;
+};
+
 #undef __FUNCT__
 #define __FUNCT__ "PetscSegBufferAlloc_Private"
-static PetscErrorCode PetscSegBufferAlloc_Private(PetscSegBuffer *seg,PetscInt count)
+static PetscErrorCode PetscSegBufferAlloc_Private(PetscSegBuffer seg,PetscInt count)
 {
-  PetscErrorCode ierr;
-  PetscSegBuffer newseg,s;
-  PetscInt       alloc;
+  PetscErrorCode     ierr;
+  PetscInt           alloc;
+  struct _PetscSegBufferLink *newlink,*s;
 
   PetscFunctionBegin;
-  s = *seg;
+  s = seg->head;
   /* Grow at least fast enough to hold next item, like Fibonacci otherwise (up to 1MB chunks) */
-  alloc = PetscMax(s->used+count,PetscMin(1000000/s->unitbytes+1,s->alloc+s->tailused));
-  ierr  = PetscMalloc(offsetof(struct _n_PetscSegBuffer,u)+alloc*s->unitbytes,&newseg);CHKERRQ(ierr);
-  ierr  = PetscMemzero(newseg,offsetof(struct _n_PetscSegBuffer,u));CHKERRQ(ierr);
-
-  newseg->unitbytes = s->unitbytes;
-  newseg->tailused  = s->used + s->tailused;
-  newseg->tail      = s;
-  newseg->alloc     = alloc;
-  *seg              = newseg;
+  alloc = PetscMax(s->used+count,PetscMin(1000000/seg->unitbytes+1,s->alloc+s->tailused));
+  ierr  = PetscMalloc(offsetof(struct _PetscSegBufferLink,u)+alloc*seg->unitbytes,&newlink);CHKERRQ(ierr);
+  ierr  = PetscMemzero(newlink,offsetof(struct _PetscSegBufferLink,u));CHKERRQ(ierr);
+
+  newlink->tailused  = s->used + s->tailused;
+  newlink->tail      = s;
+  newlink->alloc     = alloc;
+  seg->head = newlink;
   PetscFunctionReturn(0);
 }
 
 PetscErrorCode PetscSegBufferCreate(PetscInt unitbytes,PetscInt expected,PetscSegBuffer *seg)
 {
   PetscErrorCode ierr;
+  struct _PetscSegBufferLink *head;
 
   PetscFunctionBegin;
-  ierr = PetscMalloc(offsetof(struct _n_PetscSegBuffer,u)+expected*unitbytes,seg);CHKERRQ(ierr);
-  ierr = PetscMemzero(*seg,offsetof(struct _n_PetscSegBuffer,u));CHKERRQ(ierr);
+  ierr = PetscMalloc(sizeof(struct _n_PetscSegBuffer),seg);CHKERRQ(ierr);
+  ierr = PetscMalloc(offsetof(struct _PetscSegBufferLink,u)+expected*unitbytes,&head);CHKERRQ(ierr);
+  ierr = PetscMemzero(head,offsetof(struct _PetscSegBufferLink,u));CHKERRQ(ierr);
 
+  head->alloc       = expected;
   (*seg)->unitbytes = unitbytes;
-  (*seg)->alloc     = expected;
+  (*seg)->head      = head;
   PetscFunctionReturn(0);
 }
 
 
 .seealso: PetscSegBufferCreate(), PetscSegBufferExtractAlloc(), PetscSegBufferExtractTo(), PetscSegBufferExtractInPlace(), PetscSegBufferDestroy()
 @*/
-PetscErrorCode PetscSegBufferGet(PetscSegBuffer *seg,PetscInt count,void *buf)
+PetscErrorCode PetscSegBufferGet(PetscSegBuffer seg,PetscInt count,void *buf)
 {
   PetscErrorCode ierr;
-  PetscSegBuffer s;
+  struct _PetscSegBufferLink *s;
 
   PetscFunctionBegin;
-  s = *seg;
+  s = seg->head;
   if (PetscUnlikely(s->used + count > s->alloc)) {ierr = PetscSegBufferAlloc_Private(seg,count);CHKERRQ(ierr);}
-  s = *seg;
-  *(char**)buf = &s->u.array[s->used*s->unitbytes];
+  s = seg->head;
+  *(char**)buf = &s->u.array[s->used*seg->unitbytes];
   s->used += count;
   PetscFunctionReturn(0);
 }
 @*/
 PetscErrorCode PetscSegBufferDestroy(PetscSegBuffer *seg)
 {
-  PetscErrorCode ierr;
-  PetscSegBuffer s;
+  PetscErrorCode             ierr;
+  struct _PetscSegBufferLink *s;
 
   PetscFunctionBegin;
-  for (s=*seg; s;) {
-    PetscSegBuffer tail = s->tail;
+  for (s=(*seg)->head; s;) {
+    struct _PetscSegBufferLink *tail = s->tail;
     ierr = PetscFree(s);CHKERRQ(ierr);
     s = tail;
   }
-  *seg = NULL;
+  ierr = PetscFree(*seg);CHKERRQ(ierr);
   PetscFunctionReturn(0);
 }
 
 
 .seealso: PetscSegBufferCreate(), PetscSegBufferGet(), PetscSegBufferDestroy(), PetscSegBufferExtractAlloc(), PetscSegBufferExtractInPlace()
 @*/
-PetscErrorCode PetscSegBufferExtractTo(PetscSegBuffer *seg,void *contig)
+PetscErrorCode PetscSegBufferExtractTo(PetscSegBuffer seg,void *contig)
 {
-  PetscErrorCode ierr;
-  PetscInt       unitbytes;
-  PetscSegBuffer s,t;
-  char           *ptr;
+  PetscErrorCode             ierr;
+  PetscInt                   unitbytes;
+  struct _PetscSegBufferLink *s,*t;
+  char                       *ptr;
 
   PetscFunctionBegin;
-  s = *seg;
-
-  unitbytes = s->unitbytes;
-
+  unitbytes = seg->unitbytes;
+  s = seg->head;
   ptr  = ((char*)contig) + s->tailused*unitbytes;
   ierr = PetscMemcpy(ptr,s->u.array,s->used*unitbytes);CHKERRQ(ierr);
   for (t=s->tail; t;) {
-    PetscSegBuffer tail = t->tail;
+    struct _PetscSegBufferLink *tail = t->tail;
     ptr -= t->used*unitbytes;
     ierr = PetscMemcpy(ptr,t->u.array,t->used*unitbytes);CHKERRQ(ierr);
     ierr = PetscFree(t);CHKERRQ(ierr);
 
 .seealso: PetscSegBufferCreate(), PetscSegBufferGet(), PetscSegBufferDestroy(), PetscSegBufferExtractTo(), PetscSegBufferExtractInPlace()
 @*/
-PetscErrorCode PetscSegBufferExtractAlloc(PetscSegBuffer *seg,void *contiguous)
+PetscErrorCode PetscSegBufferExtractAlloc(PetscSegBuffer seg,void *contiguous)
 {
-  PetscErrorCode ierr;
-  PetscSegBuffer s;
-  void           *contig;
+  PetscErrorCode             ierr;
+  struct _PetscSegBufferLink *s;
+  void                       *contig;
 
   PetscFunctionBegin;
-  s = *seg;
+  s = seg->head;
 
-  ierr = PetscMalloc((s->used+s->tailused)*s->unitbytes,&contig);CHKERRQ(ierr);
+  ierr = PetscMalloc((s->used+s->tailused)*seg->unitbytes,&contig);CHKERRQ(ierr);
   ierr = PetscSegBufferExtractTo(seg,contig);CHKERRQ(ierr);
   *(void**)contiguous = contig;
   PetscFunctionReturn(0);
 /*@C
    PetscSegBufferExtractInPlace - extract in-place contiguous representation of data and reset segmented buffer for reuse
 
-   Collective
+   Not Collective
 
    Input Arguments:
 .  seg - segmented buffer object
 
 .seealso: PetscSegBufferExtractAlloc(), PetscSegBufferExtractTo()
 @*/
-PetscErrorCode PetscSegBufferExtractInPlace(PetscSegBuffer *seg,void *contig)
+PetscErrorCode PetscSegBufferExtractInPlace(PetscSegBuffer seg,void *contig)
 {
   PetscErrorCode ierr;
+  struct _PetscSegBufferLink *head;
 
   PetscFunctionBegin;
-  if (!(*seg)->tail) {
-    *(char**)contig = (*seg)->u.array;
-  } else {
-    PetscSegBuffer s = *seg,newseg;
-
-    ierr = PetscSegBufferCreate(s->unitbytes,s->used+s->tailused,&newseg);CHKERRQ(ierr);
-    ierr = PetscSegBufferExtractTo(seg,newseg->u.array);CHKERRQ(ierr);
-    ierr = PetscSegBufferDestroy(seg);CHKERRQ(ierr);
-    *seg = newseg;
-    *(void**)contig = newseg->u.array;
+  head = seg->head;
+  if (PetscUnlikely(head->tail)) {
+    PetscSegBuffer newseg;
+
+    ierr = PetscSegBufferCreate(seg->unitbytes,head->used+head->tailused,&newseg);CHKERRQ(ierr);
+    ierr = PetscSegBufferExtractTo(seg,newseg->head->u.array);CHKERRQ(ierr);
+    seg->head = newseg->head;
+    newseg->head = head;
+    ierr = PetscSegBufferDestroy(&newseg);CHKERRQ(ierr);
+    head = seg->head;
   }
+  *(char**)contig = head->u.array;
+  head->used = 0;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "PetscSegBufferGetSize"
+/*@C
+   PetscSegBufferGetSize - get currently used size of segmented buffer
+
+   Not Collective
+
+   Input Arguments:
+.  seg - segmented buffer object
+
+   Output Arguments:
+.  usedsize - number of used units
+
+   Level: developer
+
+.seealso: PetscSegBufferExtractAlloc(), PetscSegBufferExtractTo(), PetscSegBufferCreate(), PetscSegBufferGet()
+@*/
+PetscErrorCode PetscSegBufferGetSize(PetscSegBuffer seg,PetscInt *usedsize)
+{
+
+  PetscFunctionBegin;
+  *usedsize = seg->head->tailused + seg->head->used;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "PetscSegBufferUnuse"
+/*@C
+   PetscSegBufferUnuse - return some unused entries obtained with an overzealous PetscSegBufferGet()
+
+   Not Collective
+
+   Input Arguments:
++  seg - segmented buffer object
+-  unused - number of unused units
+
+   Level: developer
+
+.seealso: PetscSegBufferCreate(), PetscSegBufferGet()
+@*/
+PetscErrorCode PetscSegBufferUnuse(PetscSegBuffer seg,PetscInt unused)
+{
+  struct _PetscSegBufferLink *head;
+
+  PetscFunctionBegin;
+  head = seg->head;
+  if (PetscUnlikely(head->used < unused)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Attempt to return more unused entries (%D) than previously gotten (%D)",unused,head->used);
+  head->used -= unused;
   PetscFunctionReturn(0);
 }
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.