Commits

Nathan Collier committed 22c1fc0

initial checkin

Comments (0)

Files changed (44)

+c
+c     This subroutine modifies the local stiffness matrix and load vector
+c       to account for constrained nodes. At present we only allow for no
+c       constraint or for zero displacement. Future work will include 
+c       allowing for prescribed nonzero displacements.
+c
+c     This is for the 3D version.
+c
+c
+c     January 20, 2004
+c
+c
+c     J. Austin Cottrell, III
+c     Jurijs Bazilevs
+c
+c     CAM Graduate Students
+c     Institute for Computational Engineering Science
+c     The University of Texas at Austin
+
+      subroutine BCLhs2(iel, xKebe, Rhs, counter)
+      
+      use aAdjKeep
+      
+      include "common.h"
+      
+      integer iel, aa, bb, cc, counter
+      real*8 xKebe(NEL_NZA*NSD*NSD*NSHL*NSHL), Rhs(NEL_NZA*NSD*NSHL) 
+      integer offsetL 
+      integer offsetR 
+      
+      offsetL = (counter-1)*NSHL*NSHL*NSD*NSD
+      offsetR = (counter-1)*NSHL*NSD            
+      
+c...  loop through local nodes
+      
+      do aa = 1, NSHL
+        
+        cc = IEN(iel,aa)        ! global node number of local node aa
+        
+        
+c...    Check status of constraint in x direction
+        if (IBC(cc,1).eq.1) then 
+          
+          
+c...      update matrix to account for constraint
+          
+          do bb = 1, NSHL       ! project away row (X-dir)             
+            
+c            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+c     &       ((bb-1)*NSD*NSD)+1) = 0d+0
+c            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+c     &       ((bb-1)*NSD*NSD)+2) = 0d+0
+c            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+c     &       ((bb-1)*NSD*NSD)+3) = 0d+0
+     
+		    xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+     &				((bb-1)*NSD)+1) = 0d+0
+            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+     & 				((bb-1)*NSD)+2) = 0d+0
+            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+     &				((bb-1)*NSD)+3) = 0d+0
+            
+          enddo
+          
+          do bb = 1, NSHL       !project away column (X-dir)
+            
+c            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+
+c     &       ((aa-1)*NSD*NSD)+1) = 0d+0
+c            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+
+c     &       ((aa-1)*NSD*NSD)+4) = 0d+0
+c            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+
+c     &       ((aa-1)*NSD*NSD)+7) = 0d+0
+     
+   			xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+
+     &				((aa-1)*NSD)+1) = 0d+0
+            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     &				((aa-1)*NSD)+1) = 0d+0
+            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     & 				((aa-1)*NSD)+1) = 0d+0
+            
+          enddo
+          
+          	xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+     &			((aa-1)*NSD)+1) = 1d+0
+          Rhs(offsetR + aa) = 0d+0      ! rhs
+          
+        endif
+        
+        
+c...    Check for constraint in y direction
+        
+        if (IBC(cc,2).eq.1) then
+          
+          
+          do bb = 1, NSHL       ! project away row (Y-dir)             
+            
+c            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+c     &       ((bb-1)*NSD*NSD)+4) = 0d+0
+c            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+c     &       ((bb-1)*NSD*NSD)+5) = 0d+0
+c            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+c     &       ((bb-1)*NSD*NSD)+6) = 0d+0
+     
+ 		    xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     &				((bb-1)*NSD)+1) = 0d+0
+            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     & 				((bb-1)*NSD)+2) = 0d+0
+            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     &				((bb-1)*NSD)+3) = 0d+0
+            
+          enddo
+          
+          do bb = 1, NSHL       !project away column (Y-dir)
+            
+c            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+
+c     &       ((aa-1)*NSD*NSD)+2) = 0d+0
+c            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+
+c     &       ((aa-1)*NSD*NSD)+5) = 0d+0
+c            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+
+c     &       ((aa-1)*NSD*NSD)+8) = 0d+0
+     
+            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+
+     &				((aa-1)*NSD)+2) = 0d+0
+            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     & 				((aa-1)*NSD)+2) = 0d+0
+            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     & 				((aa-1)*NSD)+2) = 0d+0
+     
+            
+          enddo
+          
+          xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+ (NSHL*NSD)+
+     &       ((aa-1)*NSD)+2) = 1d+0
+          Rhs(offsetR + NSHL + aa) = 0d+0      ! rhs                    
+          
+        endif
+        
+c...    Check for constraint in z direction
+        
+        if (IBC(cc,3).eq.1) then
+          
+          
+          do bb = 1, NSHL       ! project away row (Y-dir)                         
+          
+c            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+c     &       ((bb-1)*NSD*NSD)+7) = 0d+0    
+c            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+c     &       ((bb-1)*NSD*NSD)+8) = 0d+0
+c            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+
+c     &       ((bb-1)*NSD*NSD)+9) = 0d+0
+     
+		    xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     &				((bb-1)*NSD)+1) = 0d+0
+            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     & 				((bb-1)*NSD)+2) = 0d+0
+            xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     &				((bb-1)*NSD)+3) = 0d+0
+            
+          enddo
+          
+          do bb = 1, NSHL       !project away column (Y-dir)
+            
+c             xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+
+c     &       ((aa-1)*NSD*NSD)+3) = 0d+0
+c            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+
+c     &       ((aa-1)*NSD*NSD)+6) = 0d+0
+c            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+
+c     &       ((aa-1)*NSD*NSD)+9) = 0d+0
+     
+	  	    xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+
+     &				((aa-1)*NSD)+3) = 0d+0
+            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     & 				((aa-1)*NSD)+3) = 0d+0
+            xKebe(offsetL+((bb-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     & 				((aa-1)*NSD)+3) = 0d+0
+            
+          enddo
+          
+           xKebe(offsetL+((aa-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     &       ((aa-1)*NSD)+3) = 1d+0
+          Rhs(offsetR + (2*NSHL) + aa) = 0d+0      ! rhs
+          
+        endif         
+        
+        
+      enddo
+      
+      return
+      end
+      
+      
+c
+c     This subroutine modifies the local stiffness matrix and load vector
+c       to account for constrained nodes. At present we only allow for no
+c       constraint or for zero displacement. Future work will include 
+c       allowing for prescribed nonzero displacements.
+c
+c     This is for the 3D version.
+c
+c
+c     January 20, 2004
+c
+c
+c     J. Austin Cottrell, III
+c     Jurijs Bazilevs
+c
+c     CAM Graduate Students
+c     Institute for Computational Engineering Science
+c     The University of Texas at Austin
+
+      subroutine BCLhs_3D(iel, xKebe, Rhs)
+      
+      use aAdjKeep
+      
+      include "common.h"
+      
+      integer iel, aa, bb, cc
+      real*8 xKebe(NSD*NSD,NSHL,NSHL), Rhs(NSD,NSHL) 
+      
+      
+      
+c...  loop through local nodes
+      
+      do aa = 1, NSHL
+        
+        cc = IEN(iel,aa)        ! global node number of local node aa
+        
+        
+c...    Check status of constraint in x direction
+        if (IBC(cc,1).eq.1) then 
+          
+          
+c...      update matrix to account for constraint
+          
+          do bb = 1, NSHL       ! project away row (X-dir)             
+            
+            xKebe(1,aa,bb) = 0d+0
+            xKebe(2,aa,bb) = 0d+0
+            xKebe(3,aa,bb) = 0d+0
+            
+          enddo
+          
+          do bb = 1, NSHL       !project away column (X-dir)
+            
+            xKebe(1,bb,aa) = 0d+0
+            xKebe(4,bb,aa) = 0d+0
+            xKebe(7,bb,aa) = 0d+0
+            
+          enddo
+          
+          xKebe(1,aa,aa) = 1d+0
+          Rhs(1,aa) = 0d+0      ! rhs
+          
+        endif
+        
+        
+c...    Check for constraint in y direction
+        
+        if (IBC(cc,2).eq.1) then
+          
+          
+          do bb = 1, NSHL       ! project away row (Y-dir)             
+            
+            xKebe(4,aa,bb) = 0d+0
+            xKebe(5,aa,bb) = 0d+0
+            xKebe(6,aa,bb) = 0d+0
+            
+          enddo
+          
+          do bb = 1, NSHL       !project away column (Y-dir)
+            
+            xKebe(2,bb,aa) = 0d+0
+            xKebe(5,bb,aa) = 0d+0
+            xKebe(8,bb,aa) = 0d+0
+            
+          enddo
+          
+          xKebe(5,aa,aa) = 1d+0           
+          Rhs(2,aa) = 0d+0      ! rhs
+          
+        endif
+        
+c...    Check for constraint in z direction
+        
+        if (IBC(cc,3).eq.1) then
+          
+          
+          do bb = 1, NSHL       ! project away row (Y-dir)             
+            
+            xKebe(7,aa,bb) = 0d+0
+            xKebe(8,aa,bb) = 0d+0
+            xKebe(9,aa,bb) = 0d+0
+            
+          enddo
+          
+          do bb = 1, NSHL       !project away column (Y-dir)
+            
+            xKebe(3,bb,aa) = 0d+0
+            xKebe(6,bb,aa) = 0d+0
+            xKebe(9,bb,aa) = 0d+0
+            
+          enddo
+          
+          xKebe(9,aa,aa) = 1d+0           
+          Rhs(3,aa) = 0d+0      ! rhs
+          
+        endif         
+        
+        
+      enddo
+      
+      return
+      end
+      
+      

FaceAssembly_3D.f

+c     This subroutine calculates the contributions to the left hand
+c      side forcing vector due to boundary faces.
+c
+c     This is for the 3D case.
+c     
+c     
+c     July 1, 2003
+c     J. Austin Cottrell
+c     CES Graduate Student
+c     Texas Institute for Computational Engineering Science
+c     University of Texas at Austin
+c
+c     Modified from 2D "BdryEdjAss.f"  
+c     January 20, 2004
+c     J. Austin Cottrell
+
+
+      subroutine FaceAssembly_3D
+      
+      use aAdjKeep
+
+      include "common.h"
+      
+      
+      integer ifac, nshb, igaussb, jgaussb, i, g, ni, nj,aa,
+     &     nk, area_flag
+      real*8  sigx,sigy,sigz,gp(NGAUSS), gw(NGAUSS), da, gwt
+      real*8  Rhsb(NSD,NSHL), shb(NSHL), shbg(NSHL,NSD), nor(NSD),
+     &  Pres, norsurf,xl(3)
+    
+c     shb will be the shape function array while shbg will hold the
+c     gradients of the shape functions
+
+      
+c     *************************************************************
+c...  Loop over Faces
+      norsurf = 0d+0
+      
+      do ifac = 1, NFACE
+        area_flag = 0           ! initialize flag
+        
+c...    Check if face is of nonzero area
+        
+        ni = INN(FACE_IEN(ifac,1),1) ! get NURBS coordinates
+        nj = INN(FACE_IEN(ifac,1),2)
+        nk = INN(FACE_IEN(ifac,1),3)
+        
+        if ((FACE_OR(ifac).eq.1).or.(FACE_OR(ifac).eq.6)) then
+          da = (U_KNOT(ni+1) - U_KNOT(ni))* 
+     &      (V_KNOT(nj+1) - V_KNOT(nj))/4d+0 ! change in gwt
+                                ! due to mapping from
+                                ! [-1,1] x [-1,1]
+          if((U_KNOT(ni).eq.(U_KNOT(ni+1))).or.
+     &      (V_KNOT(nj).eq.(V_KNOT(nj+1))) ) then
+            area_flag = 1       ! indicates face has zero area
+          endif
+          
+          
+        else if ((FACE_OR(ifac).eq.2).or.(FACE_OR(ifac).eq.4)) then
+          da = (U_KNOT(ni+1) - U_KNOT(ni))* 
+     &      (W_KNOT(nk+1) - W_KNOT(nk))/4d+0 ! change in gwt
+                                ! due to mapping from
+                                ! [-1,1] x [-1,1]
+          if((U_KNOT(ni).eq.(U_KNOT(ni+1))).or.
+     &      (W_KNOT(nk).eq.(W_KNOT(nk+1))) ) then
+            area_flag = 1       ! indicates face has zero area
+          endif
+          
+        else if ((FACE_OR(ifac).eq.3).or.(FACE_OR(ifac).eq.5)) then
+          da = (V_KNOT(nj+1) - V_KNOT(nj))* 
+     &      (W_KNOT(nk+1) - W_KNOT(nk))/4d+0 ! change in gwt
+                                ! due to mapping from
+                                ! [-1,1] x [-1,1]
+          if((V_KNOT(nj).eq.(V_KNOT(nj+1))).or.
+     &      (W_KNOT(nk).eq.(W_KNOT(nk+1))) ) then
+            area_flag = 1       ! indicates face has zero area
+          endif
+          
+        endif
+        
+        
+c...    If face has zero area, skip entirely
+        if (area_flag.eq.0) then
+          
+          
+c..       Check for loads applied to the faces
+          if (((IBC_FACE(ifac,1).eq.2).or.(IBC_FACE(ifac,2).eq.2)).or.
+     &      (IBC_FACE(ifac,3).eq.2)) then
+            
+                                ! skip boundary integrals if load not prescr.
+            
+            
+c...        set up element parameters
+            
+            
+      
+c...          get Gauss Point/Weight Arrays
+            
+            call genGPandGW(gp,gw)
+
+            Rhsb = 0d+0
+            shb = 0d+0
+            shbg = 0d+0
+            
+c...        Loop over integration points
+            
+            do igaussb = 1, NGAUSS
+              do jgaussb = 1, NGAUSS
+                
+c...            Get Boundary Shape functions and set Jacobian
+                
+                call eval_FACE_3D(ifac,gp(igaussb), gp(jgaussb),
+     &            ni, nj, nk, shb, shbg, nor)
+                                ! eval_FACE_3D actually does not calculate the gradient
+                                ! This has been fixed in the much more sophisticated
+                                ! multi-patch code. There has been no need for it here
+                                ! as the single patch code is no more than an
+                                ! instructional toy.
+                                !   - jac3, Sept. 27, 2005
+
+                
+c...            assemble local load vector              
+                gwt = gw(igaussb)*gw(jgaussb)*da
+
+c...            Get boundary loads for this edge
+                sigx = LD_FACE(ifac,1)
+                sigy = LD_FACE(ifac,2)
+                sigz = LD_FACE(ifac,3)                
+!                   call e3bRhs_3D(shb,sigx,sigy,sigz,gwt,Rhsb)
+                                !     User-Defined Pressurization
+                Pres = 1d3      !     Set Value of Pres
+                call e3bRhs_3D_pres(shb,nor,gwt,Rhsb,Pres,norsurf)
+
+             enddo
+          enddo
+          
+          
+c...      check if displacements are prescribed 
+c...      and assemble into global force vector           
+          do i = 1,NSHL
+            g = FACE_IEN(ifac,i) ! global node number     
+            if(IBC(g,1).eq.1) Rhsb(1,i) = 0d+0 ! in x-direction               
+            if(IBC(g,2).eq.1) Rhsb(2,i) = 0d+0 ! in y-direction             
+            if(IBC(g,3).eq.1) Rhsb(3,i) = 0d+0 ! in z-direction
+            
+            ! assemble
+            RHSG(g,1) = RHSG(g,1) + Rhsb(1,i)
+            RHSG(g,2) = RHSG(g,2) + Rhsb(2,i)
+            RHSG(g,3) = RHSG(g,3) + Rhsb(3,i)            
+          enddo
+          
+        endif
+      endif
+      enddo
+      
+      write(*,*) "Surface Area=    ", norsurf
+      
+      return
+      end
+      
+      
+
+      
+      
+c
+c Assemble global stiffness matrix from local stiffness matrix, taking 
+c     advantage of sparsity. Currently assumes 2d. Will be expanded in 
+c     future.
+c
+c modified from original code of Jurijs Bazilevs, who got it on the Russian
+c     black market in exchange for a 1 lbs. can of caviar.
+c
+c July 1, 2003 
+c J. Austin Cottrell, III
+c
+c Modified for 3D Nov. 15, 2003
+c Jurijs Bazilevs
+
+      subroutine FillSparseMat2(iel, col, row, xKebe, counter) 
+      
+      use aAdjKeep
+      
+      include "common.h"
+      
+      integer iel, row(nnodz*8*(P+1)*(Q+1)*(R+1)), col(nnodz+1)
+      
+      integer a, b, c, d, ee, n, k, locn, i
+      
+      integer offsetL, counter
+      
+      real*8 xKebe(NEL_NZA*NSD*NSD*NSHL*NSHL)      
+      
+      
+      
+      offsetL = (counter-1)*NSHL*NSHL*NSD*NSD         
+          
+      do a = 1, NSHL
+        i = IEN(iel,a)
+        c = col(i)
+        n = col(i+1) - c
+        do b = 1, NSHL
+          
+          call SparseMatLoc_3D (row(c), n, IEN(iel,b), locn) 
+          
+c			print *, iel, row(c), n, IEN(iel,b), locn
+          
+          k = locn + c - 1
+          
+          LHSK(1,k) = LHSK(1,k) +
+     &     xKebe(offsetL+((a-1)*NSHL*NSD*NSD)+
+     &			((b-1)*NSD)+1)
+          LHSK(2,k) = LHSK(2,k) +
+     &     xKebe(offsetL+((a-1)*NSHL*NSD*NSD)+
+     &			((b-1)*NSD)+2)
+          LHSK(3,k) = LHSK(3,k) +
+     &     xKebe(offsetL+((a-1)*NSHL*NSD*NSD)+
+     &			((b-1)*NSD)+3)
+     
+     
+          LHSK(4,k) = LHSK(4,k) +
+     &     xKebe(offsetL+((a-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     &			((b-1)*NSD)+1)
+          LHSK(5,k) = LHSK(5,k) + 
+     &     xKebe(offsetL+((a-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     &			((b-1)*NSD)+2)
+          LHSK(6,k) = LHSK(6,k) + 
+     &     xKebe(offsetL+((a-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     &			((b-1)*NSD)+3)
+     
+     
+          LHSK(7,k) = LHSK(7,k) + 
+     &     xKebe(offsetL+((a-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     &			((b-1)*NSD)+1)
+          LHSK(8,k) = LHSK(8,k) + 
+     &     xKebe(offsetL+((a-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     &			((b-1)*NSD)+2)
+          LHSK(9,k) = LHSK(9,k) + 
+     &     xKebe(offsetL+((a-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     &			((b-1)*NSD)+3)            
+     
+     
+c 		print *, LHSK(1,k), LHSK(2,k), LHSK(3,k)
+c		print *, LHSK(4,k), LHSK(5,k), LHSK(6,k)               		
+c		print *, LHSK(7,k), LHSK(8,k), LHSK(9,k)  
+c		print * 
+          
+        enddo
+        
+      enddo
+      
+      return
+      end

FillSparseMat_3D.f

+c
+c Assemble global stiffness matrix from local stiffness matrix, taking 
+c     advantage of sparsity. Currently assumes 2d. Will be expanded in 
+c     future.
+c
+c modified from original code of Jurijs Bazilevs, who got it on the Russian
+c     black market in exchange for a 1 lbs. can of caviar.
+c
+c July 1, 2003 
+c J. Austin Cottrell, III
+c
+c Modified for 3D Nov. 15, 2003
+c Jurijs Bazilevs
+
+      subroutine FillSparseMat_3D(iel, col, row, xKebe) 
+      
+      use aAdjKeep
+      
+      include "common.h"
+      
+      integer iel, row(nnodz*8*(P+1)*(Q+1)*(R+1)), col(nnodz+1)
+      
+      integer a, b, c, d, ee, n, k, locn, i
+      
+      real*8 xKebe(NSD*NSD,NSHL,NSHL)
+      
+      
+      do a = 1, NSHL
+        i = IEN(iel,a)
+        c = col(i)
+        n = col(i+1) - c
+        do b = 1, NSHL
+          
+          call SparseMatLoc_3D (row(c), n, IEN(iel,b), locn) 
+          
+          k = locn + c - 1
+          
+          LHSK(1,k) = LHSK(1,k) + xKebe(1,a,b)
+          LHSK(2,k) = LHSK(2,k) + xKebe(2,a,b)
+          LHSK(3,k) = LHSK(3,k) + xKebe(3,a,b)
+          LHSK(4,k) = LHSK(4,k) + xKebe(4,a,b)
+          LHSK(5,k) = LHSK(5,k) + xKebe(5,a,b)
+          LHSK(6,k) = LHSK(6,k) + xKebe(6,a,b)
+          LHSK(7,k) = LHSK(7,k) + xKebe(7,a,b)
+          LHSK(8,k) = LHSK(8,k) + xKebe(8,a,b)
+          LHSK(9,k) = LHSK(9,k) + xKebe(9,a,b)            
+          
+        enddo
+        
+      enddo
+      
+      return
+      end
+      
+c*      subroutine FillSkylineMat_3D_scalar(iel, ndiag, xKebe) 
+c*      
+c*      use aAdjKeep
+c*      
+c*      include "common.h"
+c*      
+c*      integer iel, ndiag(NNODZ)
+c*      
+c*      integer a, b, c, d, ee, n, k, locn, i, j
+c*      
+c*      real*8 xKebe(NSD*NSD,NSHL,NSHL)
+c*      
+c*      
+c*      do a = 1, NSHL
+c*        j = IEN(iel,a)
+c*        do b = 1, NSHL
+c*          i = IEN(iel,b)
+c*          if ( i .le. j ) then
+c*            
+c*            k = ndiag(j) - (j-i)
+c*            
+c*            LHSK(1,k) = LHSK(1,k) + xKebe(1,b,a)
+c*            LHSK(2,k) = LHSK(2,k) + xKebe(2,b,a)
+c*            LHSK(3,k) = LHSK(3,k) + xKebe(3,b,a)
+c*            LHSK(4,k) = LHSK(4,k) + xKebe(4,b,a)
+c*            LHSK(5,k) = LHSK(5,k) + xKebe(5,b,a)
+c*            LHSK(6,k) = LHSK(6,k) + xKebe(6,b,a)
+c*            LHSK(7,k) = LHSK(7,k) + xKebe(7,b,a)
+c*            LHSK(8,k) = LHSK(8,k) + xKebe(8,b,a)
+c*            LHSK(9,k) = LHSK(9,k) + xKebe(9,b,a)            
+c*            
+c*          endif
+c*          
+c*        enddo
+c*        
+c*      enddo
+c*      
+c*      return
+c*      end
+
+      
+      subroutine FillSkylineMat_3D_vector(iel, ndiag, xKebe) 
+      
+      use aAdjKeep
+      
+      include "common.h"
+      
+      integer iel, ndiag(NSD*NNODZ)
+      
+      integer a, b, c, d, ee, n, k, locn, i, j,
+     &  jv1, jv2, jv3, k1, k2, k3, k4, k5, k6, k7, k8, k9
+      
+      real*8 xKebe(NSD*NSD,NSHL,NSHL)
+      
+      
+      do a = 1, NSHL
+        j = IEN(iel,a)
+        jv1 = 3*(j-1)+1
+        jv2 = jv1+1
+        jv3 = jv2+1
+        do b = 1, NSHL
+          i = IEN(iel,b)
+          
+          if ( i .lt. j ) then  ! 9 entries
+            
+            k7 = ndiag(jv1) - 3*(j-i) + 2
+            k4 = ndiag(jv1) - 3*(j-i) + 1
+            k1 = ndiag(jv1) - 3*(j-i) 
+            
+            k8 = ndiag(jv2) - 3*(j-i) + 1
+            k5 = ndiag(jv2) - 3*(j-i) 
+            k2 = ndiag(jv2) - 3*(j-i) - 1
+            
+            k9 = ndiag(jv3) - 3*(j-i)
+            k6 = ndiag(jv3) - 3*(j-i) - 1
+            k3 = ndiag(jv3) - 3*(j-i) - 2
+            
+            LHSKV(k1) = LHSKV(k1) + xKebe(1,b,a)
+            LHSKV(k2) = LHSKV(k2) + xKebe(2,b,a)
+            LHSKV(k3) = LHSKV(k3) + xKebe(3,b,a)
+            LHSKV(k4) = LHSKV(k4) + xKebe(4,b,a)
+            LHSKV(k5) = LHSKV(k5) + xKebe(5,b,a)
+            LHSKV(k6) = LHSKV(k6) + xKebe(6,b,a)
+            LHSKV(k7) = LHSKV(k7) + xKebe(7,b,a)
+            LHSKV(k8) = LHSKV(k8) + xKebe(8,b,a)
+            LHSKV(k9) = LHSKV(k9) + xKebe(9,b,a)
+            
+          else if (i.eq.j) then ! 6 entries (diagonal block)
+            
+            k1 = ndiag(jv1)
+            k2 = ndiag(jv2) - 1
+            k3 = ndiag(jv3) - 2
+            
+            k5 = ndiag(jv2)
+            k6 = ndiag(jv3) - 1
+            
+            k9 = ndiag(jv3)
+            
+            LHSKV(k1) = LHSKV(k1) + xKebe(1,b,a)
+            LHSKV(k2) = LHSKV(k2) + xKebe(2,b,a)
+            LHSKV(k3) = LHSKV(k3) + xKebe(3,b,a)
+            
+            LHSKV(k5) = LHSKV(k5) + xKebe(5,b,a)
+            LHSKV(k6) = LHSKV(k6) + xKebe(6,b,a)
+            
+            
+            LHSKV(k9) = LHSKV(k9) + xKebe(9,b,a)
+            
+          endif
+          
+        enddo
+        
+      enddo
+      
+      return
+      end
+      
+      subroutine FillSkylineMat_3D_vector_iper(iel, ndiag, xKebe) 
+      
+      use aAdjKeep
+      
+      include "common.h"
+      
+      integer iel, ndiag(NSD*NNODZ)
+      
+      integer a, b, c, d, ee, n, k, locn, i, j,
+     &  jv1, jv2, jv3, k1, k2, k3, k4, k5, k6, k7, k8, k9,
+     &  im, jm, jv1m, jv2m, jv3m
+      
+      real*8 xKebe(NSD*NSD,NSHL,NSHL)
+      
+      
+      do a = 1, NSHL
+        
+        j = IEN(iel,a)
+        jv1 = 3*(j-1)+1
+        jv2 = jv1+1
+        jv3 = jv2+1
+        
+        jm = IPER(j)
+        jv1m = 3*(jm-1)+1
+        jv2m = jv1m+1
+        jv3m = jv2m+1
+        do b = 1, NSHL
+          i = IEN(iel,b)
+          im = IPER(i)
+          
+          if (im .lt. jm) then  ! 9 entries
+            
+            k7 = ndiag(jv1m) - 3*(jm-im) + 2
+            k4 = ndiag(jv1m) - 3*(jm-im) + 1
+            k1 = ndiag(jv1m) - 3*(jm-im) 
+            
+            k8 = ndiag(jv2m) - 3*(jm-im) + 1
+            k5 = ndiag(jv2m) - 3*(jm-im) 
+            k2 = ndiag(jv2m) - 3*(jm-im) - 1
+            
+            k9 = ndiag(jv3m) - 3*(jm-im)
+            k6 = ndiag(jv3m) - 3*(jm-im) - 1
+            k3 = ndiag(jv3m) - 3*(jm-im) - 2
+            
+            LHSKV(k1) = LHSKV(k1) + xKebe(1,b,a)
+            LHSKV(k2) = LHSKV(k2) + xKebe(2,b,a)
+            LHSKV(k3) = LHSKV(k3) + xKebe(3,b,a)
+            LHSKV(k4) = LHSKV(k4) + xKebe(4,b,a)
+            LHSKV(k5) = LHSKV(k5) + xKebe(5,b,a)
+            LHSKV(k6) = LHSKV(k6) + xKebe(6,b,a)
+            LHSKV(k7) = LHSKV(k7) + xKebe(7,b,a)
+            LHSKV(k8) = LHSKV(k8) + xKebe(8,b,a)
+            LHSKV(k9) = LHSKV(k9) + xKebe(9,b,a)
+            
+          else if (im.eq.jm) then ! 6 entries (diagonal block, master only)
+            
+            k1 = ndiag(jv1m)
+            k2 = ndiag(jv2m) - 1
+            k3 = ndiag(jv3m) - 2
+            
+            k5 = ndiag(jv2m)
+            k6 = ndiag(jv3m) - 1
+            
+            k9 = ndiag(jv3m)
+            
+            LHSKV(k1) = LHSKV(k1) + xKebe(1,b,a)
+            LHSKV(k2) = LHSKV(k2) + xKebe(2,b,a)
+            LHSKV(k3) = LHSKV(k3) + xKebe(3,b,a)
+            
+            LHSKV(k5) = LHSKV(k5) + xKebe(5,b,a)
+            LHSKV(k6) = LHSKV(k6) + xKebe(6,b,a)
+            
+            
+            LHSKV(k9) = LHSKV(k9) + xKebe(9,b,a)
+            
+            if (IPER(j).ne.j) then
+              
+              k1 = ndiag(jv1)
+              k2 = ndiag(jv2) - 1
+              k3 = ndiag(jv3) - 2
+              
+              k5 = ndiag(jv2)
+              k6 = ndiag(jv3) - 1
+              
+              k9 = ndiag(jv3)
+              
+              LHSKV(k1) = 1d+0
+              LHSKV(k5) = 1d+0
+              LHSKV(k9) = 1d+0
+              
+c              LHSKV(k2) = 0d+0
+c              LHSKV(k3) = 0d+0
+c              LHSKV(k6) = 0d+0
+              
+            endif
+            
+          endif
+          
+        enddo
+        
+      enddo
+      
+      return
+      end
+      
+c     Modified Dec. 7, 2009
+c     Cristhopper Armenta
+      
+      subroutine IntElmAss2(col, row)
+      
+      use aAdjKeep
+
+      include "common.h"
+      
+
+
+c...  Local variables
+      integer iel, igauss, jgauss, kgauss, i, j, k, a, b, c, d, e
+      integer offsetR, offsetL, MCP_NZA, NCP_NZA, OCP_NZA, counter
+      integer row(NNODZ*8*(P+1)*(Q+1)*(R+1)), col(NNODZ+1), ni,nj, nk
+      real*8  shg(NSHL), shgradl(NSHL,NSD),
+     &     shgradg(NSHL,NSD)
+      real*8 gp(NGAUSS), gw(NGAUSS), gwt, da, volm, Pload
+      real*8 gwp(NGAUSS*NGAUSS*NGAUSS)
+      real*8 lambda, numod, Emod, mu, fx, fy, fz, yl(NSHL,NSD)
+c     real*8 xKebe(NSD*NSD,NSHL,NSHL), Rhs(NSD,NSHL)
+!     real*8 xKebe(NSD*NSD*NSHL*NSHL*NEL), Rhs(NSD*NSHL*NEL)
+      
+!     For timing purposes      
+      real start,finish, begin,endl,beginloop,endloop, time0,time1 
+      real time2, time3, time4
+      
+c     goto 250
+!     Variables to hold the gauss points at every iteration 
+!     while generating the evaluated basis functions 
+      real*8 u, v, w
+!     Basis functions evaluated over all gausspoints      
+c     real*8 N(2*(P+1)*NGAUSS*NEL), M(2*(Q+1)*NGAUSS*NEL) 
+c     real*8 O(2*(R+1)*NGAUSS*NEL)
+      real*8, allocatable :: M(:), N(:), O(:)
+      real*8, allocatable :: xKebe(:), Rhs(:)
+      integer, allocatable :: NZA(:), NZM(:), NZN(:), NZO(:)    
+      
+c     250     continue
+      
+c     open (666, file='inputs.dat', status='unknown')
+      
+      call cpu_time(start)    
+
+c...  get Gaussian points and weights
+      gp = 0d+0
+      gw = 0d+0
+      call genGPandGW(gp,gw)
+      
+      volm = 0d+0               ! Volume of the solid (for debugging)
+      NEL_NZA = 0               ! Elements of non-zero area
+      MCP_NZA = 0				! Counting nonzero elements in 1D
+      NCP_NZA = 0
+      OCP_NZA = 0
+      d = 1
+      counter = 0				! Make it zero for counting NZA
+      
+c     ***********************************************
+
+c..   Counting nonzero spans in 1D M
+      do ni = 1, MCP+P  
+         if( dabs( U_KNOT( ni+1 ) - U_KNOT( ni ) )  > 1.0d-12 ) then
+            MCP_NZA = MCP_NZA + 1
+         endif
+      enddo
+
+c...    Counting nonzero spans in N  
+      do ni = 1, NCP+Q
+         if( dabs( V_KNOT( ni+1 ) - V_KNOT( ni ) )  > 1.0d-12 ) then
+            NCP_NZA = NCP_NZA + 1
+         endif
+      enddo	 
+	  
+c...    Counting nonzero spans in O	  
+      do ni = 1, OCP+R
+         if( dabs( W_KNOT( ni+1 ) - W_KNOT( ni ) ) > 1.0d-12 ) then
+            OCP_NZA = OCP_NZA + 1
+         endif
+      enddo	
+	  
+	  
+c     Multiplying the nonzero elements in 1D to find the number in 3D
+      NEL_NZA = MCP_NZA * NCP_NZA * OCP_NZA	  
+      
+c     Allocating dynamically vectors with basisfuns across all gausspoints
+      allocate (M(2*(P+1)*NGAUSS*MCP_NZA))
+      allocate (N(2*(Q+1)*NGAUSS*NCP_NZA))
+      allocate (O(2*(R+1)*NGAUSS*OCP_NZA))
+      
+      
+c     Allocating dynamically xKebe and Rhs until we know the amount of nonzero elements
+      allocate (xKebe(NSD*NSD*NSHL*NSHL*NEL_NZA), Rhs(NSD*NSHL*NEL_NZA))
+
+c     Allocating Nonzero elements    
+      allocate (NZA(NEL_NZA)) 
+      allocate (NZM(NEL_NZA))
+      allocate (NZN(NEL_NZA))
+      allocate (NZO(NEL_NZA))         
+     	
+c     Counters for NZM,NZN,NZO. For a given non-zero span, what are my
+c     1D non-zero span IDs?
+      i = 0 
+      j = 0
+      k = 0
+
+c     NZA is different. For a given non-zero span, what is my element ID
+c     counting the zero spans?
+
+      do nk=1,OCP+R
+         if(dabs(W_KNOT(nk+1)-W_KNOT(nk))>1.0d-12) then
+
+						j = 0
+            do nj=1,NCP+Q
+               if(dabs(V_KNOT(nj+1)-V_KNOT(nj))>1.0d-12) then
+             		  
+             		  i = 0
+                  do ni = 1, MCP+P  
+                     if(dabs(U_KNOT(ni+1)-U_KNOT(ni))>1.0d-12) then            
+
+                        iel = k*(MCP_NZA*NCP_NZA) + j*(MCP_NZA) + i + 1
+                        NZM(iel) = i
+                        NZN(iel) = j
+                        NZO(iel) = k
+c                        NZA(iel)=(nk-1)*((MCP+P)*(NCP+Q))+(nj-1)*(MCP+P)+ni                
+                        
+c                        print *, NZA(iel), nk, MCP+P, NCP+Q, nj, ni
+
+                        i=i+1
+                     endif
+                  enddo
+
+                  j=j+1
+               endif
+            enddo
+
+            k=k+1
+         endif
+      enddo
+			
+			counter = 0
+c$$$
+c$$$c...  loop over elements
+      do iel = 1, NEL
+c$$$         
+c...  Check to see if current element has nonzero area
+         ni = INN(IEN(iel,1),1) ! get NURBS coordinates
+         nj = INN(IEN(iel,1),2)
+         nk = INN(IEN(iel,1),3)
+         
+
+         if (((dabs(U_KNOT(ni+1)-U_KNOT(ni)))>1.0d-12).and.
+     &        ((dabs(V_KNOT(nj+1)-V_KNOT(nj)))>1.0d-12).and.
+     &        ((dabs(W_KNOT(nk+1)-W_KNOT(nk)))>1.0d-12)) then 
+c...  element has positive area in the parametric domain
+
+c		   print *, counter
+
+c     Counting the nonzero elements to store them contiguosly in NZA
+            counter = counter + 1             
+
+c     Storing contiguously the ids of the non-zero elements to send to the gpu
+            NZA(counter) = iel            
+
+         endif         
+
+      enddo                 
+           
+       	 
+cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc      
+ccccc Precomputing the basis functions in 1D for only nonzero spans!!!      
+
+      counter = 0
+      
+      do ni = 1, MCP+P ! + 1 - 1
+
+         if( dabs( U_KNOT( ni+1 ) - U_KNOT( ni ) )  > 1.0d-12 ) then		
+            
+            counter = counter + 1
+            
+            do igauss = 1, NGAUSS
+               
+               u = ((U_KNOT(ni+1)-U_KNOT(ni))*gp(igauss) + 
+     &              U_KNOT(ni+1) + U_KNOT(ni))*0.5d0            		
+               
+               call dersbasisfuns3(ni,P,MCP,u,U_KNOT,M, 
+     &              counter, MCP_NZA, igauss, NGAUSS ) ! calculate in u direction
+               
+            enddo
+         endif
+      enddo
+            
+      counter = 0
+      
+      do ni = 1, NCP+Q          ! + 1 - 1
+         
+         if( dabs( V_KNOT( ni+1 ) - V_KNOT( ni ) )  > 1.0d-12 ) then
+	    
+            counter = counter + 1
+            
+            do igauss = 1, NGAUSS      	        	  
+               
+               v = ((V_KNOT(ni+1)-V_KNOT(ni))*gp(igauss) + 
+     &              V_KNOT(ni+1) + V_KNOT(ni))*0.5d0     
+               
+               call dersbasisfuns3(ni,Q,NCP,v,V_KNOT,N,
+     &              counter, NCP_NZA, igauss, NGAUSS) ! calculate in v direction
+               
+            enddo
+         endif
+      enddo            
+      
+
+      counter = 0
+      
+      do ni = 1, OCP+R          ! + 1 - 1
+         
+         if( dabs( W_KNOT( ni+1 ) - W_KNOT( ni ) )  > 1.0d-12 ) then
+	    
+            counter = counter + 1
+            
+            do igauss = 1, NGAUSS      	        	  
+               
+               w = ((W_KNOT(ni+1)-W_KNOT(ni))*gp(igauss) + 
+     &              W_KNOT(ni+1) + W_KNOT(ni))*0.5d0          
+               
+               call dersbasisfuns3(ni,R,OCP,w,W_KNOT,O,
+     &              counter, OCP_NZA, igauss, NGAUSS) ! calculate in w direction
+               
+            enddo
+         endif
+      enddo   
+      
+ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc       
+      
+c     Now, loop through the nonzero-span knots to 
+c     generate N,M,O basis funs and derivatives vectors 
+c     across all gausspoints through the whole geometry
+      do iel = 1, NEL_NZA
+         
+c     Getting coordinates of the NonZero elements 
+         ni = INN(IEN(NZA(iel),1),1) ! get NURBS coordinates
+         nj = INN(IEN(NZA(iel),1),2)
+         nk = INN(IEN(NZA(iel),1),3)
+         
+c     We don't do 3D, because we're not tensor-producting yet
+c     The same loop is for all three of them
+         do igauss = 1, NGAUSS         
+            
+c     Get u and v coordinates of integration point
+c            u = ((U_KNOT(ni+1)-U_KNOT(ni))*gp(igauss) + 
+c     &           U_KNOT(ni+1) + U_KNOT(ni))*0.5d0
+c            v = ((V_KNOT(nj+1)-V_KNOT(nj))*gp(igauss) + 
+c     &           V_KNOT(nj+1) + V_KNOT(nj))*0.5d0
+c            w = ((W_KNOT(nk+1)-W_KNOT(nk))*gp(igauss) + 
+c     &           W_KNOT(nk+1) + W_KNOT(nk))*0.5d0
+c            
+            da = (U_KNOT(ni+1) - U_KNOT(ni))*
+     &           (V_KNOT(nj+1) - V_KNOT(nj))*
+     &           (W_KNOT(nk+1) - W_KNOT(nk))*0.125d0
+            
+c     Evaluate 1D shape functions and derivatives each direction
+c     across all gauss points and all elements
+c            call dersbasisfuns2(ni,P,MCP,u,U_KNOT,M, iel, 
+c     &           igauss, NGAUSS, NEL_NZA) ! calculate in u direction
+c            call dersbasisfuns2(nj,Q,NCP,v,V_KNOT,N, iel, 
+c     &           igauss, NGAUSS, NEL_NZA) ! calculate in v direction
+c            call dersbasisfuns2(nk,R,OCP,w,W_KNOT,O, iel, 
+c     &           igauss, NGAUSS, NEL_NZA) ! calculate in v direction        
+            
+         enddo
+      enddo
+      
+c     Tensor product the gauss weights. It still needs to calculate 
+c     da that normalizes the product to the unit cube
+      e = 1
+      do igauss = 1, NGAUSS         
+         do jgauss = 1, NGAUSS
+            do kgauss = 1, NGAUSS
+c     Tensor product the GW into one vector
+               gwp(e) = GW(igauss) * GW(jgauss) * GW(kgauss) * da
+               e = e + 1
+            enddo
+         enddo
+      enddo
+
+c     Only do this if there is non-zero elements
+      if(NEL_NZA.gt.0) then
+         
+         call cpu_time(begin)        
+         
+c         print *, INN(1,1), IEN(1,1), NZA(1)
+c         print *, MCP, MCP_NZA, NZM(1)
+c		  print *, NCP, NCP_NZA, NZN(1)
+c         print *, NCP, OCP, P, Q, R
+c         print *, NEL_NZA, NSHL, NSD, NGAUSS, NNODZ, NEL
+c         print *, B_NET(1,1,1,1),EY(1),NU(1),LD_el(1,1),xKebe(1),Rhs(1)
+c         print *, volm, DetJ,M(1), N(1), O(1), gwp(1)
+         
+         CALL intelc(INN, IEN, NZA, MCP, MCP_NZA, NZM, NCP,NCP_NZA,NZN, 
+     &         OCP, OCP_NZA, NZO, P, Q, R, NEL_NZA, NSHL, NSD, NGAUSS, 
+     &				NNODZ, NEL, B_NET, EY, NU, LD_el, xKebe, Rhs, volm, DetJ,
+     &        M, N, O, gwp)                     
+         
+         
+         call cpu_time(endl)     	
+         time1 = 0.0
+         time1 = endl - begin
+         
+         call cpu_time(begin)
+         
+
+
+         do counter = 1, NEL_NZA
+            
+            iel = NZA(counter)
+
+c     Add lift to the appropriate RHS location
+c     to account for Dirichlet conditions
+
+            do i = 1, NSHL
+               yl(i,:) = DIR_BC(IEN(iel,i),:)
+            enddo                
+            
+            offsetR = (counter-1)*NSHL*NSD
+            offsetL = (counter-1)*NSHL*NSHL*NSD*NSD
+            
+c     Changed to use 1D xkebe          
+            do j = 1, NSHL
+               do i = 1, NSHL
+                  Rhs(offsetR+i) = Rhs(offsetR+i) -
+     &                 xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+
+     &                 ((i-1)*NSD)+1) * yl(j,1) -
+     &                 xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+
+     &                 ((i-1)*NSD)+2) * yl(j,2) -
+     &                 xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+
+     &                 ((i-1)*NSD)+3) * yl(j,3)
+
+                  Rhs(offsetR+(NSHL)+ i) =
+     &                 Rhs(offsetR+(NSHL)+ i) -
+     &                 xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     &                 ((i-1)*NSD)+ 1) * yl(j,1) -
+     &                 xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     &                 ((i-1)*NSD)+ 2) * yl(j,2) -
+     &                 xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     &                 ((i-1)*NSD)+ 3) * yl(j,3)
+
+                  Rhs(offsetR+(2*NSHL)+ i) =
+     &                 Rhs(offsetR+(2*NSHL)+ i) -
+     &                 xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     &                 ((i-1)*NSD)+ 1) * yl(j,1) -
+     &                 xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     &                 ((i-1)*NSD)+ 2) * yl(j,2) -
+     &                 xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     &                 ((i-1)*NSD)+ 3) * yl(j,3)
+
+               enddo
+            enddo                    
+            
+c     VERSIONS 2 of these codes to work with 1D Xkebe
+c...  Modify local stiffness matrix for Dirichlet boundary conditions
+            call BCLhs2(iel, xKebe, Rhs, counter)                 
+            
+            
+c     100		 continue
+            
+            goto 200          
+            icount = 0
+            do j = 1, NSHL
+            do i = 1, NSHL
+               print *,
+     &              xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+((i-1)*NSD)+1),
+     &              xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+((i-1)*NSD)+2),
+     &              xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+((i-1)*NSD)+3)
+                  
+               print *,
+     &              xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     &              ((i-1)*NSD)+1),
+     &              xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     &              ((i-1)*NSD)+2),
+     &              xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+(NSHL*NSD)+
+     &              ((i-1)*NSD)+3)
+                  
+               print *,xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     &              ((i-1)*NSD)+1),
+     &              xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     &              ((i-1)*NSD)+2),
+     &              xKebe(offsetL+((j-1)*NSHL*NSD*NSD)+(2*NSHL*NSD)+
+     &              ((i-1)*NSD)+3)
+                  
+               print *       			
+            enddo
+            print *        
+            enddo     
+            
+c     do i = 1, NSHL
+c     print *,Rhs(offsetR+i),
+c     &      			Rhs(offsetR+NSHL+i),
+c     &					Rhs(offsetR+(2*NSHL)+i)
+c     enddo      	  	
+            
+ 200        continue        
+            
+            
+            
+            
+c...  assemble into the Sparse Global Stiffness Matrix and Rhs Vector
+            call FillSparseMat2 (iel, col, row, xKebe, counter) ! Assemble stiffness
+            call LocaltoGlobal2 (iel, Rhs, counter) ! Assemble load vector          
+            
+c     write(*,*) "Elem number: ", iel, NEL_NZA, NEL
+
+         enddo
+         
+         call cpu_time(endl)
+         time2 = endl - begin
+         
+      endif
+      
+      write(*,*) "Volume of Solid =    ", volm 
+      
+c     stop        
+      
+      call cpu_time(finish)
+      time0 = finish-start
+      
+      
+      
+c     goto 100
+      print *
+      print *
+      print *
+      print *, "------------------------------------------"      	 
+      print *, 'Time profile on IntElmAss routine'
+      print *, "------------------------------------------"
+      print '(" CUDA routine = ",f13.6," seconds.")',time1
+c     print *,'(" Assembly     = "',time2,'" seconds.")'
+      print '(" Assembly     = ",f13.6," seconds.")',time2
+      print *, "------------------------------------------"
+      print '(" Total time   = ",f13.6," seconds.")',time0
+      print *, "------------------------------------------"
+      print *
+      print *
+c     100		continue
+
+
+
+      deallocate (xKebe, Rhs, M, N, O, NZA, NZM, NZN ) ! NZO)
+      
+c     stop    
+      
+      return
+      end
+      
+c     Modified Jan. 20, 2004
+c     J. Austin Cottrell
+      
+      subroutine IntElmAss_3D(col, row)
+      
+      use aAdjKeep
+
+      include "common.h"
+      
+
+
+c...  Local variables
+      integer iel, igauss, jgauss, kgauss, i, j, k
+      integer row(NNODZ*8*(P+1)*(Q+1)*(R+1)), col(NNODZ+1), ni,nj, nk
+      real*8  shg(NSHL), shgradl(NSHL,NSD),
+     &     shgradg(NSHL,NSD)
+      real*8 gp(NGAUSS), gw(NGAUSS), gwt, da
+      real*8 lambda, numod, Emod, mu, fx, fy, fz, yl(NSHL,NSD)
+      real*8 xKebe(NSD*NSD,NSHL,NSHL), Rhs(NSD,NSHL), volm, Pload
+      real start,finish, begin,endl,beginloop,endloop, time0,time1, time2, time3, time4
+      
+!     CPU_TIME_6
+      call cpu_time(start)
+
+c...  get Gaussian points and weights
+      gp = 0d+0
+      gw = 0d+0
+      call genGPandGW(gp,gw)
+      
+      volm = 0d+0               ! Volume of the solid (for debugging)
+      NEL_NZA = 0               ! Elements of non-zero area
+
+
+c     ***********************************************
+c...  loop over elements
+      do iel = 1, NEL
+         
+c...  Check to see if current element has nonzero area
+         ni = INN(IEN(iel,1),1) ! get NURBS coordinates
+         nj = INN(IEN(iel,1),2)
+         nk = INN(IEN(iel,1),3)
+         
+         if ((U_KNOT(ni).ne.U_KNOT(ni+1)).and.
+     &        (V_KNOT(nj).ne.V_KNOT(nj+1)).and.
+     &        (W_KNOT(nk).ne.W_KNOT(nk+1))) then ! element has positive area
+! in the parametric domain
+            
+            NEL_NZA = NEL_NZA + 1
+            
+            da = (U_KNOT(ni+1) - U_KNOT(ni))*
+     &           (V_KNOT(nj+1) - V_KNOT(nj))*
+     &           (W_KNOT(nk+1) - W_KNOT(nk))/8d+0 ! used in calculating 
+! quadrature points. The factor of 8d+0
+! comes from mapping from the [-1,1]
+! line onto a real segment...
+            
+c...  Set up element parameters
+            Emod = EY(iel)      ! Young's modulus
+            numod = NU(iel)     ! Poisson's ratio
+            
+            fx = LD_el(iel,1)   ! body force contributions
+            fy = LD_el(iel,2)
+            fz = LD_el(iel,3)
+            
+            lambda = numod*Emod/((1d+0+numod)*(1d+0-2d+0*numod))
+            mu = Emod/(2d+0*(1d+0+numod)) ! PDE
+            
+            xKebe = 0d+0        ! initialize local stiffness matrix
+            Rhs = 0d+0          ! initialize local load vector
+            
+            
+            
+!     CPU_TIME_1
+            call cpu_time(beginloop)    
+c     --------------------------------------------------------
+c...  Loop over integration points (NGAUSS in each direction)
+            do igauss = 1, NGAUSS
+               do jgauss = 1, NGAUSS
+                  do kgauss = 1, NGAUSS
+                     
+c...  Get Element Shape functions and their gradients
+                     shg = 0d+0 ! Shape function
+                     shgradl = 0d+0 ! shape fun. gradient w.r.t. local directions u,v,w
+                     shgradg = 0d+0 ! shape fun. grad. w.r.t. global directions x,y,z
+                     
+!     CPU_TIME_2
+                     call cpu_time(begin)    
+                     call eval_SHAPE_3D(iel, gp(igauss), gp(jgauss),
+     &                    gp(kgauss), shg, shgradl,shgradg)
+                     
+                     call cpu_time(endl)
+
+                     time2 = time2 + (endl-begin)
+!     CPU_TIME_END_2
+                     
+c...  Calculate given element stiffness matrix and force vector
+                     gwt = gw(igauss)*gw(jgauss)*gw(kgauss)*da               
+
+!     CPU_TIME_3
+                     call cpu_time(begin)    
+                     call e3LHS_3D(lambda, mu, gwt, 
+     &                    shgradg, xKebe) ! local stiffness
+
+                     call cpu_time(endl)
+
+                     time3 = time3 + (endl-begin)
+!     CPU_TIME_END_3
+
+
+
+!     CPU_TIME_4
+                     call cpu_time(begin)                    
+                     call e3Rhs_3D(fx, fy, fz, gwt, shg, Rhs,volm) ! local forcing
+                     call cpu_time(endl)
+
+                     time4 = time4 + (endl-begin)
+!     CPU_TIME_END_4
+                     
+
+                  enddo
+               enddo
+            enddo               ! end integration loop     
+            
+            call cpu_time(endloop)
+
+            time1 = endloop-beginloop
+
+            do j = 1, NSHL
+               do k = 1, NSHL
+                  print *,j,k,"----------------"
+                  print *, xKebe(1,j,k), xKebe(2,j,k), xKebe(3,j,k) 
+                  print *, xKebe(4,j,k), xKebe(5,j,k), xKebe(6,j,k)
+                  print *, xKebe(7,j,k), xKebe(8,j,k), xKebe(9,j,k)
+               enddo
+            enddo
+
+
+!     CPU_TIME_END_1
+
+c     Add lift to the appropriate RHS location
+c     to account for Dirichlet conditions
+            do i = 1, NSHL
+               yl(i,:) = DIR_BC(IEN(iel,i),:)
+            enddo
+            
+            
+            do i = 1, NSHL
+               do j = 1, NSHL
+                  Rhs(1,i) = Rhs(1,i) - xKebe(1,i,j)*yl(j,1) -
+     &                 xKebe(2,i,j)*yl(j,2) - xKebe(3,i,j)*yl(j,3)
+                  Rhs(2,i) = Rhs(2,i) - xKebe(4,i,j)*yl(j,1) -
+     &                 xKebe(5,i,j)*yl(j,2) - xKebe(6,i,j)*yl(j,3)
+                  Rhs(3,i) = Rhs(3,i) - xKebe(7,i,j)*yl(j,1) -
+     &                 xKebe(8,i,j)*yl(j,2) - xKebe(9,i,j)*yl(j,3)
+               enddo
+            enddo
+            
+            
+c...  Modify local stiffness matrix for Dirichlet boundary conditions
+            call BCLhs_3D(iel, xKebe, Rhs)
+            
+c...  assemble into the Sparse Global Stiffness Matrix and Rhs Vector
+            call FillSparseMat_3D (iel, col, row, xKebe) ! Assemble stiffness
+            call LocaltoGlobal_3D (iel, Rhs) ! Assemble load vector
+            
+         endif
+         
+      enddo
+      
+      write(*,*) "Volume of Solid =    ", volm
+
+      call cpu_time(finish)
+
+      time0 = finish-start
+
+!     CPU_TIME_END
+
+c     print *,''
+
+c     print *, 'Time5.1 All elem Loop = ',time1,'seconds.'
+c     print *, 'Time5.2 eval_SHAPE_3D = ',time2,'seconds.'
+c     print *, 'Time5.3 call e3LHS_3D = ',time3,'seconds.'
+c     print *, 'Time5.4 call e3Rhs_3D = ',time4,'seconds.'
+c     print *, 'Total time = ',time0,'seconds'
+
+c     print *,''
+      
+      return
+      end
+
+
+c     *********************************************************************
+c     *********************************************************************
+
+
+      
+c     Modified Jan. 20, 2004
+
+      
+      subroutine IntElmAss_3D_Skyline(ndiag)
+      
+      use aAdjKeep
+
+      include "common.h"
+      
+
+c...  Local variables
+      integer iel, igauss, jgauss, kgauss, i, j, k
+      integer row(NNODZ*8*(P+1)*(Q+1)*(R+1)), col(NNODZ+1), ni,nj, nk,
+     &     ndiag(NSD*NNODZ)
+      real*8  shg(NSHL), shgradl(NSHL,NSD),
+     &     shgradg(NSHL,NSD)
+      real*8 gp(NGAUSS), gw(NGAUSS), gwt, da
+      real*8 lambda, numod, Emod, mu, fx, fy, fz, yl(NSHL,NSD)
+      real*8 xKebe(NSD*NSD,NSHL,NSHL), Rhs(NSD,NSHL), volm, Pload
+      
+
+
+c...  get Gaussian points and weights
+      gp = 0d+0
+      gw = 0d+0
+      call genGPandGW(gp,gw)
+      
+      volm = 0d+0               ! Volume of the solid (for debugging)
+      NEL_NZA = 0               ! Elements of non-zero area
+
+
+c     ***********************************************
+c...  loop over elements
+      do iel = 1, NEL
+         
+c...  Check to see if current element has nonzero area
+         ni = INN(IEN(iel,1),1) ! get NURBS coordinates
+         nj = INN(IEN(iel,1),2)
+         nk = INN(IEN(iel,1),3)
+         
+c     print *, iel, ni, IEN(iel,1), INN(IEN(iel,1),1)
+c     print *, iel, nj, IEN(iel,1), INN(IEN(iel,1),2)
+c     print *, iel, nk, IEN(iel,1), INN(IEN(iel,1),3)
+         
+         if ((U_KNOT(ni).ne.U_KNOT(ni+1)).and.
+     &        (V_KNOT(nj).ne.V_KNOT(nj+1)).and.
+     &        (W_KNOT(nk).ne.W_KNOT(nk+1))) then ! element has positive area
+! in the parametric domain
+
+            NEL_NZA = NEL_NZA + 1
+            
+            da = (U_KNOT(ni+1) - U_KNOT(ni))*
+     &           (V_KNOT(nj+1) - V_KNOT(nj))*
+     &           (W_KNOT(nk+1) - W_KNOT(nk))/8d+0 ! used in calculating 
+! quadrature points. The factor of 8d+0
+! comes from mapping from the [-1,1]
+! line onto a real segment...
+            print *, "da", da                             
+            
+c...  Set up element parameters
+            Emod = EY(iel)      ! Young's modulus
+            numod = NU(iel)     ! Poisson's ratio
+            
+            print *, "EY NU", Emod, numod
+            
+            fx = LD_el(iel,1)   ! body force contributions
+            fy = LD_el(iel,2)
+            fz = LD_el(iel,3)
+            
+            print *, "fx fy fz", fx, fz, fy
+            
+            lambda = numod*Emod/((1d+0+numod)*(1d+0-2d+0*numod))
+            mu = Emod/(2d+0*(1d+0+numod)) ! PDE
+            
+            xKebe = 0d+0        ! initialize local stiffness matrix
+            Rhs = 0d+0          ! initialize local load vector
+            
+            
+            
+            
+c     --------------------------------------------------------
+c...  Loop over integration points (NGAUSS in each direction)
+            do igauss = 1, NGAUSS
+               do jgauss = 1, NGAUSS
+                  do kgauss = 1, NGAUSS
+                     
+c...  Get Element Shape functions and their gradients
+                     shg = 0d+0 ! Shape function
+                     shgradl = 0d+0 ! shape fun. gradient w.r.t. local directions u,v,w
+                     shgradg = 0d+0 ! shape fun. grad. w.r.t. global directions x,y,z
+                     call eval_SHAPE_3D(iel, gp(igauss), gp(jgauss),
+     &                    gp(kgauss), shg, shgradl,shgradg)
+                     
+                     
+                     
+c...  Calculate given element stiffness matrix and force vector
+                     gwt = gw(igauss)*gw(jgauss)*gw(kgauss)*da               
+                     call e3LHS_3D(lambda, mu, gwt, 
+     &                    shgradg, xKebe) ! local stiffness
+                     
+                     call e3Rhs_3D(fx, fy, fz, gwt, shg, Rhs,volm) ! local forcing
+                     
+                  enddo
+               enddo
+            enddo               ! end integration loop     
+            
+            
+c     Add lift to the appropriate RHS location
+c     to account for Dirichlet conditions
+            do i = 1, NSHL
+               yl(i,:) = DIR_BC(IEN(iel,i),:)
+            enddo
+            
+            
+            do i = 1, NSHL
+               do j = 1, NSHL
+                  Rhs(1,i) = Rhs(1,i) - xKebe(1,i,j)*yl(j,1) -
+     &                 xKebe(2,i,j)*yl(j,2) - xKebe(3,i,j)*yl(j,3)
+                  Rhs(2,i) = Rhs(2,i) - xKebe(4,i,j)*yl(j,1) -
+     &                 xKebe(5,i,j)*yl(j,2) - xKebe(6,i,j)*yl(j,3)
+                  Rhs(3,i) = Rhs(3,i) - xKebe(7,i,j)*yl(j,1) -
+     &                 xKebe(8,i,j)*yl(j,2) - xKebe(9,i,j)*yl(j,3)
+               enddo
+            enddo
+            
+            
+c...  Modify local stiffness matrix for Dirichlet boundary conditions
+            call BCLhs_3D(iel, xKebe, Rhs)
+            
+c...  assemble into the Sparse Global Stiffness Matrix and Rhs Vector
+            call FillSkylineMat_3D_vector_iper(iel, ndiag, xKebe) ! Assemble stiffness
+            call LocaltoGlobal_3D (iel, Rhs) ! Assemble load vector
+            
+         endif
+         
+      enddo
+      
+      write(*,*) "Volume of Solid =    ", volm
+      
+      return
+      end
+      
+c
+c     This routine takes the local load vector and places it into the global vector.
+c    
+c
+c     modified from original code by Jurijs Bazilevs
+c
+c     July 1, 2003
+c
+c     J. Austin Cottrell, III
+c
+c     Nov. 15 2004 - modified by Jurjis Bazilevs for 3D case
+
+      subroutine LocaltoGlobal2 (iel, Rhs, counter)
+     
+      use aAdjKeep
+      
+      include "common.h"
+
+      integer iel
+      integer aa, bb, counter
+      real*8 Rhs(NSD*NSHL*NEL_NZA)
+      integer offsetR 
+      
+      offsetR = (counter-1)*NSHL*NSD
+
+      do aa = 1, 3
+         do bb = 1, NSHL
+            
+            RHSG(IEN(iel,bb),aa) = RHSG(IEN(iel,bb),aa) + 
+     &           Rhs(offsetR+((aa-1)*NSHL)+bb)          	    
+
+         enddo
+      enddo     
+     
+      return
+      end

LocaltoGlobal_3D.f

+c
+c     This routine takes the local load vector and places it into the global vector.
+c    
+c
+c     modified from original code by Jurijs Bazilevs
+c
+c     July 1, 2003
+c
+c     J. Austin Cottrell, III
+c
+c     Nov. 15 2004 - modified by Jurjis Bazilevs for 3D case
+
+      subroutine LocaltoGlobal_3D (iel, Rhs)
+     
+      use aAdjKeep
+      
+      include "common.h"
+
+      integer iel
+      integer aa, bb
+      real*8 Rhs(NSD,NSHL)
+
+      do aa = 1, 3
+         do bb = 1, NSHL
+            
+            RHSG(IEN(iel,bb),aa) = RHSG(IEN(iel,bb),aa) + 
+     &           Rhs(aa,bb)
+
+         enddo
+      enddo
+
+     
+      return
+      end
+
+
+      subroutine SparseProd_3D (colvec, rowvec, rhstmp, prodtmp)
+
+      use aAdjKeep
+
+      include "common.h"
+
+
+      integer colvec(NNODZ+1), rowvec(NNODZ*8*(P+1)*(Q+1)*(R+1))
+      integer aa, bb, cc
+      real*8  rhstmp(NNODZ,3), prodtmp(NNODZ,3)
+      real*8	tmp1,	tmp2,	tmp3,	pisave
+      
+c.... clear the vector
+
+      do aa = 1, NNODZ
+        prodtmp(aa,1) = 0d+0
+        prodtmp(aa,2) = 0d+0
+        prodtmp(aa,3) = 0d+0
+      enddo
+
+      do aa = 1, NNODZ
+         
+        tmp1 = 0d+0
+        tmp2 = 0d+0
+        tmp3 = 0d+0
+         
+        do bb = colvec(aa), colvec(aa+1)-1
+          cc = rowvec(bb)
+
+          tmp1 = tmp1 + LHSK(1,bb)*rhstmp(cc,1) + 
+     &      LHSK(2,bb)*rhstmp(cc,2) + LHSK(3,bb)*rhstmp(cc,3)
+          tmp2 = tmp2 + LHSK(4,bb)*rhstmp(cc,1) + 
+     &      LHSK(5,bb)*rhstmp(cc,2) + LHSK(6,bb)*rhstmp(cc,3)
+          tmp3 = tmp3 + LHSK(7,bb)*rhstmp(cc,1) + 
+     &      LHSK(8,bb)*rhstmp(cc,2) + LHSK(9,bb)*rhstmp(cc,3)
+          
+           enddo
+
+           prodtmp(aa,1) = prodtmp(aa,1) + tmp1
+           prodtmp(aa,2) = prodtmp(aa,2) + tmp2
+           prodtmp(aa,3) = prodtmp(aa,3) + tmp3
+
+        enddo
+
+        return
+        end
+
+
+
+      
+      subroutine SparseCG_3D(colvec, rowvec, soln)
+      
+      use aAdjKeep
+      
+      include "common.h"
+      
+      real*8, allocatable :: lhsKdiag(:,:)
+      
+      integer colvec(NNODZ+1), rowvec(NNODZ*8*(P+1)*(Q+1)*(R+1))
+      integer n, i, j, k, iter
+      real*8  rhstmp(NNODZ,3), prodtmp(NNODZ,3), rr, rr1, rr2, rr3, 
+     &  pp, alpha, soln(NNODZ,3), tmprr, beta, pv(NNODZ,3), rr0,
+     &  tauk, taukm1, zs(NNODZ,3)
+      
+      
+      allocate(lhsKdiag(3,NNODZ))
+      
+c...  extract diagonal vector
+      
+      do i = 1, NNODZ
+        do j = colvec(i), colvec(i+1)-1
+          n = rowvec(j)
+          if (n.eq.i) then
+            lhsKdiag(1,i) = 1d+0/LHSK(1,j)
+            lhsKdiag(2,i) = 1d+0/LHSK(5,j)
+            lhsKdiag(3,i) = 1d+0/LHSK(9,j)
+          endif
+        enddo
+      enddo
+      
+      
+c...  communicate the diagonal
+      
+      do i = 1, NNODZ           ! If a node is a slave
+        if ((IBC(i,1).eq.3).or.(IBC(i,2).eq.3).or.(IBC(i,3).eq.3)) then 
+          lhsKdiag(:,IPER(i)) = lhsKdiag(:,IPER(i)) + lhsKdiag(:,i) ! Ma = Ma+Sl
+        endif
+      enddo
+      
+      do i = 1, NNODZ        
+        if ((IBC(i,1).eq.3).or.(IBC(i,2).eq.3).or.(IBC(i,3).eq.3)) then
+          lhsKdiag(:,i) = lhsKdiag(:,IPER(i))
+        endif        
+      enddo      
+
+      
+      rhstmp(:,1) = RHSG(:,1)
+      rhstmp(:,2) = RHSG(:,2)
+      rhstmp(:,3) = RHSG(:,3)
+      
+      soln = 0d+0
+      rr = 0d+0
+      rr1 = 0d+0
+      rr2 = 0d+0
+      rr3 = 0d+0
+      
+      do n = 1, NNODZ
+        rr1 = rr1 + rhstmp(n,1)*rhstmp(n,1)
+        rr2 = rr2 + rhstmp(n,2)*rhstmp(n,2)
+        rr3 = rr3 + rhstmp(n,3)*rhstmp(n,3)
+      enddo
+      
+      rr = rr1+rr2+rr3
+      rr0 = rr
+      
+      do iter = 1, 5000
+        
+
+        write(*,*) iter, sqrt(rr/rr0), rr , rr0
+        
+        do i = 1, NNODZ
+          zs(i,1) = rhstmp(i,1)*lhsKdiag(1,i)
+          zs(i,2) = rhstmp(i,2)*lhsKdiag(2,i)
+          zs(i,3) = rhstmp(i,3)*lhsKdiag(3,i)
+        enddo
+        
+        tauk = 0d+0
+
+        do i = 1, NNODZ
+          do j = 1, 3
+            tauk = tauk + zs(i,j)*rhstmp(i,j)
+          enddo
+        enddo
+        
+        if(iter.eq.1) then
+          beta = 0d+0
+          pv = zs
+        else
+          beta = tauk/taukm1
+          pv(:,:) = zs(:,:) + beta*pv(:,:)
+        endif
+          
+c...    Periodicity (Slave = Master)         
+        
+        do i = 1, NNODZ
+          
+          if ((IBC(i,1).eq.3).or.(IBC(i,2).eq.3).or.(IBC(i,3).eq.3))
+     &      then
+            pv(i,:) = pv(IPER(i),:) ! Slave = Master
+          endif
+          
+        enddo
+        
+c...    Product
+        
+        call SparseProd_3D (colvec, rowvec, pv, prodtmp)
+        
+        
+c...    Communicate to Masters, Zero out Slaves
+        
+        do i = 1, NNODZ
+          
+          if ((IBC(i,1).eq.3).or.(IBC(i,2).eq.3).or.(IBC(i,3).eq.3))
+     &      then
+            
+            prodtmp(IPER(i),:) = prodtmp(IPER(i),:) + 
+     &        prodtmp(i,:)      ! Master = Master+Slave
+            prodtmp(i,:) = 0d+0 ! Slave = zero
+            
+          endif
+          
+        enddo           
+        
+c...    
+        
+        pp = 0d+0
+        rr1 = 0d+0
+        rr2 = 0d+0
+        rr3 = 0d+0
+        
+        do n = 1, NNODZ
+          rr1 = rr1 + pv(n,1)*prodtmp(n,1)
+          rr2 = rr2 + pv(n,2)*prodtmp(n,2)
+          rr3 = rr3 + pv(n,3)*prodtmp(n,3)
+        enddo
+        
+        pp = rr1 + rr2 + rr3
+        
+        alpha = tauk/pp
+        
+c....   calculate the next guess
+        
+        soln(:,1) = soln(:,1) + alpha*pv(:,1)
+        soln(:,2) = soln(:,2) + alpha*pv(:,2)
+        soln(:,3) = soln(:,3) + alpha*pv(:,3)
+        
+c....   calculate new res. and dot prod. 
+        
+        rhstmp(:,1) = rhstmp(:,1) - alpha*prodtmp(:,1)
+        rhstmp(:,2) = rhstmp(:,2) - alpha*prodtmp(:,2)
+        rhstmp(:,3) = rhstmp(:,3) - alpha*prodtmp(:,3)
+        
+        tmprr = rr
+        
+        rr = 0
+        rr1 = 0
+        rr2 = 0
+        rr3 = 0
+        
+        do n = 1, NNODZ
+          rr1 = rr1 + rhstmp(n,1)*rhstmp(n,1)
+          rr2 = rr2 + rhstmp(n,2)*rhstmp(n,2)
+          rr3 = rr3 + rhstmp(n,3)*rhstmp(n,3)
+        enddo
+        
+        rr = rr1 + rr2 + rr3
+        
+c....   check for convergence 
+
+        taukm1 = tauk
+        
+        if(sqrt(rr/rr0).lt.0.000000000001) goto 6000
+c        if(sqrt(rr/rr0).lt.0.000001) goto 6000
+        
+      enddo
+      
+ 6000 continue
+      
+c...  communicate solution
+      
+      do i = 1, NNODZ
+        
+        if ((IBC(i,1).eq.3).or.(IBC(i,2).eq.3).or.(IBC(i,3).eq.3))
+     &    then
+          soln(i,:) = soln(IPER(i),:) ! Slave = Master
+        endif
+        
+      enddo
+      
+      deallocate(lhsKdiag)
+      
+      write(*,9000) iter, sqrt(rr / rr0)
+      
+      return
+ 9000 format(20x,'  number of iterations:', i10,/,
+     &  20x,'    residual reduction:', 2x,e10.2)
+      
+      end
+
+      
+      subroutine SparseCG_BDIAG_3D(colvec, rowvec, soln)
+      
+      use aAdjKeep
+      
+      include "common.h"
+      
+      real*8, allocatable :: lhsKBdiag(:,:,:)
+      
+      integer colvec(NNODZ+1), rowvec(NNODZ*8*(P+1)*(Q+1)*(R+1))
+      integer n, i, j, k, iter
+      real*8  rhstmp(NNODZ,3), prodtmp(NNODZ,3), rr, rr1, rr2, rr3, 
+     &  pp, alpha, soln(NNODZ,3), tmprr, beta, pv(NNODZ,3), rr0,
+     &  tauk, taukm1, zs(NNODZ,3), Binv(3,3), tmp
+      
+      
+      allocate(lhsKBdiag(3,3,NNODZ))
+      
+c...  extract block-diagonal matrix vector
+      
+      do i = 1, NNODZ
+        do j = colvec(i), colvec(i+1)-1
+          n = rowvec(j)
+          if (n.eq.i) then
+            
+            lhsKBdiag(1,1,i) = LHSK(1,j)
+            lhsKBdiag(2,2,i) = LHSK(5,j)
+            lhsKBdiag(3,3,i) = LHSK(9,j)
+            
+            lhsKBdiag(1,2,i) = LHSK(2,j)
+            lhsKBdiag(1,3,i) = LHSK(3,j)
+            lhsKBdiag(2,1,i) = LHSK(4,j)
+            lhsKBdiag(2,3,i) = LHSK(6,j)
+            lhsKBdiag(3,1,i) = LHSK(7,j)
+            lhsKBdiag(3,2,i) = LHSK(8,j)
+            
+          endif
+        enddo
+      enddo
+      
+      
+c...  communicate the block-diagonal
+      
+      do i = 1, NNODZ           ! If a node is a slave
+        if ((IBC(i,1).eq.3).or.(IBC(i,2).eq.3).or.(IBC(i,3).eq.3)) then 
+          lhsKBdiag(:,:,IPER(i)) = lhsKBdiag(:,:,IPER(i))
+     &      + lhsKBdiag(:,:,i)  ! Ma = Ma+Sl
+        endif
+      enddo
+      
+      do i = 1, NNODZ        
+        if ((IBC(i,1).eq.3).or.(IBC(i,2).eq.3).or.(IBC(i,3).eq.3)) then
+          lhsKBdiag(:,:,i) = lhsKBdiag(:,:,IPER(i))
+        endif        
+      enddo      
+
+c...  invert block-diagonal
+
+      Binv = 0d+0
+      
+      do i = 1, NNODZ
+        
+        Binv(1,1) =   lhsKBdiag(2,2,i) * lhsKBdiag(3,3,i) 
+     &    - lhsKBdiag(3,2,i) * lhsKBdiag(2,3,i)
+        Binv(1,2) =   lhsKBdiag(3,2,i) * lhsKBdiag(1,3,i) 
+     &    - lhsKBdiag(1,2,i) * lhsKBdiag(3,3,i)
+        Binv(1,3) =  lhsKBdiag(1,2,i) * lhsKBdiag(2,3,i) 
+     &    - lhsKBdiag(1,3,i) * lhsKBdiag(2,2,i)
+        tmp          = 1d+0 / ( Binv(1,1) * lhsKBdiag(1,1,i) 
+     &    + Binv(1,2) * lhsKBdiag(2,1,i)  
+     &    + Binv(1,3) * lhsKBdiag(3,1,i) )
+        Binv(1,1) = Binv(1,1) * tmp
+        Binv(1,2) = Binv(1,2) * tmp
+        Binv(1,3) = Binv(1,3) * tmp
+        Binv(2,1) = (lhsKBdiag(2,3,i) * lhsKBdiag(3,1,i) 
+     &    - lhsKBdiag(2,1,i) * lhsKBdiag(3,3,i)) * tmp
+        Binv(2,2) = (lhsKBdiag(1,1,i) * lhsKBdiag(3,3,i) 
+     &    - lhsKBdiag(3,1,i) * lhsKBdiag(1,3,i)) * tmp
+        Binv(2,3) = (lhsKBdiag(2,1,i) * lhsKBdiag(1,3,i) 
+     &    - lhsKBdiag(1,1,i) * lhsKBdiag(2,3,i)) * tmp
+        Binv(3,1) = (lhsKBdiag(2,1,i) * lhsKBdiag(3,2,i) 
+     &    - lhsKBdiag(2,2,i) * lhsKBdiag(3,1,i)) * tmp
+        Binv(3,2) = (lhsKBdiag(3,1,i) * lhsKBdiag(1,2,i) 
+     &    - lhsKBdiag(1,1,i) * lhsKBdiag(3,2,i)) * tmp
+        Binv(3,3) = (lhsKBdiag(1,1,i) * lhsKBdiag(2,2,i) 
+     &    - lhsKBdiag(1,2,i) * lhsKBdiag(2,1,i)) * tmp
+        
+        lhsKBdiag(:,:,i) = Binv(:,:)
+        
+      enddo
+      
+      rhstmp(:,1) = RHSG(:,1)
+      rhstmp(:,2) = RHSG(:,2)
+      rhstmp(:,3) = RHSG(:,3)
+      
+      soln = 0d+0
+      rr = 0d+0
+      rr1 = 0d+0
+      rr2 = 0d+0
+      rr3 = 0d+0
+      
+      do n = 1, NNODZ
+        rr1 = rr1 + rhstmp(n,1)*rhstmp(n,1)
+        rr2 = rr2 + rhstmp(n,2)*rhstmp(n,2)
+        rr3 = rr3 + rhstmp(n,3)*rhstmp(n,3)
+      enddo
+      
+      rr = rr1+rr2+rr3
+      rr0 = rr
+      
+      do iter = 1, NNODZ*100
+        
+        
+c...    Premultiply residual by inverse of Bdiag
+        
+        do i = 1, NNODZ
+          zs(i,1) = lhsKBdiag(1,1,i)*rhstmp(i,1) +
+     &      lhsKBdiag(1,2,i)*rhstmp(i,2) + lhsKBdiag(1,3,i)*rhstmp(i,3)
+          zs(i,2) = lhsKBdiag(2,1,i)*rhstmp(i,1) +
+     &      lhsKBdiag(2,2,i)*rhstmp(i,2) + lhsKBdiag(2,3,i)*rhstmp(i,3)
+          zs(i,3) = lhsKBdiag(3,1,i)*rhstmp(i,1) +
+     &      lhsKBdiag(3,2,i)*rhstmp(i,2) + lhsKBdiag(3,3,i)*rhstmp(i,3)
+        enddo
+        
+        tauk = 0d+0
+
+        do i = 1, NNODZ
+          do j = 1, 3
+            tauk = tauk + zs(i,j)*rhstmp(i,j)
+          enddo
+        enddo
+        
+        if(iter.eq.1) then
+          beta = 0d+0
+          pv = zs
+        else
+          beta = tauk/taukm1
+          pv(:,:) = zs(:,:) + beta*pv(:,:)
+        endif
+          
+c...    Periodicity (Slave = Master)         
+        
+        do i = 1, NNODZ
+          
+          if ((IBC(i,1).eq.3).or.(IBC(i,2).eq.3).or.(IBC(i,3).eq.3))
+     &      then
+            pv(i,:) = pv(IPER(i),:) ! Slave = Master
+          endif
+          
+        enddo
+        
+c...    Product
+        
+        call SparseProd_3D (colvec, rowvec, pv, prodtmp)
+        
+        
+c...    Communicate to Masters, Zero out Slaves
+        
+        do i = 1, NNODZ
+          
+          if ((IBC(i,1).eq.3).or.(IBC(i,2).eq.3).or.(IBC(i,3).eq.3))
+     &      then
+            
+            prodtmp(IPER(i),:) = prodtmp(IPER(i),:) + 
+     &        prodtmp(i,:)      ! Master = Master+Slave
+            prodtmp(i,:) = 0d+0 ! Slave = zero
+            
+          endif
+          
+        enddo           
+        
+c...    
+        
+        pp = 0d+0
+        rr1 = 0d+0
+        rr2 = 0d+0
+        rr3 = 0d+0
+        
+        do n = 1, NNODZ
+          rr1 = rr1 + pv(n,1)*prodtmp(n,1)
+          rr2 = rr2 + pv(n,2)*prodtmp(n,2)
+          rr3 = rr3 + pv(n,3)*prodtmp(n,3)
+        enddo
+        
+        pp = rr1 + rr2 + rr3
+        
+        alpha = tauk/pp
+        
+c....   calculate the next guess
+        
+        soln(:,1) = soln(:,1) + alpha*pv(:,1)
+        soln(:,2) = soln(:,2) + alpha*pv(:,2)
+        soln(:,3) = soln(:,3) + alpha*pv(:,3)
+        
+c....   calculate new res. and dot prod. 
+        
+        rhstmp(:,1) = rhstmp(:,1) - alpha*prodtmp(:,1)
+        rhstmp(:,2) = rhstmp(:,2) - alpha*prodtmp(:,2)
+        rhstmp(:,3) = rhstmp(:,3) - alpha*prodtmp(:,3)
+        
+        tmprr = rr
+        
+        rr = 0
+        rr1 = 0
+        rr2 = 0
+        rr3 = 0
+        
+        do n = 1, NNODZ
+          rr1 = rr1 + rhstmp(n,1)*rhstmp(n,1)
+          rr2 = rr2 + rhstmp(n,2)*rhstmp(n,2)
+          rr3 = rr3 + rhstmp(n,3)*rhstmp(n,3)
+        enddo
+        
+        rr = rr1 + rr2 + rr3
+        
+c....   check for convergence 
+
+        taukm1 = tauk
+
+        if (mod(iter,100).eq.0) then
+          write(*,*) "Iteration =  ", iter
+          write(*,*) "Residual reduction =  ", sqrt(rr/rr0)
+        endif 
+
+c        stop 
+        if(sqrt(rr/rr0).lt.1d-12) goto 6000
+        
+      enddo
+      
+ 6000 continue
+      
+c...  communicate solution
+      
+      do i = 1, NNODZ
+        
+        if ((IBC(i,1).eq.3).or.(IBC(i,2).eq.3).or.(IBC(i,3).eq.3))
+     &    then
+          soln(i,:) = soln(IPER(i),:) ! Slave = Master
+        endif
+        
+      enddo
+      
+      deallocate(lhsKBdiag)
+      
+      write(*,9000) iter, sqrt(rr / rr0)
+      
+      return
+ 9000 format(20x,'  number of iterations:', i10,/,
+     &  20x,'    residual reduction:', 2x,e10.2)
+      
+      end

SparseMatLoc_3D.f

+c
+c modified from original code of Jurijs Bazilevs, who stole it from someone else
+c  (this code is the village bicycle)
+c
+c July 1, 2003 
+c J. Austin Cottrell, III
+c
+c renamed SparseMatLoc_3D to be in keeping with 3D suffix convention, but no
+c    changes made.
+
+
+      subroutine SparseMatLoc_3D( list, n, target, locat )
+      
+      integer	n
+      integer	list(n),	target
+      integer	rowvl,	rowvh,	rowv, locat
+c     
+c.... Initialize
+c     
+      rowvl = 1
+      rowvh = n + 1
+c     
+c.... do a binary search
+c     
+ 100  if ( rowvh-rowvl .gt. 1 ) then
+         rowv = ( rowvh + rowvl ) / 2
+         if ( list(rowv) .gt. target ) then
+            rowvh = rowv
+         else
+            rowvl = rowv
+         endif
+         goto 100
+      endif
+c     
+c.... return
+c     
+      locat = rowvl
+c     
+      return
+      end
+c     Yuri P. Bazilevs
+      subroutine VizGMV_3D(ytemp)
+      
+      
+      use aAdjKeep
+      
+      include "common.h"
+      
+      
+c...  Local variables
+      integer e, igauss, jgauss, i, icount, j, k, kgauss, aa,
+     &  NEL_NZA_QUAD, NEL_LIN, NEL_LINm1, icount2, iln, iel, nnodz_lin
+      integer ni,nj,nk,strss_file
+      real*8 shl(NSHL), shg(NSHL), shgradl(NSHL,NSD),
+     &  shgradg(NSHL, NSD), QuantQuad(27,9), ytemp(NNODZ,NSD)
+      real*8 gp(NGAUSS), gw(NGAUSS), skl(2,2,NSD), stl(NSHL,NSD)
+      real*8 lambda, numod, Emod, mu, yl(NSHL,NSD), dist, distmin,
+     &  summ, std, stdmax
+      
+      character*8 word1, word2, word3, word4, 
+     &  word5, word6, varname
+      character*30 fname
+      character*10 cname