Commits

Matthew Emmett  committed f8ac3b1 Merge

Merge branch 'master' of bitbucket.org:memmett/libpfasst

  • Participants
  • Parent commits 5d6a4b2, 6e8d3dd

Comments (0)

Files changed (33)

File Makefile.defaults

 # Default build settings for libpfasst.
 #
 
-FC = gfortran
+FC = mpif90
 CC = mpicc
 AR = ar rcs
 PY = python

File Makefile.external

 # hdf5
 #
 
-HDF5 = hdf5-1.8.11
+HDF5 = hdf5-1.8.13
 HDF5_CONFIG = --enable-fortran --enable-parallel
 
 hdf5: $(HDF5).tar.gz

File Makefile.rules

 build/pf_cycle.o:       build/pf_dtype.o
 build/pf_ndarray.o:     build/pf_dtype.o
 build/pf_utils.o:       build/pf_timer.o
-build/pf_explicit.o:    build/pf_timer.o
+build/pf_explicit.o:    build/pf_timer.o build/pf_utils.o
 build/pf_hooks.o:       build/pf_timer.o
 build/pf_hooks.o:       build/pf_timer.o
-build/pf_implicit.o:    build/pf_timer.o
+build/pf_implicit.o:    build/pf_timer.o build/pf_utils.o
 build/pf_mpi.o:         build/pf_timer.o
 build/pf_logger.o:      build/pf_hooks.o
-build/pf_imex.o:        build/pf_timer.o build/pf_explicit.o build/pf_implicit.o
+build/pf_imex.o:        build/pf_timer.o build/pf_explicit.o build/pf_implicit.o build/pf_utils.o
 build/pf_restrict.o:    build/pf_utils.o build/pf_timer.o build/pf_hooks.o
 build/pf_interpolate.o: build/pf_restrict.o build/pf_hooks.o
 build/pf_parallel.o:    build/pf_interpolate.o build/pf_hooks.o build/pf_cycle.o

File examples/mpi-ad3/Matlab/Vconv.m

+%  Routine to plot error versus iteration by Vcycle
+clear
+Niter=10
+figure(1);clf
+figure(2);clf
+Nx=128; Nstep=64; Nproc=Nstep;
+%Nx=32; Nstep=128; Nproc=Nstep;
+
+N_Vrack=[1,2,3,4,10]
+
+for NN = 1:length(N_Vrack)
+   N_V=N_Vrack(NN)
+fbase=['pfasst_V_'];
+fspec=['Niter',num2str(Niter,'%02d'),'_Nx',num2str(Nx,'%03d'),'_Nstep',num2str(Nstep,'%03d'),'_Nproc',num2str(Nproc,'%03d'),'_',num2str(N_V,'%03d'),'_',num2str(Nstep,'%03d')];
+fname=['../Dat/',fbase,fspec,'.m']
+q=load(fname);
+q_end_ind = find(q(:,1)==3 );
+q128=q(q_end_ind,:);
+
+iter = q128(:,4);
+pde_err = q128(:,6);
+ode_err = q128(:,7);
+res = q128(:,8);
+
+switch(N_V)
+  case 1,
+   clr = '-bx'
+  case 2,
+   clr = '-rx'
+  case 3,
+   clr = '-gx'
+  case 4,
+   clr = '-kx'
+  case 5,
+   clr = '-mx'
+  case 6,
+   clr = '-bo'
+  case 7,
+   clr = '-ro'
+  case 8,
+   clr = '-go'
+  case 9,
+   clr = '-ko'
+  case 10,
+   clr = '-mo'
+end
+xup = 1e-4
+figure(2)
+subplot(1,3,1)
+semilogy(iter,ode_err,clr,'MarkerSize',10); 
+axis([0 10,1e-7,xup])
+title('ODE Err','FontSize',14), hold on;
+xlabel('Iteration','FontSize',14)
+set(gca,'FontSize',14)
+subplot(1,3,2)
+semilogy(iter,pde_err,clr,'MarkerSize',10); 
+axis([0 10,1e-7,xup])
+xlabel('Iteration','FontSize',14)
+title('PDE Err','FontSize',14), hold on;
+set(gca,'FontSize',14)
+subplot(1,3,3)
+semilogy(iter,res,clr,'MarkerSize',10); 
+axis([0 10,1e-11,xup])
+xlabel('Iteration','FontSize',14)
+title('Residual','FontSize',14);hold on;
+set(gca,'FontSize',14)
+end
+legend('NumV=1','NumV=2','NumV=3','NumV=4','NumV=10')

File examples/mpi-ad3/Matlab/hybrid.m

+clear;
+N = 128;
+k=[1:N/2]*2*pi;
+dx=1/N;
+L2=(-2+2*cos(k*dx))/(dx*dx);
+L4=(-30+32*cos(k*dx)-2*cos(2*k*dx))/(12.0*dx*dx);
+
+Nj = 1000;
+for j = 1:Nj
+    nu=0.1*j;
+    stp = 1 + nu*L2./(1-nu*L2);
+    rho22(j) = max(abs(stp));
+    stp = 1 + nu*L4./(1-nu*L4);
+    rho44(j) = max(abs(stp));
+    stp = 1 + nu*L4./(1-nu*L2);
+    rho42(j) = max(abs(stp));
+    plot(stp);
+    pause
+end
+
+plot(rho22,'-k'); hold on;
+plot(rho44,'-b') 
+plot(rho42,'-r'); hold off;

File examples/mpi-ad3/Vconv.py

+import sys
+import subprocess
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+
+#  Run examples to check the iterations in PFASST by Vcycle
+#Nstep_rack.append([300,400,600,800,1600,3200,4800,6400,9600])
+#Nstep_rack.append([200,300,400,600,800,1600,3200,4800,6400,9600])
+#Nstep_rack.append([100,200,250,300,400,500,600,800,1200,1600,2000,2400,3200,4800,6400])
+
+Nnode_rack =[4];
+Numruns=12
+Niters=10
+Nsteps = 128
+Nx=32
+Nvars= [Nx/4, Nx/2, Nx]
+dt = 1.0/(Nsteps)
+
+for N in range(1,11):
+    table = {'Niters' : Niters, "Nsteps" : Nsteps, "Dt" : dt, "Nvar1": Nvars[0], "N_V": N}
+    mycmd= 'mpiexec -n %(Nsteps)s ./main.exe probin.nml  niters=%(Niters)s  nsteps=%(Nsteps)s dt=%(Dt)s N_Vcycles=%(N_V)s' % table
+    print mycmd
+    subprocess.call(mycmd, shell=True)
+
+            #  Plot on the fly
+            #fname = 'Dat/conv_Niter%(Niters)02d_Nsteps%(Nsteps)04d_Nnodes%(Nnodes)d_E.m' % table
+
+            #print fname
+            #with open(fname,'r') as f:
+            #    a=np.loadtxt(f)
+            #print a.shape, a.ndim, Niters
+            #if a.ndim < 2:
+            #    errors[eind]=a[3]
+            #else:
+            #    errors[eind]=a[Niters-1][3]
+
+        
+#        plt.loglog(Nstep_rack[Niters-1],errors,'-*')
+#        plt.title('Convergence with %s Lobatto Nodes' % Nnodes)
+#        plt.xlabel('Number of time step')
+#        plt.ylabel('Rel. Error in Position')
+#        if (Nnodes == 4):
+#            plt.legend(["1 iter","2 iters","3 iters","4 iters"],loc=4)
+#plt.show()
+            
+
+

File examples/mpi-ad3/kiter.nml

+!  parameters to run the tests of error versus iteration for each kfreq
+&PARAMS
+  nprob         = 0	 
+  ndim          = 1
+
+  niters       = 20
+  nsteps       = 1
+
+  poutmod  = 1
+
+  !  Heat equation
+  v            = 0.0
+  nu           = 1.0
+  nlevs = 1
+!  nvars  =   32 64 128
+!  nvars  =    512 512
+  nvars  =    512
+
+  nnodes =     2
+!  nnodes =    3  5 
+!  nnodes =   2 3  3 
+  Finterp = F
+
+  dt           = 0.00625
+!  Tfin = 1.0
+  pfasst_nml = "pfasst.nml"
+  fbase = "Ktest"
+  do_spec = 0
+  N_Vcycles = 20 
+  Nrelax = 2
+  mg_interp_order = 2
+  spatial_order = 2
+
+/

File examples/mpi-ad3/kiter.py

+import sys
+import subprocess
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+
+#  Run examples to check the iterations in PFASST
+#Nstep_rack.append([300,400,600,800,1600,3200,4800,6400,9600])
+#Nstep_rack.append([200,300,400,600,800,1600,3200,4800,6400,9600])
+#Nstep_rack.append([100,200,250,300,400,500,600,800,1200,1600,2000,2400,3200,4800,6400])
+
+Nnode_rack =[4];
+Numruns=12
+Niters=10
+for N in range(5,8):
+    Nx = 2**N
+    Nsteps = Nx
+    Nvars= [Nx/4, Nx/2, Nx]
+    print Nvars
+    dt = 1.0/(Nsteps)
+    print Nsteps,dt
+    table = {'Niters' : Niters, "Nsteps" : Nsteps, "Dt" : dt, "Nvar1": Nvars[0], "Nvar2": Nvars[1], "Nvar3": Nvars[2]}
+    mycmd= 'mpiexec -n %(Nsteps)s ./main.exe probin.nml  niters=%(Niters)s  nsteps=%(Nsteps)s dt=%(Dt)s nvars="%(Nvar1)s %(Nvar2)s %(Nvar3)s"' % table
+    print mycmd
+    subprocess.call(mycmd, shell=True)
+
+            #  Plot on the fly
+            #fname = 'Dat/conv_Niter%(Niters)02d_Nsteps%(Nsteps)04d_Nnodes%(Nnodes)d_E.m' % table
+
+            #print fname
+            #with open(fname,'r') as f:
+            #    a=np.loadtxt(f)
+            #print a.shape, a.ndim, Niters
+            #if a.ndim < 2:
+            #    errors[eind]=a[3]
+            #else:
+            #    errors[eind]=a[Niters-1][3]
+
+        
+#        plt.loglog(Nstep_rack[Niters-1],errors,'-*')
+#        plt.title('Convergence with %s Lobatto Nodes' % Nnodes)
+#        plt.xlabel('Number of time step')
+#        plt.ylabel('Rel. Error in Position')
+#        if (Nnodes == 4):
+#            plt.legend(["1 iter","2 iters","3 iters","4 iters"],loc=4)
+#plt.show()
+            
+
+

File examples/mpi-ad3/pfasst.nml

 &PF_PARAMS
    !  These are internal pfasst variables that must be set
-    nlevels  = 3
+    nlevels  = 1
 
 
     !  These are internal pfasst variables that can be reset
     niters = 10   !  default is 5
 
      !  Type of quadrature nodes (default is 1=Gauss-Lobatto)
-!     qtype   = 1028
-     qtype   = 1
+     qtype   = 1028
+!     qtype   = 1
 
      !  Type of time step windowing   (default is 1 = block windowing)
      window = 1      
      rel_res_tol = 1.0d-14
 
      !  Variable which determine how the predictor runs  (default is .false. and .true.)
-     Pipeline_G =  .true.
+     Pipeline_G =  .false.
      PFASST_pred = .true.
 
      echo_timings = .false.

File examples/mpi-ad3/probin.nml

 !  nu           = 1.0
   nlevs = 3
   nvars  =   32 64 128
+  nvars  =   8 16 32 
 !  nnodes =    3  5 9
   nnodes =   2 3  3 
   Finterp = F
 !  dt           = 0.0078125
-!  dt           = 0.015625
+  dt           = 0.015625
 !  dt           = 0.03125
 !  dt           = 0.04
 !  dt           = 0.3333333333
 !  dt           = 0.25
   Tfin = 0.0
   pfasst_nml = "pfasst.nml"
-  fbase = "pfasst_heat"
+  fbase = "pfasst_V"
   do_spec = 0
   N_Vcycles = 2 
   Nrelax = 2
   mg_interp_order = 2
+  spatial_order = 2
 
 /

File examples/mpi-ad3/src/feval.f90

        dx = 1.0_pfdp/dble(Nx)
        allocate(ybc(1-2:Nx+2))
        call fill_bc_1d(y,ybc,Nx,2)
-       do i = 1,Nx
-          f1(i)=-v*(ybc(i+1)-ybc(i-1))/(2.0_pfdp*dx)
-       end do
+       if (spatial_order == 2) then
+          do i = 1,Nx
+             f1(i)=-v*(ybc(i+1)-ybc(i-1))/(2.0_pfdp*dx)
+          end do
+       else
+          do i = 1,Nx
+             f1(i)=-v*(-(ybc(i+2)-ybc(i-2)) + 16.0_pfdp*(ybc(i+1)-ybc(i-1))/(12.0_pfdp*dx))
+          end do
+       endif
        deallocate(ybc)
     endif
 
        dx = 1.0_pfdp/dble(Nx)
        allocate(ybc(1-2:Nx+2))
        call fill_bc_1d(y,ybc,Nx,2)
-       do i = 1,Nx
-          f2(i)=nu*(-2.0_pfdp*ybc(i) + ybc(i-1)+ybc(i+1))/(dx*dx)
-       end do
+       if (spatial_order == 2) then
+          do i = 1,Nx
+             f2(i)=nu*(-2.0_pfdp*ybc(i) + ybc(i-1)+ybc(i+1))/(dx*dx)
+          end do
+       else
+          do i = 1,Nx
+             f2(i)=nu*(-30.0_pfdp*ybc(i) + 16.0_pfdp*(ybc(i-1)+ybc(i+1))-ybc(i-2)-ybc(i+2))/(12.0_pfdp*dx*dx)
+!             f2(i)=nu*(-2.0_pfdp*ybc(i) + ybc(i-1)+ybc(i+1))/(dx*dx)
+          end do
+       endif
        deallocate(ybc)
 
     endif
     integer(c_int), intent(in)         :: level
 
     real(pfdp),    pointer :: y(:), rhs(:), f2(:)
-    real(pfdp)   :: nudt,maxresid
+    real(pfdp)   :: nudt,maxresid,dx
     type(work1),   pointer :: work
     complex(pfdp), pointer :: wk(:)
-    integer :: k,N_V
-    type(ndarray),pointer :: ymg
+    integer :: k,N_V,Nx,i,j
     type(ndarray),pointer  :: rhsmg
+    type(ndarray),pointer  :: ymg
+    real(pfdp), allocatable :: y0(:)
+    real(pfdp), allocatable :: rhs0(:)
+    real(pfdp), allocatable :: ybc(:)
 
+    Nx = size(y)
 
     call c_f_pointer(levelctx, work)
 
-
     y   => array1(yptr)
     rhs => array1(rhsptr)
     f2  => array1(f2ptr)
       endif
       call c_f_pointer(yptr,ymg)
       call c_f_pointer(rhsptr,rhsmg)
-      if (level ==1 ) then
-         N_V=3
-      else
-         N_V=1
-      endif
-      do k=1,N_Vcycles
-         call Vcycle(ymg, t, nudt, rhsmg, level, c_null_ptr, Nrelax,maxresid) 
-      end do
-      call f2eval1(yptr, t, level, levelctx, f2ptr)
+
+      allocate(y0(1:Nx))
+      dx = 1.0_pfdp/dble(Nx)
+      y0=rhs
+      if (spatial_order == 4) then
+         allocate(rhs0(1:Nx))
+         rhs0=rhs + dt*f2
+         y = rhs + dt*f2
+
+         !  Subtract L2 from rhs
+         allocate(ybc(1-2:Nx+2))
+         do j = 1,10
+            rhs = rhs0 - dt*f2
+            call fill_bc_1d(y,ybc,Nx,2)
+            do i = 1,Nx
+               rhs(i)=rhs0(i)-nu*dt*(-2.0_pfdp*ybc(i) + ybc(i-1)+ybc(i+1))/(dx*dx)
+               rhs(i)=rhs(i)+nu*dt*(-30.0_pfdp*ybc(i) + 16.0_pfdp*(ybc(i-1)+ybc(i+1))-ybc(i-2)-ybc(i+2))/(12.0_pfdp*dx*dx)
+            end do
+            do k=1,N_Vcycles
+               call Vcycle(ymg, t, nudt, rhsmg, level, c_null_ptr, Nrelax,maxresid) 
+            end do
+            call f2eval1(yptr, t, level, levelctx, f2ptr)
+         end do
+         deallocate(ybc)
+       else
+          do k=1,N_Vcycles
+             call Vcycle(ymg, t, nudt, rhsmg, level, c_null_ptr, Nrelax,maxresid) 
+          end do
+          call f2eval1(yptr, t, level, levelctx, f2ptr)
+       endif
+
+       deallocate(y0)          
+
    endif
 
   end subroutine f2comp1

File examples/mpi-ad3/src/hooks.f90

 module hooks
   use pf_mod_dtype
   use pf_mod_ndarray
-  use probin, only: nprob,poutmod, fbase, foutbase
+  use probin, only: nprob,poutmod, fbase, foutbase, N_Vcycles,kfreq
   implicit none
 contains
 
     integer :: un
     character(len=64) :: fout 
     character(len=7) :: stepstring
-    qend => array1(level%qend)
+    character(len=3) :: Vstring
+    character(len=3) :: Kstring
+!    qend => array1(level%qend)
+    qend => array1(level%Q(level%Nnodes))
     t = state%t0+state%dt   
     call exact(t, level%nvars, yexact)
-    max_y=maxval(abs(yexact))
+!    max_y=maxval(abs(yexact))
+    max_y=maxval(abs(qend))
     err = maxval(abs(qend-yexact))
     call exact_ode(t, level%nvars, yexact)
     ODE_err = maxval(abs(qend-yexact))
 
     call pf_residual(pf, level, state%dt)
     res= level%residual
-!    print '(" lev:",i5," step:",i5," t=",es10.3," iter:",i3," Max_y:",es13.6," Err:",es13.6," ODEERR:",es13.6," Res:",es13.6)', &
-!               level%level,state%step+1, t,state%iter, max_y,err,ODE_err,res
+    print '(" lev:",i5," step:",i5," t=",es10.3," iter:",i3," Max_y:",es13.6," Err:",es13.6," ODEERR:",es13.6," Res:",es13.6)', &
+               level%level,state%step+1, t,state%iter, max_y,err,ODE_err,res
 
     un = 1000+state%step+1
     write(stepstring,"(I0.3)") state%step+1
-    fout = trim(foutbase)//'_'//trim(stepstring)//'.m'
+    write(Vstring,"(I0.2)") N_Vcycles
+    write(Kstring,"(I0.3)") kfreq
+    fout = trim(foutbase)//'N_V'//trim(Vstring)//'N_step'//trim(stepstring)//'K'//trim(Kstring)//'.m'
     open(unit=un, file = fout, status = 'unknown', action = 'write', position='append')
     write(un,*)  level%level,state%step+1,t,state%iter,max_y,err,ODE_err,res
     close(un)

File examples/mpi-ad3/src/main.f90

   integer        :: ierror, iprovided, l
   character(len = 64) :: fout
   character(len = 64) :: fname
-  character(256) :: probin_fname
+  character(len = 64) :: probin_fname
+  character(len = 64) :: shell_cmd
   integer        :: iout,nout
 
 
   ! read options
   !
 
-  if (command_argument_count() == 1) then
+  if (command_argument_count() > 1) then
      call get_command_argument(1, value=probin_fname)
   else
      probin_fname = "probin.nml"
   if (ierror .ne. 0) &
        stop "ERROR: Can't initialize MPI."
 
-
   !
   ! initialize pfasst
   !
   ! run
   !
 
-!  call pf_add_hook(pf, nlevs, PF_POST_SWEEP, echo_error_hook)
+  call pf_add_hook(pf, -1, PF_POST_PREDICTOR, echo_error_hook)
   call pf_add_hook(pf,-1,PF_POST_ITERATION,echo_error_hook)
 
   call ndarray_create_simple(q1, pf%levels(nlevs)%shape)
      nsteps = comm%nproc
   end if
 
+  !  Make directory for Data if it does not exist
+  if (len_trim(output) > 0) then
+     call ndarray_mkdir(output, len_trim(output))
+     ! call pf_add_hook(pf, nlevs, PF_POST_SWEEP, ndarray_dump_hook)
+!     call pf_add_hook(pf, -1, PF_POST_SWEEP, ndarray_dump_hook)
+  end if
+
+  call system('if [ ! -e ./Dat ]; then mkdir Dat; fi')
+  shell_cmd = 'if [ ! -e ./Dat/'//trim(fbase)//' ]; then mkdir Dat/'//trim(fbase)//'; fi'
+  call system(shell_cmd)
   ! open output files
   write (fname, "(A,I0.2,A3,I0.3,A6,I0.3,A6,I0.3)") 'Niter',pf%niters,'_Nx',nvars(nlevs),'_Nstep',nsteps,'_Nproc',comm%nproc
-  foutbase = 'Dat/'//trim(fbase)//'_'//trim(fname)
-
-  foutbase = 'Dat/'//trim(fbase)//'_'//trim(fname)
+  foutbase = 'Dat/'//trim(fbase)//'/'//trim(fname)
   print *,'foutbase=',foutbase
 
 !  open(unit=101, file = foutbase, status = 'unknown', action = 'write')
   !  Output the run parameters
   if (pf%rank == 0) then
      call pf_print_options(pf, 6,.TRUE.)
-     fout = 'Dat/'//trim(fbase)//'_'//trim(fname)//'_params.m'
+     fout = 'Dat/'//trim(fbase)//'/'//trim(fname)//'_params.m'
      print*, fout
      open(unit=103, file = fout, status = 'unknown', action = 'write')
      do iout=1,2
         write(nout,*) 'dt=',dt
         write(nout,*) 'nu=',nu
         write(nout,*) 'v=',v
+        write(nout,*) 'kfreq=',kfreq
         write(nout,*) 'do_spec',do_spec
+        write(nout,*) 'fbase=',fbase
         if (do_spec .eq. 0) then
            write(nout,*) 'N_Vcycles=',N_Vcycles
            write(nout,*) 'Nrelax',Nrelax

File examples/mpi-ad3/src/multigrid.f90

     allocate(ybc(1-spatial_order:Nx+spatial_order))
 
 
-    select case (spatial_order)
-    case (2)  ! Centered diff
+!    select case (spatial_order)
+!    case (2)  ! Centered diff
        sig = nudt/(dx*dx)
        sigi = (dx*dx)/(nudt)
 !!$       do n = 1,Nrelax
 !!$          end do
 !!$       end do
 
-    case default
-       write(*,*) 'Bad case in multigrid.f90, relax:  spatial_order=',spatial_order
-    end select
+ !   case default
+ !      write(*,*) 'Bad case in multigrid.f90, relax:  spatial_order=',spatial_order
+ !   end select
 
     deallocate(ybc)
   end subroutine relax_1d
     Nx = yptr%shape(1)
     dx    = Lx/dble(Nx)
 
-    select case (spatial_order)
-    case (2)  ! Centered diff
+!    select case (spatial_order)
+!    case (2)  ! Centered diff
 !!$       sigi = (dx*dx)/(nudt)
 !!$       do n = 1,Nrelax
 !!$          do irb = 1,2
 !!$          end do
 !!$       end do
 
-    case default
-       write(*,*) 'Bad case in multigrid.f90, relax:  spatial_order=',spatial_order
-    end select
+!    case default
+!       write(*,*) 'Bad case in multigrid.f90, relax:  spatial_order=',spatial_order
+!    end select
   end subroutine relax_2d
   
   recursive subroutine Vcycle(yptr, t, nudt, rhsptr, level, ctx, Nrelax,maxresid)
 !!$       Lap = (-30.0_pfdp*y%array + 16.0_pfdp*(yp + ym)-(ypp+ymm))/(12.0_pfdp*dx*dx)
 !!$
     case default
-       write(*,*) 'Bad case in feval.f90, feval_init:  spatial_order=', spatial_order
+       write(*,*) 'Bad case in feval.f90, feval_init:  dim=', yptr%dim
     end select
   end subroutine resid
 
 !!$       Lap = (-30.0_pfdp*y%array + 16.0_pfdp*(yp + ym)-(ypp+ymm))/(12.0_pfdp*dx*dx)
 !!$
     case default
-       write(*,*) 'Bad case in feval.f90, feval_init:  spatial_order=', spatial_order
+       write(*,*) 'Bad case in feval.f90, feval_init:  dim=', yptr%dim
     end select
     res = rhs - (y - nudt*Lap)
 !!$    print *,'----------------------------------------------------'
     integer   :: i
 
     ybc(1:Nx)=y
-    do i = 1,Nbc
-       ybc(Nx+i)=-ybc(i)
-       ybc(1-i)=-ybc(Nx+1-i)
-    end do
-    
+    ! do i = 1,Nbc
+    !    ybc(Nx+i)=-ybc(Nx-i)
+    !    ybc(1-i)=-ybc(Nx+1-i)
+    ! end do
+
+    call set_bc_1d(ybc,Nx,Nbc)
+
   end subroutine fill_bc_1d
   subroutine set_bc_1d(ybc, Nx, Nbc)
     integer, intent(in)      ::  Nx, Nbc
     real(pfdp), intent(inout) :: ybc(1-Nbc:Nx+Nbc)
     integer   :: i
     do i = 1,Nbc
-       ybc(Nx+i)=-ybc(i)
-       ybc(1-i)=-ybc(Nx+1-i)
+       ! ybc(Nx+i)=-ybc(i)
+       ybc(1-i)=-ybc(1+i)
+    end do
+    ybc(Nx+1) = 0
+    do i = 1,Nbc-1
+       ybc(Nx+1+i)=-ybc(Nx+1-i)
+       ! ybc(1-i)=-ybc(Nx+1-i)
     end do
 
 !!$    ybc(Nx+1)=ybc(1)

File examples/mpi-ad3/src/probin.f90

   double precision, save :: nu     ! viscosity
   double precision, save :: t0     ! initial time for exact solution
   double precision, save :: sigma  ! initial condition parameter
+  integer,          save :: kfreq  ! initial condition parameter
   double precision, save :: dt     ! time step
   double precision, save :: Tfin   ! Final time
 
   namelist /params/ spatial_order,interp_order, mg_interp_order, do_spec, N_Vcycles,Nrelax
   namelist /params/ window_type, pfasst_nml,fbase ,poutmod
   namelist /params/  abs_tol, rel_tol 
-  namelist /params/  v, nu, t0, dt, Tfin,sigma 
+  namelist /params/  v, nu, t0, dt, Tfin,sigma, kfreq
 
 contains
 
     Lx      = 1._pfdp
     nu      = 0.02_pfdp
     sigma   = 0.004_pfdp
+    kfreq   = 1
     t0      = 0.25_pfdp
     dt      = 0.01_pfdp
     Tfin    = 0.0_pfdp
     !
     !  Read in stuff from a file
     un = 9
-!    write(*,*) 'opening file ',TRIM(filename), '  for input'
+    write(*,*) 'opening file ',TRIM(filename), '  for input'
     open(unit=un, file = filename, status = 'old', action = 'read')
     read(unit=un, nml = params)
     close(unit=un)

File examples/mpi-ad3/src/solutions.f90

   end subroutine sinusoidal
 
   subroutine exact_ode(t, nvars, yex)
+    use probin, only:  nu, Lx, spatial_order, kfreq
     real(pfdp), intent(in)  :: t
     integer,    intent(in)  :: nvars
     real(pfdp), intent(out) :: yex(nvars)
     yex = 0.0_pfdp
 
     dx = 1.0_pfdp/dble(nvars)
-    rho = (2.0_pfdp-2.0_pfdp*cos(pi*dx))/(dx*dx)
+    rho = (2.0_pfdp-2.0_pfdp*cos(kfreq*pi*dx))/(dx*dx)
     do i = 1, nvars
        x = dx*dble(i-1) - t*v
-       yex(i) = yex(i) + dsin(pi*x)*dexp(-rho*nu*t)
+       yex(i) = yex(i) + dsin(kfreq*pi*x)*dexp(-rho*nu*t)
     end do
 
   end subroutine exact_ode
     dx = 1.0_pfdp/dble(nvars)
     do i = 1, nvars
        x = dx*dble(i-1) - t*v
-       yex(i) = yex(i) + dsin(pi*x)*dexp(-pi*pi*nu*t)
+       yex(i) = yex(i) + dsin(kfreq*pi*x)*dexp(-kfreq*kfreq*pi*pi*nu*t)
     end do
 
   end subroutine exact

File examples/mpi-navier-stokes/Makefile

 
 LIBPFASST ?= ../..
 FFTW3     ?= $(LIBPFASST)/fftw3
+HDF5      ?= $(LIBPFASST)/hdf5
 
 EXE = main.exe
 
 include $(LIBPFASST)/Makefile.defaults
 
 VPATHS += src
-FSRC   += src/main.f90 src/feval.f90 src/transfer.f90 src/hooks.f90 src/encap.f90 src/fftwpp.f90
+FSRC   += src/main.f90 src/feval.f90 src/transfer.f90 src/hooks.f90 src/encap.f90 src/fftwpp.f90 src/initial.f90
 CSRC   += src/numpy.c
 CXXSRC += src/cfftw++.cpp src/fftw++.cpp src/convolution.cpp
 
-FFLAGS  += -I$(FFTW3)/include -fopenmp
+FFLAGS  += -O3 -I$(FFTW3)/include -I$(HDF5)/include -fopenmp
 CFLAGS  += -I$(FFTW3)/include -fopenmp
-LDFLAGS += -L$(FFTW3)/lib -lfftw3 -fopenmp -lstdc++ -lfftw3_omp
+LDFLAGS += -fopenmp -lstdc++ -ldl -lz -L$(FFTW3)/lib -lfftw3 -lfftw3_omp -L$(HDF5)/lib -lhdf5_fortran -lhdf5
 
 vpath %.cpp $(VPATHS)
 
 build/solutions.o: build/pfasst.o
 build/feval.o:     build/pfasst.o build/encap.o build/fftwpp.o
 build/hooks.o:     build/numpy.o build/solutions.o
-build/main.o:      build/feval.o build/hooks.o build/transfer.o
+build/main.o:      build/feval.o build/hooks.o build/transfer.o build/initial.o

File examples/mpi-navier-stokes/src/feval.f90

 
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
-  subroutine vortex_sheets(q0)
-    type(carray4), intent(inout) :: q0
-
-    double precision :: x, y, z
-    integer :: ii, i, j, k, n
-
-    type(c_ptr) :: ffft, wkp
-
-    complex(c_double), pointer :: u(:,:,:), v(:,:,:), w(:,:,:), wk(:,:,:)
-    double precision, parameter :: delta = 0.1d0, rho=50.0d0
-
-    n = q0%shape(1)
-
-    allocate(u(n,n,n), v(n,n,n), w(n,n,n))
-
-    u = -1.0d0
-    w =  0.0d0
-
-    do i = 1, n
-       x = dble(i) / n
-       do j = 1, n
-          y = dble(j) / n
-          do k = 1, n
-             z = dble(k) / n
-
-             do ii = -4, 4
-                u(k, j, i) = u(k, j, i) + tanh(rho*(ii + y - 0.25d0))
-                u(k, j, i) = u(k, j, i) + tanh(rho*(ii + 0.75d0 - y))
-             end do
-
-             v(k, j, i) = delta * sin(6.28318530718d0*(x + 0.25d0))
-
-          end do
-       end do
-    end do
-
-    wkp  = fftw_alloc_complex(int(n**3, c_size_t))
-    ffft = fftw_plan_dft_3d(n, n, n, wk, wk, FFTW_FORWARD, FFTW_ESTIMATE)
-
-    call c_f_pointer(wkp, wk, [ n, n, n ])
-
-    wk = u
-    call fftw_execute_dft(ffft, wk, wk)
-    q0%array(:,:,:,1) = wk
-
-    wk = v
-    call fftw_execute_dft(ffft, wk, wk)
-    q0%array(:,:,:,2) = wk
-
-    wk = w
-    call fftw_execute_dft(ffft, wk, wk)
-    q0%array(:,:,:,3) = wk
-
-    q0%array = q0%array / n**3
-
-    deallocate(u,v,w,wk)
-
-    call fftw_destroy_plan(ffft)
-
-  end subroutine vortex_sheets
-
-  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
   subroutine divergence(cptr, n1, n2, n3, u, div)
     implicit none
     type(c_ptr),               intent(in), value :: cptr
     call c_f_pointer(ctx%vw, vw, [ ctx%n, ctx%n, ctx%n ])
     call c_f_pointer(ctx%ww, ww, [ ctx%n, ctx%n, ctx%n ])
 
-    !$omp parallel workshare
+!xxx    !$omp parallel workshare
     u1 = y%array(:,:,:,1)
     v1 = y%array(:,:,:,2)
     w1 = y%array(:,:,:,3)
     uw = y%array(:,:,:,1)
     vw = y%array(:,:,:,2)
     ww = y%array(:,:,:,3)
-    !$omp end parallel workshare
+!xxx    !$omp end parallel workshare
   end subroutine copy_for_convolve
 
   subroutine eval_f1(yptr, t, level, ctxptr, f1ptr)

File examples/mpi-navier-stokes/src/hooks.f90

-
-
 module hooks
   use pfasst
   use encap
     deallocate(ustar)
   end subroutine project_hook
 
+  subroutine dump(dname, fname, q)
+    use pf_mod_ndarray
+    character(len=*), intent(in   ) :: dname, fname
+    type(carray4),    intent(in   ) :: q
+
+    real(c_double), pointer :: rarray(:)
+    type(c_ptr) :: tmp
+
+    tmp = c_loc(q%array(1,1,1,1))
+    call c_f_pointer(tmp, rarray, [ product(2*q%shape) ])
+
+    call ndarray_dump_numpy(trim(dname)//c_null_char, trim(fname)//c_null_char, '<c16'//c_null_char, &
+         4, q%shape, product(2*q%shape), rarray)
+
+  end subroutine dump
+
+
+
+  subroutine dump_hook(pf, level, state, levelctx)
+    use pf_mod_ndarray
+
+    type(pf_pfasst_t),   intent(inout) :: pf
+    type(pf_level_t),    intent(inout) :: level
+    type(pf_state_t),    intent(in)    :: state
+    type(c_ptr),         intent(in)    :: levelctx
+
+    character(len=256)     :: fname
+    type(carray4), pointer :: qend
+
+    call c_f_pointer(level%qend, qend)
+
+    write(fname, "('s',i0.5,'i',i0.3,'l',i0.2,'.npy')") &
+         state%step, state%iter, level%level
+
+    call dump(pf%outdir, fname, qend)
+
+  end subroutine dump_hook
+
 end module hooks

File examples/mpi-navier-stokes/src/initial.f90

+module initial
+  use encap
+  implicit none
+  include 'fftw3.f03'
+
+  type :: amplitude
+     double precision :: ax, ay, az
+     integer :: kx, ky, kz
+  end type amplitude
+contains
+
+  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+  subroutine vortex_sheets(q0)
+    type(carray4), intent(inout) :: q0
+
+    double precision :: x, y, z
+    integer :: ii, i, j, k, n
+
+    type(c_ptr) :: ffft, wkp
+
+    complex(c_double), pointer :: u(:,:,:), v(:,:,:), w(:,:,:), wk(:,:,:)
+    double precision, parameter :: delta = 0.1d0, rho=50.0d0
+
+    n = q0%shape(1)
+
+    allocate(u(n,n,n), v(n,n,n), w(n,n,n))
+
+    u = -1.0d0
+    w =  0.0d0
+
+    do i = 1, n
+       x = dble(i) / n
+       do j = 1, n
+          y = dble(j) / n
+          do k = 1, n
+             z = dble(k) / n
+
+             do ii = -4, 4
+                u(k, j, i) = u(k, j, i) + tanh(rho*(ii + y - 0.25d0))
+                u(k, j, i) = u(k, j, i) + tanh(rho*(ii + 0.75d0 - y))
+             end do
+
+             v(k, j, i) = delta * sin(6.28318530718d0*(x + 0.25d0))
+
+          end do
+       end do
+    end do
+
+    wkp  = fftw_alloc_complex(int(n**3, c_size_t))
+    ffft = fftw_plan_dft_3d(n, n, n, wk, wk, FFTW_FORWARD, FFTW_ESTIMATE)
+
+    call c_f_pointer(wkp, wk, [ n, n, n ])
+
+    wk = u
+    call fftw_execute_dft(ffft, wk, wk)
+    q0%array(:,:,:,1) = wk
+
+    wk = v
+    call fftw_execute_dft(ffft, wk, wk)
+    q0%array(:,:,:,2) = wk
+
+    wk = w
+    call fftw_execute_dft(ffft, wk, wk)
+    q0%array(:,:,:,3) = wk
+
+    q0%array = q0%array / n**3
+
+    deallocate(u,v,w,wk)
+
+    call fftw_destroy_plan(ffft)
+
+  end subroutine vortex_sheets
+
+  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+  subroutine random_full(q0)
+    type(carray4), intent(inout) :: q0
+
+    type(amplitude), allocatable :: amps(:)
+    integer :: namps
+
+    double precision :: kappa, scale
+    integer :: kx, ky, kz, i, j, k, n, a
+
+    double precision :: x, y, z, r(3)
+    double precision, parameter :: sigma = 99.0d0
+
+    complex(c_double), pointer :: u(:,:,:), v(:,:,:), w(:,:,:), wk(:,:,:)
+
+    type(c_ptr) :: ffft, wkp
+
+    n = q0%shape(1)
+
+    allocate(u(n,n,n), v(n,n,n), w(n,n,n))
+    u = 0; v = 0; w = 0
+
+    namps = 0
+    allocate(amps(n*n*n/8))
+
+    do kz = 1, n/2
+       do ky = 1, n/2
+          do kx = 1, n/2
+             if (kx + ky + kz > n/2) cycle
+
+             kappa = dble(kx**2 + ky**2 + kz**2)**0.5
+             scale = kappa**(-5.0/3) * exp(-kappa**2/sigma)
+
+             call random_number(r)
+
+             namps = namps + 1
+             amps(namps)%kx = kx
+             amps(namps)%ky = ky
+             amps(namps)%kz = kz
+             amps(namps)%ax = r(1) * scale
+             amps(namps)%ay = r(2) * scale
+             amps(namps)%az = r(3) * scale
+          end do
+       end do
+    end do
+
+    !$omp parallel do private(i,j,k)
+    do i = 1, n
+       x = 6.28318530718d0 * dble(i-1) / n
+       do j = 1, n
+          y = 6.28318530718d0 * dble(j-1) / n
+          do k = 1, n
+             z = 6.28318530718d0 * dble(k-1) / n
+             do a = 1, namps
+                u(k,j,i) = u(k,j,i) + amps(a)%ax * cos(amps(a)%ky * y) * cos(amps(a)%kz * z)
+                v(k,j,i) = v(k,j,i) + amps(a)%ay * cos(amps(a)%kx * x) * cos(amps(a)%kz * z)
+                w(k,j,i) = w(k,j,i) + amps(a)%az * cos(amps(a)%kx * x) * cos(amps(a)%ky * y)
+             end do
+          end do
+       end do
+    end do
+    !$omp end parallel do
+
+    wkp  = fftw_alloc_complex(int(n**3, c_size_t))
+    ffft = fftw_plan_dft_3d(n, n, n, wk, wk, FFTW_FORWARD, FFTW_ESTIMATE)
+
+    call c_f_pointer(wkp, wk, [ n, n, n ])
+
+    wk = u
+    call fftw_execute_dft(ffft, wk, wk)
+    q0%array(:,:,:,1) = wk
+
+    wk = v
+    call fftw_execute_dft(ffft, wk, wk)
+    q0%array(:,:,:,2) = wk
+
+    wk = w
+    call fftw_execute_dft(ffft, wk, wk)
+    q0%array(:,:,:,3) = wk
+
+    q0%array = q0%array / n**3
+
+    deallocate(u,v,w,wk,amps)
+
+    call fftw_destroy_plan(ffft)
+
+  end subroutine random_full
+
+  subroutine load(q0, fname)
+    use hdf5
+    implicit none
+    type(carray4),    intent(inout) :: q0
+    character(len=*), intent(in   ) :: fname
+
+    integer(hid_t)   :: plist, ctype, file, group, dataset
+    integer(size_t)  :: re_size
+    integer(hsize_t) :: dims(4)
+    integer          :: err
+
+    real(8), pointer    :: buf(:)
+    complex(8), pointer :: cbuf(:,:,:,:)
+
+    type complex_t
+       double precision :: re
+       double precision :: im
+    end type complex_t
+
+    call h5open_f(err)
+
+    ! create 'complex' compound type
+    call h5tget_size_f(H5T_NATIVE_DOUBLE, re_size, err)
+    call h5tcreate_f(H5T_COMPOUND_F, 2*re_size, ctype, err)
+    call h5tinsert_f(ctype, "r", int(0, size_t), H5T_NATIVE_DOUBLE, err)
+    call h5tinsert_f(ctype, "i", int(re_size, size_t), H5T_NATIVE_DOUBLE, err)
+
+    ! open the initial condition file and root group
+    call h5pcreate_f(H5P_FILE_ACCESS_F, plist, err)
+    call h5fopen_f(fname, H5F_ACC_RDONLY_F, file, err, access_prp=plist)
+    call h5gopen_f(file, "/", group, err)
+
+    ! read 'q0' dataset
+    dims = q0%shape
+    allocate(buf(2*product(q0%shape)))
+
+    call h5dopen_f(group, "q0", dataset, err)
+    call h5dread_f(dataset, ctype, buf, dims, err)
+    call h5dclose_f(dataset, err)
+
+    call c_f_pointer(c_loc(buf(1)), cbuf, q0%shape)
+    q0%array = cbuf
+
+    deallocate(buf)
+
+    ! tidy up
+    call h5gclose_f(group, err)
+    call h5fclose_f(file, err)
+    call h5tclose_f(ctype, err)
+    call h5pclose_f(plist, err)
+    call h5close_f(err)
+  end subroutine load
+
+
+end module initial

File examples/mpi-navier-stokes/src/main.f90

 program fpfasst
   use pfasst
   use feval
+  use initial
   use hooks
   use encap
   use transfer
-  use pf_mod_mpi, only: MPI_THREAD_FUNNELED, MPI_COMM_WORLD
+  use pf_mod_ndarray
+  use pf_mod_mpi, only: MPI_THREAD_MULTIPLE, MPI_COMM_WORLD
 
   implicit none
 
   type(pf_encap_t),   target :: encaps
 
   ! initialize mpi
-  call mpi_init_thread(mpi_thread_funneled, iprovided, ierror)
+  call mpi_init_thread(mpi_thread_multiple, iprovided, ierror)
   if (ierror .ne. 0) &
        stop "ERROR: Can't initialize MPI."
 
   call mpi_comm_size(mpi_comm_world, nprocs, ierror)
 
   nthreads = -1
-  nsteps   = 32
+  nsteps   = 16
   if (nprocs == 1) then
      nlevs = 1
   else
-     nlevs = 3
+     nlevs = 2
   end if
 
   if (nthreads < 0) then
   end if
 
   ! initialize pfasst
-  nx     = [ 32, 64, 128 ]
+!  nx     = [ 32, 64, 128 ]
+  nx     = [ 16, 32, 64 ]
   nvars  = 2 * 3 * nx**3
   nnodes = [ 2, 3, 5 ]
-  dt     = 0.001d0
+  dt     = 0.0001d0
 
   call carray4_encap_create(encaps)
   call pf_mpi_create(tcomm, MPI_COMM_WORLD)
   call pf_pfasst_create(pf, tcomm, nlevs)
 
-  nlevs = pf%nlevels
+  ! nlevs = pf%nlevels
   first = size(nx) - nlevs + 1
 
   if (nprocs == 1) then
   call pf_pfasst_setup(pf)
 
   ! initialize advection/diffusion
+  if (len_trim(pf%outdir) == 0) pf%outdir = "."
+
   call carray4_create(q0, pf%levels(nlevs)%shape)
-  call vortex_sheets(q0)
+  ! call vortex_sheets(q0)
+  ! call random_full(q0)
+  call load(q0, 'full064_s990.h5')
+  if (pf%rank == 0) then
+     call dump(pf%outdir, 'initial.npy', q0)
+  end if
 
   do l = 1, nlevs
      call pf_add_hook(pf, l, PF_POST_SWEEP, project_hook)
      print *, 'NTHREADS: ', nthreads
      print *, 'NSTEPS:   ', nsteps
      print *, 'NPROCS:   ', pf%comm%nproc
+     print *, 'OUTPUT:   ', len_trim(pf%outdir)
+  end if
+
+  if (len_trim(pf%outdir) > 0) then
+     call ndarray_mkdir(pf%outdir, len_trim(pf%outdir))
+     call pf_add_hook(pf, -1, PF_POST_SWEEP, dump_hook)
   end if
 
   ! call pf_logger_attach(pf)
   call mpi_barrier(pf%comm%comm, ierror)
   call pf_pfasst_run(pf, c_loc(q0), dt, 0.0d0, nsteps=nsteps)
 
-
   ! done
   call mpi_barrier(pf%comm%comm, ierror)
 

File src/pf_dtype.f90

   type :: pf_sweeper_t
      type(c_ptr) :: sweeperctx
      integer     :: npieces
-     procedure(pf_sweep_p),      pointer, nopass :: sweep
-     procedure(pf_initialize_p), pointer, nopass :: initialize
-     procedure(pf_evaluate_p),   pointer, nopass :: evaluate
-     procedure(pf_integrate_p),  pointer, nopass :: integrate
-     procedure(pf_sweepdestroy_p),  pointer, nopass :: destroy
+     procedure(pf_sweep_p),        pointer, nopass :: sweep
+     procedure(pf_initialize_p),   pointer, nopass :: initialize
+     procedure(pf_evaluate_p),     pointer, nopass :: evaluate
+     procedure(pf_evaluate_all_p), pointer, nopass :: evaluate_all
+     procedure(pf_integrate_p),    pointer, nopass :: integrate
+     procedure(pf_residual_p),     pointer, nopass :: residual
+     procedure(pf_sweepdestroy_p), pointer, nopass :: destroy
   end type pf_sweeper_t
 
   type :: pf_encap_t
      integer     :: level = -1          ! level number (1 is the coarsest)
      logical     :: Finterp = .false.   ! interpolate functions instead of solutions
 
+
      real(pfdp)  :: residual
 
      type(pf_encap_t),         pointer         :: encap
      logical :: Pipeline_G =  .false.
      logical :: PFASST_pred = .false.
 
+     integer     :: taui0 = -999999     ! Cutoff for tau inclusion
+
      ! pf objects
      type(pf_state_t), pointer :: state
      type(pf_level_t), pointer :: levels(:)
      end subroutine pf_hook_p
 
      ! sweeper interfaces
-     subroutine pf_sweep_p(pf, F, t0, dt)
+     subroutine pf_sweep_p(pf, Lev, t0, dt)
        import pf_pfasst_t, pf_level_t, pfdp, c_ptr
        type(pf_pfasst_t), intent(inout) :: pf
        real(pfdp),        intent(in)    :: dt, t0
-       type(pf_level_t),  intent(inout) :: F
+       type(pf_level_t),  intent(inout) :: Lev
      end subroutine pf_sweep_p
 
-     subroutine pf_evaluate_p(F, t, m)
+     subroutine pf_evaluate_p(Lev, t, m)
        import pf_level_t, pfdp, c_ptr
-       type(pf_level_t), intent(inout) :: F
+       type(pf_level_t), intent(inout) :: Lev
        real(pfdp),       intent(in)    :: t
        integer,          intent(in)    :: m
      end subroutine pf_evaluate_p
 
-     subroutine pf_initialize_p(F)
+     subroutine pf_evaluate_all_p(Lev, t)
+       import pf_level_t, pfdp, c_ptr
+       type(pf_level_t), intent(inout) :: Lev
+       real(pfdp),       intent(in)    :: t(:)
+     end subroutine pf_evaluate_all_p
+
+     subroutine pf_initialize_p(Lev)
        import pf_level_t
-       type(pf_level_t), intent(inout) :: F
+       type(pf_level_t), intent(inout) :: Lev
      end subroutine pf_initialize_p
 
      subroutine pf_sweepdestroy_p(sweeper)
        type(pf_sweeper_t), intent(inout)  :: sweeper
      end subroutine pf_sweepdestroy_p
 
-     subroutine pf_integrate_p(F, qSDC, fSDC, dt, fintSDC)
+     subroutine pf_integrate_p(Lev, qSDC, fSDC, dt, fintSDC)
        import pf_level_t, c_ptr, pfdp
-       type(pf_level_t),  intent(in)    :: F
+       type(pf_level_t),  intent(in)    :: Lev
        type(c_ptr),       intent(in)    :: qSDC(:), fSDC(:, :)
        real(pfdp),        intent(in)    :: dt
        type(c_ptr),       intent(inout) :: fintSDC(:)
      end subroutine pf_integrate_p
 
+     subroutine pf_residual_p(Lev, dt)
+       import pf_level_t, c_ptr, pfdp
+       type(pf_level_t),  intent(inout) :: Lev
+       real(pfdp),        intent(in)    :: dt
+     end subroutine pf_residual_p
+
      ! transfer interfaces
      subroutine pf_transfer_p(qF, qG, levelF, levelctxF, levelG, levelctxG, t)
        import c_ptr, pfdp

File src/pf_explicit.f90

 
 module pf_mod_explicit
   use pf_mod_dtype
+  use pf_mod_utils
   implicit none
   integer, parameter, private :: npieces = 1
 
     sweeper%initialize => explicit_initialize
     sweeper%integrate  => explicit_integrate
     sweeper%destroy    => pf_explicit_destroy
+    sweeper%evaluate_all => pf_generic_evaluate_all
+    sweeper%residual     => pf_generic_residual
 
     sweeper%sweeperctx = c_loc(exp)
   end subroutine pf_explicit_create

File src/pf_imex.f90

 
 module pf_mod_imex
   use pf_mod_dtype
+  use pf_mod_utils
   use pf_mod_explicit, only: pf_f1eval_p
   use pf_mod_implicit, only: pf_f2eval_p, pf_f2comp_p
   implicit none
      procedure(pf_f2eval_p), pointer, nopass :: f2eval
      procedure(pf_f2comp_p), pointer, nopass :: f2comp
 
-     !  Matrices
-     real(pfdp), ALLOCATABLE :: SdiffE(:,:)
-     real(pfdp), ALLOCATABLE :: SdiffI(:,:)
-
+     real(pfdp), allocatable :: SdiffE(:,:)
+     real(pfdp), allocatable :: SdiffI(:,:)
   end type pf_imex_t
 
 contains
     imex%f2comp => f2comp
 
     sweeper%npieces = npieces
-    sweeper%sweep      => imex_sweep
-    sweeper%evaluate   => imex_evaluate
-    sweeper%initialize => imex_initialize
-    sweeper%integrate  => imex_integrate
-    sweeper%destroy    => pf_imex_destroy
+    sweeper%sweep        => imex_sweep
+    sweeper%evaluate     => imex_evaluate
+    sweeper%initialize   => imex_initialize
+    sweeper%integrate    => imex_integrate
+    sweeper%destroy      => pf_imex_destroy
+    sweeper%evaluate_all => pf_generic_evaluate_all
+    sweeper%residual     => pf_generic_residual
 
     sweeper%sweeperctx = c_loc(imex)
   end subroutine pf_imex_create

File src/pf_implicit.f90

 
 module pf_mod_implicit
   use pf_mod_dtype
+  use pf_mod_utils
   implicit none
   integer, parameter, private :: npieces = 1
 
     sweeper%evaluate   => implicit_evaluate
     sweeper%initialize => implicit_initialize
     sweeper%integrate  => implicit_integrate
+    sweeper%evaluate_all => pf_generic_evaluate_all
+    sweeper%residual     => pf_generic_residual
 
     sweeper%sweeperctx = c_loc(imp)
   end subroutine pf_implicit_create

File src/pf_interpolate.f90

        !  end do
     else
        ! recompute fs
-       do m = 1, LevF%nnodes
-          call LevF%sweeper%evaluate(LevF, tF(m), m)
-       end do
+!       do m = 1, LevF%nnodes
+!          call LevF%sweeper%evaluate(LevF, tF(m), m)
+!       end do
+       call LevF%sweeper%evaluate_all(LevF, tF)
     end if  !  Feval
 
     !  Reset qend so that it is up to date

File src/pf_ndarray.f90

 
      subroutine ndarray_dump_numpy(dname, fname, endian, dim, shape, nvars, array) bind(c)
        use iso_c_binding
-       character(c_char), intent(in   )        :: dname, fname, endian(4)
+       character(c_char), intent(in   )        :: dname, fname, endian(5)
        integer(c_int),    intent(in   ), value :: dim, nvars
        integer(c_int),    intent(in   )        :: shape(dim)
        real(c_double),    intent(in   )        :: array(nvars)

File src/pf_numpy.c

   mkdir(dname, 0755);
 }
 
-void ndarray_dump_numpy(char *dname, char *fname, char endian[4], int dim, int *shape, int nvars, double *array)
+void ndarray_dump_numpy(char *dname, char *fname, char endian[5], int dim, int *shape, int nvars, double *array)
 {
   unsigned short  i, len, pad;
   FILE*           fp;

File src/pf_options.f90

     character(len=*),  intent(in   ), optional :: fname
 
     ! local versions of pfasst parameters
-    integer          :: niters, nlevels, qtype, window
+    integer          :: niters, nlevels, qtype, window, taui0
     double precision :: abs_res_tol, rel_res_tol
     logical          :: pipeline_g , pfasst_pred, echo_timings
 
     character(len=32)  :: arg
     character(len=255) :: istring  ! stores command line argument
     character(len=255) :: message  ! use for i/o error messages
+    character(len=512) :: outdir
 
     ! define the namelist for reading
     namelist /pf_params/ niters, nlevels, qtype, abs_res_tol, rel_res_tol, window
-    namelist /pf_params/ pipeline_g, pfasst_pred, echo_timings
+    namelist /pf_params/ pipeline_g, pfasst_pred, echo_timings, taui0, outdir
 
     ! set local variables to pf_pfasst defaults
     nlevels      = pf%nlevels
     pipeline_g   = pf%pipeline_g
     pfasst_pred  = pf%pfasst_pred
     echo_timings = pf%echo_timings
+    taui0        = pf%taui0
+    outdir       = pf%outdir
 
     ! open the file fname and read the pfasst namelist
     if (present(fname))  then
     pf%pipeline_g   = pipeline_g
     pf%pfasst_pred  = pfasst_pred
     pf%echo_timings = echo_timings
+    pf%taui0        = taui0
+    pf%outdir       = outdir
 
     if (pf%nlevels < 1) then
        write(*,*) 'Bad specification for nlevels=', pf%nlevels
     write(un,*) 'nnodes:      ', pf%levels(1:pf%nlevels)%nnodes, '! number of sdc nodes per level'
     write(un,*) 'nvars:       ', pf%levels(1:pf%nlevels)%nvars, '! number of degrees of freedom per level'
     write(un,*) 'nsweeps:     ', pf%levels(1:pf%nlevels)%nsweeps, '! number of sdc sweeps performed per visit to each level'
+    write(un,*) 'taui0:     ',   pf%taui0, '! cutoff for tau correction'
 
     if (pf%comm%nproc > 1) then
        if (pf%window == PF_WINDOW_RING) then

File src/pf_parallel.f90

 
 
     if (pf%comm%nproc > 1) then
+       G => pf%levels(1)
        if (pf%Pipeline_G .and. (G%nsweeps > 1)) then
           !  This is the weird choice.  We burn in without communication, then do extra sweeps
           G => pf%levels(1)
        ! Return to fine level...
        call pf_v_cycle_post_predictor(pf, t0, dt)
 
+    else
+
+       ! Single processor... sweep on coarse and return to fine level.
+
+       G => pf%levels(1)
+       do k = 1, pf%rank + 1
+          pf%state%iter = -k
+          t0k = t0-(pf%rank)*dt + (k-1)*dt
+
+          call call_hooks(pf, G%level, PF_PRE_SWEEP)
+          do j = 1, G%nsweeps
+             call G%sweeper%sweep(pf, G, t0k, dt)
+          end do
+          call pf_residual(pf, G, dt)
+          call call_hooks(pf, G%level, PF_POST_SWEEP)
+       end do
+
+       ! Return to fine level...
+       call pf_v_cycle_post_predictor(pf, t0, dt)
+
     end if
 
     call end_timer(pf, TPREDICTOR)
-    call call_hooks(pf, 1, PF_POST_PREDICTOR)
+    call call_hooks(pf, -1, PF_POST_PREDICTOR)
 
     pf%state%iter   = 0
     pf%state%status = PF_STATUS_ITERATING
     type(pf_pfasst_t), intent(inout) :: pf
     real(pfdp),        intent(inout) :: residual, energy
 
-    real(pfdp) :: residual1, energy1
+    real(pfdp) :: residual1
 
     residual1 = pf%levels(pf%nlevels)%residual
     if (pf%state%status == PF_STATUS_ITERATING .and. residual > 0.0d0) then
           call call_hooks(pf, F%level, PF_POST_SWEEP)
        end if
 
-          
+
     end do
 
   end subroutine pf_v_cycle

File src/pf_pfasst.f90

 
     if (present(nlevels)) pf%nlevels = nlevels
 
+    pf%outdir = ""
+
     ! gather some input from a file and command line
     read_cmd = .true.
     if (present(nocmd)) then

File src/pf_restrict.f90

   !
   ! Restrict (in time and space) qF to qG.
   !
-  subroutine restrict_sdc(F, G, qF, qG, integral,tF)
+  subroutine restrict_sdc(LevF, LevG, qF, qG, integral,tF)
     use pf_mod_utils, only: pf_apply_mat
 
-    type(pf_level_t), intent(inout) :: F, G
+    type(pf_level_t), intent(inout) :: LevF, LevG
     type(c_ptr),      intent(inout) :: qF(:), qG(:)
     logical,          intent(in)    :: integral
     real(pfdp),       intent(in) :: tF(:)
 
     if (integral) then
 
-       allocate(qFr(F%nnodes-1))
+       allocate(qFr(LevF%nnodes-1))
 
-       do m = 1, F%nnodes-1
-          call G%encap%create(qFr(m), G%level, SDC_KIND_INTEGRAL, &
-               G%nvars, G%shape, G%levelctx, G%encap%encapctx)
-          call F%restrict(qF(m), qFr(m), F%level, F%levelctx, G%level, G%levelctx,tF(m))
+       do m = 1, LevF%nnodes-1
+          call LevG%encap%create(qFr(m), LevG%level, SDC_KIND_INTEGRAL, &
+               LevG%nvars, LevG%shape, LevG%levelctx, LevG%encap%encapctx)
+          call LevF%restrict(qF(m), qFr(m), LevF%level, LevF%levelctx, LevG%level, LevG%levelctx,tF(m))
        end do
 
        ! when restricting '0 to node' integral terms, skip the first
        ! entry since it is zero
-       call pf_apply_mat(qG, 1.d0, F%rmat(2:,2:), qFr, G%encap)
+       call pf_apply_mat(qG, 1.d0, LevF%rmat(2:,2:), qFr, LevG%encap)
 
     else
 
-       allocate(qFr(F%nnodes))
+       allocate(qFr(LevF%nnodes))
 
-       do m = 1, F%nnodes
-          call G%encap%create(qFr(m), G%level, SDC_KIND_SOL_NO_FEVAL, &
-               G%nvars, G%shape, G%levelctx, G%encap%encapctx)
-          call F%restrict(qF(m), qFr(m), F%level, F%levelctx, G%level, G%levelctx,tF(m))
+       do m = 1, LevF%nnodes
+          call LevG%encap%create(qFr(m), LevG%level, SDC_KIND_SOL_NO_FEVAL, &
+               LevG%nvars, LevG%shape, LevG%levelctx, LevG%encap%encapctx)
+          call LevF%restrict(qF(m), qFr(m), LevF%level, LevF%levelctx, LevG%level, LevG%levelctx,tF(m))
        end do
 
-       call pf_apply_mat(qG, 1.d0, F%rmat, qFr, G%encap)
+       call pf_apply_mat(qG, 1.d0, LevF%rmat, qFr, LevG%encap)
 
     end if
 
     do m = 1, size(qFr)
-       call G%encap%destroy(qFr(m))
+       call LevG%encap%destroy(qFr(m))
     end do
     deallocate(qFr)
 
   ! we should still compute the FAS correction since the function
   ! evaluations may be different.
   !
-  subroutine restrict_time_space_fas(pf, t0, dt, F, G)
+  subroutine restrict_time_space_fas(pf, t0, dt, LevF, LevG)
     type(pf_pfasst_t), intent(inout) :: pf
     real(pfdp),        intent(in)    :: t0, dt
-    type(pf_level_t),  intent(inout) :: F, G
+    type(pf_level_t),  intent(inout) :: LevF, LevG
 
     integer    :: m
-    real(pfdp) :: tG(G%nnodes)
-    real(pfdp) :: tF(F%nnodes)
+    real(pfdp) :: tG(LevG%nnodes)
+    real(pfdp) :: tF(LevF%nnodes)
     type(c_ptr) :: &
-         tmpG(G%nnodes), &    ! coarse integral of coarse function values
-         tmpF(F%nnodes), &    ! fine integral of fine function values
-         tmpFr(G%nnodes)      ! coarse integral of restricted fine function values
+         tmpG(LevG%nnodes), &    ! coarse integral of coarse function values
+         tmpF(LevF%nnodes), &    ! fine integral of fine function values
+         tmpFr(LevG%nnodes)      ! coarse integral of restricted fine function values
 
-    call call_hooks(pf, F%level, PF_PRE_RESTRICT_ALL)
-    call start_timer(pf, TRESTRICT + F%level - 1)
+    call call_hooks(pf, LevF%level, PF_PRE_RESTRICT_ALL)
+    call start_timer(pf, TRESTRICT + LevF%level - 1)
 
     !
     ! create workspaces
     !
-    do m = 1, G%nnodes
-       call G%encap%create(tmpG(m), G%level, SDC_KIND_INTEGRAL, &
-            G%nvars, G%shape, G%levelctx, G%encap%encapctx)
-       call G%encap%create(tmpFr(m), G%level, SDC_KIND_INTEGRAL, &
-            G%nvars, G%shape, G%levelctx, G%encap%encapctx)
+    do m = 1, LevG%nnodes
+       call LevG%encap%create(tmpG(m), LevG%level, SDC_KIND_INTEGRAL, &
+            LevG%nvars, LevG%shape, LevG%levelctx, LevG%encap%encapctx)
+       call LevG%encap%create(tmpFr(m), LevG%level, SDC_KIND_INTEGRAL, &
+            LevG%nvars, LevG%shape, LevG%levelctx, LevG%encap%encapctx)
     end do
 
-    do m = 1, F%nnodes
-       call F%encap%create(tmpF(m), F%level, SDC_KIND_INTEGRAL, &
-            F%nvars, F%shape, F%levelctx, F%encap%encapctx)
+    do m = 1, LevF%nnodes
+       call LevF%encap%create(tmpF(m), LevF%level, SDC_KIND_INTEGRAL, &
+            LevF%nvars, LevF%shape, LevF%levelctx, LevF%encap%encapctx)
     end do
 
 
     !
     ! restrict q's and recompute f's
     !
-    tG = t0 + dt*G%nodes
-    tF = t0 + dt*F%nodes
+    tG = t0 + dt*LevG%nodes
+    tF = t0 + dt*LevF%nodes
 
-    call restrict_sdc(F, G, F%Q, G%Q, .false.,tF)
+    call restrict_sdc(LevF, LevG, LevF%Q, LevG%Q, .false.,tF)
 
-    do m = 1, G%nnodes
-       call G%sweeper%evaluate(G, tG(m), m)
-    end do
+!    do m = 1, LevG%nnodes
+!       call LevG%sweeper%evaluate(LevG, tG(m), m)
+!    end do
+     call LevG%sweeper%evaluate_all(LevG, tG)
 
 
     !
     ! fas correction
     !
-
-    do m = 1, G%nnodes-1
-       call G%encap%setval(G%tau(m), 0.0_pfdp)
-    end do
-
-    ! compute '0 to node' integral on the coarse level
-
-    call G%sweeper%integrate(G, G%Q, G%F, dt, tmpG)
-    do m = 2, G%nnodes-1
-       call G%encap%axpy(tmpG(m), 1.0_pfdp, tmpG(m-1))
+    do m = 1, LevG%nnodes-1
+       call LevG%encap%setval(LevG%tau(m), 0.0_pfdp)
     end do
-
-    ! restrict '0 to node' integral on the fine level (which was
-    ! computed during the last call to pf_residual)
-
-    call restrict_sdc(F, G, F%I, tmpFr, .true.,tF)
-
-    ! compute 'node to node' tau correction
-
-    call G%encap%axpy(G%tau(1),  1.0_pfdp, tmpFr(1))
-    call G%encap%axpy(G%tau(1), -1.0_pfdp, tmpG(1))
-
-    do m = 2, G%nnodes-1
-       call G%encap%axpy(G%tau(m),  1.0_pfdp, tmpFr(m))
-       call G%encap%axpy(G%tau(m), -1.0_pfdp, tmpFr(m-1))
-
-       call G%encap%axpy(G%tau(m), -1.0_pfdp, tmpG(m))
-       call G%encap%axpy(G%tau(m),  1.0_pfdp, tmpG(m-1))
-    end do
-    if (pf%state%iter < 1)  then
-       do m = 1, G%nnodes-1
-          call G%encap%setval(G%tau(m), 0.0_pfdp)
+    if (pf%state%iter >= pf%taui0)  then
+       ! compute '0 to node' integral on the coarse level
+       call LevG%sweeper%integrate(LevG, LevG%Q, LevG%F, dt, tmpG)
+       do m = 2, LevG%nnodes-1
+          call LevG%encap%axpy(tmpG(m), 1.0_pfdp, tmpG(m-1))
        end do
-    end if
+       
+       ! compute '0 to node' integral on the fine level
+       call LevF%sweeper%integrate(LevF, LevF%Q, LevF%F, dt, LevF%I)
+       do m = 2, LevF%nnodes-1
+          call LevF%encap%axpy(LevF%I(m), 1.0_pfdp, LevF%I(m-1))
+       end do
+       
+       ! restrict '0 to node' integral on the fine level (which was
+       ! computed during the last call to pf_residual)
+       call restrict_sdc(LevF, LevG, LevF%I, tmpFr, .true.,tF)
+       
+       ! compute 'node to node' tau correction
+       
+       call LevG%encap%axpy(LevG%tau(1),  1.0_pfdp, tmpFr(1))
+       call LevG%encap%axpy(LevG%tau(1), -1.0_pfdp, tmpG(1))
+       
+       do m = 2, LevG%nnodes-1
+          call LevG%encap%axpy(LevG%tau(m),  1.0_pfdp, tmpFr(m))
+          call LevG%encap%axpy(LevG%tau(m), -1.0_pfdp, tmpFr(m-1))
+          
+          call LevG%encap%axpy(LevG%tau(m), -1.0_pfdp, tmpG(m))
+          call LevG%encap%axpy(LevG%tau(m),  1.0_pfdp, tmpG(m-1))
+       end do
+      end if
     !
     ! tidy
     !
 
-    do m = 1, G%nnodes
-       call G%encap%destroy(tmpG(m))
-       call G%encap%destroy(tmpFr(m))
+    do m = 1, LevG%nnodes
+       call LevG%encap%destroy(tmpG(m))
+       call LevG%encap%destroy(tmpFr(m))
     end do
 
-    do m = 1, F%nnodes
-       call F%encap%destroy(tmpF(m))
+    do m = 1, LevF%nnodes
+       call LevF%encap%destroy(tmpF(m))
     end do
 
-    call end_timer(pf, TRESTRICT + F%level - 1)
-    call call_hooks(pf, F%level, PF_POST_RESTRICT_ALL)
+    call end_timer(pf, TRESTRICT + LevF%level - 1)
+    call call_hooks(pf, LevF%level, PF_POST_RESTRICT_ALL)
 
   end subroutine restrict_time_space_fas
 

File src/pf_utils.f90

   !
   ! Spread initial condition.
   !
-  subroutine spreadq0(F, t0)
-    type(pf_level_t), intent(inout) :: F
+  subroutine spreadq0(Lev, t0)
+    type(pf_level_t), intent(inout) :: Lev
     real(pfdp),       intent(in)    :: t0
 
     integer :: m, p
 
-    call F%encap%unpack(F%Q(1), F%q0)
-    call F%sweeper%evaluate(F, t0, 1)
+    call Lev%encap%unpack(Lev%Q(1), Lev%q0)
+    call Lev%sweeper%evaluate(Lev, t0, 1)
 
-    do m = 2, F%nnodes
-       call F%encap%copy(F%Q(m), F%Q(1))
-       do p = 1, F%sweeper%npieces
-          call F%encap%copy(F%F(m,p), F%F(1,p))
+    do m = 2, Lev%nnodes
+       call Lev%encap%copy(Lev%Q(m), Lev%Q(1))
+       do p = 1, Lev%sweeper%npieces
+          call Lev%encap%copy(Lev%F(m,p), Lev%F(1,p))
        end do
     end do
   end subroutine spreadq0
   !
   ! Save current Q and F.
   !
-  subroutine save(F)
-    type(pf_level_t), intent(inout) :: F
+  subroutine save(Lev)
+    type(pf_level_t), intent(inout) :: Lev
 
     integer :: m, p
 
-    if (F%Finterp) then
+    if (Lev%Finterp) then
        !  Changed by MM Dec. 21, 2013 to save solutions too
-       if (associated(F%pF)) then
-          do m = 1, F%nnodes
-             do p = 1,size(F%F(1,:))
-                call F%encap%copy(F%pF(m,p), F%F(m,p))
+       if (associated(Lev%pF)) then
+          do m = 1, Lev%nnodes
+             do p = 1,size(Lev%F(1,:))
+                call Lev%encap%copy(Lev%pF(m,p), Lev%F(m,p))
              end do
-             call F%encap%copy(F%pQ(m), F%Q(m))
+             call Lev%encap%copy(Lev%pQ(m), Lev%Q(m))
           end do
-!          call F%encap%copy(F%pQ(1), F%Q(1))
+!          call Lev%encap%copy(Lev%pQ(1), Lev%Q(1))
        end if
     else
-       if (associated(F%pQ)) then
-          do m = 1, F%nnodes
-             call F%encap%copy(F%pQ(m), F%Q(m))
+       if (associated(Lev%pQ)) then
+          do m = 1, Lev%nnodes
+             call Lev%encap%copy(Lev%pQ(m), Lev%Q(m))
           end do
        end if
     end if
   ! node' integral and store it in I.  This is used later when doing
   ! restriction (see restrict_time_space_fas).
   !
-  subroutine pf_residual(pf, F, dt)
+  subroutine pf_residual(pf, Lev, dt)
     type(pf_pfasst_t), intent(inout) :: pf
-    type(pf_level_t),  intent(inout) :: F
+    type(pf_level_t),  intent(inout) :: Lev
     real(pfdp),        intent(in)    :: dt
 
-    real(pfdp) :: norms(F%nnodes-1)
+    real(pfdp) :: norms(Lev%nnodes-1)
     integer :: m, n
 
-    if (pf%nlevels == 1 .and. pf%abs_res_tol == 0 .and. pf%rel_res_tol == 0) return
+!    if (pf%nlevels == 1 .and. pf%abs_res_tol == 0 .and. pf%rel_res_tol == 0) return
+!   I think we often want the residual for diagnostics.  Maybe need flag to turn this off
+!   for efficiency?
 
     call start_timer(pf, TRESIDUAL)
 
-    call F%sweeper%integrate(F, F%Q, F%F, dt, F%I)
-    do m = 2, F%nnodes-1
-       call F%encap%axpy(F%I(m), 1.0_pfdp, F%I(m-1))
-    end do
-
-    ! add tau (which is 'node to node')
-    if (associated(F%tau)) then
-       do m = 1, F%nnodes-1
-          do n = 1, m
-             call F%encap%axpy(F%I(m), 1.0_pfdp, F%tau(n))
-          end do
-       end do
-    end if
-
-    ! subtract out Q
-    do m = 1, F%nnodes-1
-       call F%encap%copy(F%R(m), F%Q(1))
-       call F%encap%axpy(F%R(m),  1.0_pfdp, F%I(m))
-       call F%encap%axpy(F%R(m), -1.0_pfdp, F%Q(m+1))
-    end do
+    call Lev%sweeper%residual(Lev, dt)
 
     ! compute max residual norm
-    do m = 1, F%nnodes-1
-       norms(m) = F%encap%norm(F%R(m))
+    do m = 1, Lev%nnodes-1
+       norms(m) = Lev%encap%norm(Lev%R(m))
     end do
-    F%residual = maxval(abs(norms))
+    Lev%residual = maxval(abs(norms))
 
     call end_timer(pf, TRESIDUAL)
 
     end do
   end subroutine pf_apply_mat
 
+  !
+  ! Generic residual
+  !
+  ! Compute the SDC residual for level Lev and store it in Lev%R.
+  ! The '0 to node' integral of the function values is accumulated in
+  ! Lev%I, any FAS correction stored in Lev%tau (held in 'node to
+  ! node' form) is folded in, and the residual at each node interval m
+  ! is then R(m) = Q(1) + I(m) - Q(m+1).
+  subroutine pf_generic_residual(Lev, dt)
+    type(pf_level_t),  intent(inout) :: Lev   ! level: reads Q, F, tau; writes I, R
+    real(pfdp),        intent(in)    :: dt    ! time step size
+
+    integer :: m, n
+
+    ! integrate returns 'node to node' integrals in Lev%I; the running
+    ! sum below converts them to '0 to node' form
+    call Lev%sweeper%integrate(Lev, Lev%Q, Lev%F, dt, Lev%I)
+    do m = 2, Lev%nnodes-1
+       call Lev%encap%axpy(Lev%I(m), 1.0_pfdp, Lev%I(m-1))
+    end do
+
+    ! add tau (which is 'node to node'): I(m) gets the sum of tau(1..m)
+    if (associated(Lev%tau)) then
+       do m = 1, Lev%nnodes-1
+          do n = 1, m
+             call Lev%encap%axpy(Lev%I(m), 1.0_pfdp, Lev%tau(n))
+          end do
+       end do
+    end if
+
+    ! subtract out Q: R(m) = Q(1) + I(m) - Q(m+1)
+    do m = 1, Lev%nnodes-1
+       call Lev%encap%copy(Lev%R(m), Lev%Q(1))
+       call Lev%encap%axpy(Lev%R(m),  1.0_pfdp, Lev%I(m))
+       call Lev%encap%axpy(Lev%R(m), -1.0_pfdp, Lev%Q(m+1))
+    end do
+
+  end subroutine pf_generic_residual
+
+  !
+  ! Generic evaluate all
+  !
+  ! Evaluate the function values at every SDC node of level Lev by
+  ! calling the sweeper's single-node evaluate once per node.  Sweepers
+  ! that can evaluate all nodes at once (e.g. with threading) may
+  ! override this default via the evaluate_all procedure pointer.
+  subroutine pf_generic_evaluate_all(Lev, t)
+    type(pf_level_t),  intent(inout) :: Lev   ! level whose F values are recomputed
+    real(pfdp),        intent(in)    :: t(:)  ! times of the SDC nodes
+
+    integer :: m
+    do m = 1, Lev%nnodes
+       call Lev%sweeper%evaluate(Lev, t(m), m)
+    end do
+  end subroutine pf_generic_evaluate_all
+
 end module pf_mod_utils