Commits

Tom Roche committed 7ab17a6

first real commit to proto-package for "regrid helpers"

Got tired of keeping track of the latest helper code shared by various other projects:

https://bitbucket.org/tlroche/gfed-3.1_global_to_aqmeii-na
https://bitbucket.org/tlroche/edgar-4.2_minus_soil_and_biomass_to_aqmeii-na
https://bitbucket.org/tlroche/clm_cn_global_to_aqmeii-na
https://bitbucket.org/tlroche/geia_regrid
https://bitbucket.org/tlroche/aqmeii_ag_soil

(et al.), especially after one more change to R on terrae, so I am (finally!) making a separate repo. The code committed here is judged to be the latest (or rather greatest) copy, based on

$ ls -alt /home/rtd/code/regridding/CLM_CN_global_to_AQMEII-NA/bash_utilities.sh \
  /home/rtd/code/regridding/AQMEII_ag_soil/bash_utilities.sh \
  /home/rtd/code/regridding/regrid_utils/bash_utilities.sh \
  /home/rtd/code/regridding/GFED-3.1_global_to_AQMEII-NA/bash_utilities.sh \
  /home/rtd/code/regridding/GEIA_regrid/bash_utilities.sh
* 8970 Apr 26 09:30 ./CLM_CN_global_to_AQMEII-NA/bash_utilities.sh
> 8890 Apr 23 14:09 ./GEIA_regrid/bash_utilities.sh
> 8165 Apr 20 17:01 ./AQMEII_ag_soil/bash_utilities.sh
> 8165 Mar 31 20:16 ./GFED-3.1_global_to_AQMEII-NA/bash_utilities.sh

$ ls -alt /home/rtd/code/regridding/regrid_utils/get_daily_emis_fp.ncl \
  /home/rtd/code/regridding/GFED-3.1_global_to_AQMEII-NA/get_daily_emis_fp.ncl
* 1337 Apr 14 19:24 ./GFED-3.1_global_to_AQMEII-NA/get_daily_emis_fp.ncl

$ ls -alt /home/rtd/code/regridding/regrid_utils/get_input_areas.ncl \
  /home/rtd/code/regridding/GFED-3.1_global_to_AQMEII-NA/get_input_areas.ncl
* 8729 Apr 16 13:05 ./GFED-3.1_global_to_AQMEII-NA/get_input_areas.ncl

$ ls -alt /home/rtd/code/regridding/regrid_utils/get_output_areas.ncl \
  /home/rtd/code/regridding/GFED-3.1_global_to_AQMEII-NA/get_output_areas.ncl
* 8336 Apr 16 16:00 ./GFED-3.1_global_to_AQMEII-NA/get_output_areas.ncl

$ ls -alt /home/rtd/code/regridding/CLM_CN_global_to_AQMEII-NA/lats_lons_sphere_area.ncl \
  /home/rtd/code/regridding/regrid_utils/lats_lons_sphere_area.ncl
* 3011 Apr 25 11:59 ./CLM_CN_global_to_AQMEII-NA/lats_lons_sphere_area.ncl

$ ls -alt /home/rtd/code/regridding/MOZART_global_to_AQMEII-NA/plotLayersForTimestep.r \
  /home/rtd/code/regridding/GEIA_regrid.bak/plotLayersForTimestep.r \
  /home/rtd/code/regridding/regrid_utils/plotLayersForTimestep.r \
  /home/rtd/code/regridding/GEIA_regrid/plotLayersForTimestep.r
* 9708 Apr 22 17:00 ./GEIA_regrid/plotLayersForTimestep.r
> 9708 Mar 6 20:32 ./GEIA_regrid.bak/plotLayersForTimestep.r
> 9708 Dec 22 00:17 ./MOZART_global_to_AQMEII-NA/plotLayersForTimestep.r

$ ls -alt /home/rtd/code/regridding/CLM_CN_global_to_AQMEII-NA/string.ncl \
  /home/rtd/code/regridding/regrid_utils/string.ncl
* 1854 Apr 24 15:13 ./CLM_CN_global_to_AQMEII-NA/string.ncl

$ ls -alt /home/rtd/code/regridding/CLM_CN_global_to_AQMEII-NA/summarize.ncl \
  /home/rtd/code/regridding/EDGAR-4.2_minus_soil_and_biomass/summarize.ncl \
  /home/rtd/code/regridding/GEIA_regrid.bak/summarize.ncl \
  /home/rtd/code/regridding/regrid_utils/summarize.ncl
* 4429 Mar 17 17:37 ./GEIA_regrid.bak/summarize.ncl
> 4429 Mar 15 19:10 ./EDGAR-4.2_minus_soil_and_biomass/summarize.ncl
> 1913 Mar 10 18:41 ./CLM_CN_global_to_AQMEII-NA/summarize.ncl

$ ls -alt /home/rtd/code/regridding/CLM_CN_global_to_AQMEII-NA/time.ncl \
  /home/rtd/code/regridding/EDGAR-4.2_minus_soil_and_biomass/time.ncl \
  /home/rtd/code/regridding/GEIA_regrid.bak/time.ncl \
  /home/rtd/code/regridding/regrid_utils/time.ncl \
  /home/rtd/code/regridding/GFED-3.1_global_to_AQMEII-NA/time.ncl
* 2178 Apr 10 13:29 ./GFED-3.1_global_to_AQMEII-NA/time.ncl
> 1134 Mar 8 15:43 ./GEIA_regrid.bak/time.ncl
> 1134 Mar 8 14:02 ./CLM_CN_global_to_AQMEII-NA/time.ncl
> 1134 Mar 8 14:02 ./EDGAR-4.2_minus_soil_and_biomass/time.ncl

$ ls -alt /home/rtd/code/regridding/CLM_CN_global_to_AQMEII-NA/visualization.r \
  /home/rtd/code/regridding/EDGAR-4.2_minus_soil_and_biomass/visualization.r \
  /home/rtd/code/regridding/AQMEII_ag_soil/visualization.r \
  /home/rtd/code/regridding/regrid_utils/visualization.r \
  /home/rtd/code/regridding/GFED-3.1_global_to_AQMEII-NA/visualization.r
* 11959 Apr 20 17:26 ./AQMEII_ag_soil/visualization.r
> 11959 Apr 4 21:23 ./GFED-3.1_global_to_AQMEII-NA/visualization.r
> 10377 Mar 18 19:18 ./CLM_CN_global_to_AQMEII-NA/visualization.r
> 10377 Mar 18 17:15 ./EDGAR-4.2_minus_soil_and_biomass/visualization.r

$ ls -alt /home/rtd/code/regridding/CLM_CN_global_to_AQMEII-NA/netCDF.stats.to.stdout.r \
  /home/rtd/code/regridding/MOZART_global_to_AQMEII-NA/netCDF.stats.to.stdout.r \
  /home/rtd/code/regridding/EDGAR-4.2_minus_soil_and_biomass/netCDF.stats.to.stdout.r \
  /home/rtd/code/regridding/AQMEII_ag_soil/netCDF.stats.to.stdout.r \
  /home/rtd/code/regridding/regrid_utils/netCDF.stats.to.stdout.r \
  /home/rtd/code/regridding/GFED-3.1_global_to_AQMEII-NA/netCDF.stats.to.stdout.r
* 12842 Apr 24 15:11 ./CLM_CN_global_to_AQMEII-NA/netCDF.stats.to.stdout.r
> 12842 Apr 20 20:10 ./AQMEII_ag_soil/netCDF.stats.to.stdout.r
> 14621 Apr 3 20:22 ./GFED-3.1_global_to_AQMEII-NA/netCDF.stats.to.stdout.r
> 14298 Mar 11 18:52 ./EDGAR-4.2_minus_soil_and_biomass/netCDF.stats.to.stdout.r
> 6376 Dec 22 00:17 ./MOZART_global_to_AQMEII-NA/netCDF.stats.to.stdout.r

TODO:
* redo the projects above to check out this code instead of keeping local copies (see the sketch below)
* make "real" NCL and R packages

Parent commit: eddf5e4

Files changed (11)

File bash_utilities.sh

+# description---------------------------------------------------------
+
+# Source me for bash convenience functions: notably,
+# ensuring system/environment dependencies are available.
+# Note this assumes a Linux platform; dependencies include
+# * `hostname`
+
+# code----------------------------------------------------------------
+
+# constants-----------------------------------------------------------
+
+### TODO: take switches for help, debugging, no/eval, target drive
+
+# Following does not work with `source`
+# THIS="$0"
+# THIS_FN="$(basename ${THIS})"
+# THIS_DIR="$(dirname ${THIS})"
+
+### constants by model/run
+# TODO: read IOAPI info from CCTM Makefile
+IOAPI_VERSION="3.1" # desired
+
+### constants by host/cluster
+
+## tlrPanP5
+# HDF5 version per http://www.hdfgroup.org/hdf5-quest.html#vers1 :
+# `find /usr/lib/ | fgrep -ie 'libhdf5.a' | xargs strings | fgrep -ne 'HDF5 library version:'`
+TLRP_HDF5_VERSION='1.8.8'
+TLRP_NCL_VERSION="ncl_ncarg-6.1.1.Linux_Debian6.0_x86_64_nodap_gcc445"
+TLRP_NETCDF_VERSION='4.1.3' # AND I NOW HAVE R PACKAGE=ncdf4 !!!
+TLRP_R_VERSION='2.15.1'
+
+# NCARG_ROOT required by NCL
+TLRP_NCARG_ROOT="${HOME}/bin/${TLRP_NCL_VERSION}"
+TLRP_NCL_PATH="${TLRP_NCARG_ROOT}/bin"
+TLRP_R_PATH='/usr/bin'
+
+## HPCC
+HPCC_NCO_VERSION='4.0.8' # on infinity, anyway
+# a much less regular environment than EMVL :-(
+AMAD1_NCL_VERSION='ncl_ncarg-6.1.2.Linux_RHEL5.6_x86_64_nodap_gcc412'
+
+HPCC_R_PATH='/share/linux86_64/bin'
+# `ncdump` now on hpcc in /usr/bin
+#HPCC_NCDUMP_PATH='/share/linux86_64/grads/supplibs-2.2.0/x86_64-unknown-linux-gnu/bin'
+HPCC_IOAPI_LIB_PATH="/project/air5/roche/CMAQ-5-eval/lib/ioapi_${IOAPI_VERSION}"
+HPCC_IOAPI_BIN_PATH="${HPCC_IOAPI_LIB_PATH}"
+HPCC_NCO_PATH="/share/linux86_64/nco/nco-${HPCC_NCO_VERSION}/bin"
+
+AMAD1_NCARG_ROOT="${HOME}/bin/${AMAD1_NCL_VERSION}"
+AMAD1_NCL_PATH="${AMAD1_NCARG_ROOT}/bin"
+
+## terrae/EMVL
+TERRAE_MODULE_COMPILER='gcc-4.1.2'
+
+#TERRAE_HDF5_VERSION='1.8.7'
+TERRAE_HDF5_VERSION='1.8.10/intel-13.0'
+TERRAE_INTEL_VERSION='13.1'
+#TERRAE_NCO_VERSION='4.0.5'
+#TERRAE_NCO_VERSION='4.2.3/${TERRAE_MODULE_COMPILER}'
+TERRAE_NCO_VERSION='4.2.3'
+#TERRAE_NETCDF_VERSION='4.1.2'
+TERRAE_NETCDF_VERSION='4.1.2/intel-13.0'
+#TERRAE_R_VERSION='2.15.2'
+TERRAE_R_VERSION='3.0.0'
+
+TERRAE_HDF5_MODULE="hdf5-${TERRAE_HDF5_VERSION}"
+
+TERRAE_INTEL_MODULE="intel-${TERRAE_INTEL_VERSION}"
+
+#TERRAE_IOAPI_MODULE="ioapi-${IOAPI_VERSION}/${TERRAE_MODULE_COMPILER}"
+TERRAE_IOAPI_MODULE="ioapi-${IOAPI_VERSION}"
+
+TERRAE_NCO_MODULE="nco-${TERRAE_NCO_VERSION}"
+
+TERRAE_NETCDF_MODULE="netcdf-${TERRAE_NETCDF_VERSION}"
+
+#TERRAE_NCL_MODULE_VERSION='6.0.0' # outdated
+TERRAE_NCL_VERSION='ncl_ncarg-6.1.2.Linux_RHEL5.6_x86_64_gcc412'
+# NCARG_ROOT required by NCL
+# use Herwehe's while EMVL upgrades RHEL
+TERRAE_NCARG_ROOT="/home/hhg/ncl_ncarg/${TERRAE_NCL_VERSION}"
+
+#TERRAE_R_MODULE='R'
+# note different separator
+TERRAE_R_MODULE="R/${TERRAE_R_VERSION}"
+
+# ----------------------------------------------------------------------
+# functions
+# ----------------------------------------------------------------------
+
+# Ensure various needed artifacts are on the relevant search paths
+# (PATH, LD_LIBRARY_PATH); assumes `hostname` is already available.
+# TODO: ensure your hostname matches here!
+# TODO: setup packages={ncdf4} on infinity, not just amad
+function setup_paths {
+  H="$(hostname)"
+  case "${H}" in
+    amad*)
+      echo -e "${FUNCNAME[0]}: ${H} is on hpcc"
+# as of 22 May 12, on the hpcc R servers NCO is installed normally, in /usr/bin
+#      addPath "${HPCC_NCO_PATH}"
+      addPath "${HPCC_IOAPI_BIN_PATH}"
+      addPath "${AMAD1_NCL_PATH}"
+      addPath "${HPCC_R_PATH}"
+      addLdLibraryPath "${HPCC_IOAPI_LIB_PATH}"
+      ;;
+    global*)
+      echo -e "${FUNCNAME[0]}: ${H} is on hpcc"
+#      addPath "${HPCC_NCO_PATH}"
+      addPath "${HPCC_IOAPI_BIN_PATH}"
+      addPath "${HPCC_R_PATH}"
+      addLdLibraryPath "${HPCC_IOAPI_LIB_PATH}"
+      ;;
+    imaster*) # == infinity
+      echo -e "${FUNCNAME[0]}: ${H} is on hpcc"
+      echo -e "For R packages such as ncdf4, must run on amad"
+      addPath "${HPCC_NCO_PATH}"
+      addPath "${HPCC_IOAPI_BIN_PATH}"
+      addPath "${HPCC_R_PATH}"
+      addLdLibraryPath "${HPCC_IOAPI_LIB_PATH}"
+      ;;
+    inode*) # == node39
+      echo -e "${FUNCNAME[0]}: ${H} is on hpcc"
+      echo -e "For R packages such as ncdf4, must run on amad"
+      addPath "${HPCC_NCO_PATH}"
+      addPath "${HPCC_IOAPI_BIN_PATH}"
+      addPath "${HPCC_R_PATH}"
+      addLdLibraryPath "${HPCC_IOAPI_LIB_PATH}"
+      ;;
+    terra*)
+      echo -e "${FUNCNAME[0]}: ${H} is on terrae"
+      removeModule 'intel' # required on terrae/EMVL as of 23 Apr 2013
+      addModule "${TERRAE_INTEL_MODULE}"
+      addModule "gdal-1.9.2/intel-13.0"
+      addModule "proj-4.8.0/intel-13.0"
+      addModule "${TERRAE_HDF5_MODULE}"
+      addModule "${TERRAE_NETCDF_MODULE}"
+      addModule "${TERRAE_R_MODULE}"
+      echo -e "${FUNCNAME[0]}: 'module list -t'=="
+      modulecmd bash list -t > ${TEMPFILE}
+      source ${TEMPFILE}
+      # until NCL is module'd on terrae
+      export NCARG_ROOT="${TERRAE_NCARG_ROOT}"
+      addPath "${TERRAE_NCARG_ROOT}/bin"
+      ;;
+    tlr*)
+      echo -e "${FUNCNAME[0]}: ${H} is a Tom box"
+      # NCL
+      export NCARG_ROOT="${TLRP_NCARG_ROOT}"
+      addPath "${TLRP_NCL_PATH}"
+      addPath "${TLRP_R_PATH}"
+      ;;
+    *)
+      echo -e "${FUNCNAME[0]}: unknown ${H}"
+#      exit 1
+      ;;
+  esac
+} # end function setup_paths
+
+# Choose which app to use based on host. Assumes
+# * paths (et al) are properly configured by setup_paths (above)
+# * `hostname` is available
+# TODO: ensure your hostname matches here!
+function setup_apps {
+  H="$(hostname)"
+  case "${H}" in
+    amad*)
+      echo -e "${FUNCNAME[0]}: ${H} is on hpcc"
+      export PDF_VIEWER='xpdf'
+      ;;
+    global*)
+      echo -e "${FUNCNAME[0]}: ${H} is on hpcc"
+      export PDF_VIEWER='xpdf'
+      ;;
+    imaster*) # == infinity
+      echo -e "${FUNCNAME[0]}: ${H} is on hpcc"
+      export PDF_VIEWER='xpdf'
+      ;;
+    inode*) # == node39
+      echo -e "${FUNCNAME[0]}: ${H} is on hpcc"
+      export PDF_VIEWER='xpdf'
+      ;;
+    terra*)
+      echo -e "${FUNCNAME[0]}: ${H} is on terrae"
+      export PDF_VIEWER='xpdf'
+      ;;
+    tlr*)
+      echo -e "${FUNCNAME[0]}: ${H} is a Tom box"
+      export PDF_VIEWER='evince'
+      ;;
+    *)
+      echo -e "${FUNCNAME[0]}: unknown ${H}"
+#      exit 1
+      ;;
+  esac
+} # end function setup_apps
+
+# add $1 to PATH if not already there
+function addPath {
+    DIR="$1"
+    if [[ -n "${DIR}" ]] ; then
+      if [ -d "${DIR}" ] ; then
+        if [[ ":${PATH}:" != *":${DIR}:"* ]] ; then
+          PATH="${DIR}:${PATH}"
+        else
+          echo -e "${FUNCNAME[0]}: PATH contains '${DIR}'"
+        fi
+      else
+        echo -e "${THIS_FN}::${FUNCNAME[0]}: ERROR: '${DIR}' is not a directory" 1>&2
+      fi
+    else
+      echo -e "${THIS_FN}::${FUNCNAME[0]}: ERROR: DIR not defined" 1>&2
+    fi
+}
+
+# add $1 to LD_LIBRARY_PATH if not already there
+function addLdLibraryPath {
+    DIR="$1"
+    if [[ -n "${DIR}" ]] ; then
+      if [ -d "${DIR}" ] ; then
+        if [[ ":${LD_LIBRARY_PATH}:" != *":${DIR}:"* ]] ; then
+          LD_LIBRARY_PATH="${DIR}:${LD_LIBRARY_PATH}"
+        else
+          echo -e "LD_LIBRARY_PATH contains '${DIR}'"
+        fi
+      else
+        echo -e "${THIS_FN}::${FUNCNAME[0]}: ERROR: '${DIR}' is not a directory" 1>&2
+      fi
+    else
+      echo -e "${THIS_FN}::${FUNCNAME[0]}: ERROR: DIR not defined" 1>&2
+    fi
+}
+
+# add module=$1 to your Environment Modules environment
+# ( http://modules.sourceforge.net/ )
+# TODO: check for existence of module
+function addModule {
+  MODULE="$1"
+  TEMPFILE="$(mktemp)"
+  modulecmd bash add ${MODULE} > ${TEMPFILE}
+  source ${TEMPFILE}
+}
+
+# remove module=$1 from your Environment Modules environment
+# ( http://modules.sourceforge.net/ )
+# TODO: check for existence of module
+function removeModule {
+  MODULE="$1"
+  TEMPFILE="$(mktemp)"
+  modulecmd bash rm ${MODULE} > ${TEMPFILE}
+  source ${TEMPFILE}
+}
+
+# # If your computing platform uses Environment Modules (
+# # http://modules.sourceforge.net/
+# # ), load modules for current NCO and IOAPI, noting
+# # how this syntax differs from the commandline.
+# # (Thanks, Barron Henderson for noting this.)
+# # TODO: test for non/existence of paths above!
+# function setupModules {
+#   # for CMD in \
+#   #   "modulecmd bash add ${TERRAE_NCO_MODULE} ${TERRAE_IOAPI_MODULE}" \
+#   # ; do
+#   #   echo -e "$ ${CMD}"
+#   #   eval "${CMD}"
+#   # done
+#   TEMPFILE="$(mktemp)"
+#   modulecmd bash add ${TERRAE_NCO_MODULE} ${TERRAE_IOAPI_MODULE} > ${TEMPFILE}
+#   source ${TEMPFILE}
+# }
+
+# "Comments" lines from running iff _DEBUG='on' (which can be export'ed by caller),
+# and runs with `set xtrace`
+# For `echo`, use DEBUG()
+function DEBUGx {
+  if [[ "${_DEBUG}" == 'on' ]] ; then
+    set -x
+    "$@" 1>&2
+    set +x
+  fi
+} # end function DEBUGx
+
+# "Comments" lines from running iff _DEBUG='on'
+# (which can be export'ed by caller)
+function DEBUG {
+  if [[ "${_DEBUG}" == 'on' ]] ; then
+    "$@" 1>&2
+  fi
+} # end function DEBUG
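+
+# Illustrative use (hypothetical commands; assumes the caller export'ed _DEBUG):
+#   _DEBUG='on'
+#   DEBUG echo "about to call setup_paths"
+#   DEBUGx ls -ld "${HPCC_IOAPI_LIB_PATH}"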

File get_daily_emis_fp.ncl

+;!/usr/bin/env ncl ; requires version >= 5.1.1 for str_sub_str
+;----------------------------------------------------------------------
+; Fill out the file path for daily/hourly emissions from a template and the date.
+
+;----------------------------------------------------------------------
+; libraries
+;----------------------------------------------------------------------
+
+load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_code.ncl"    ; all built-ins?
+
+;----------------------------------------------------------------------
+; functions
+;----------------------------------------------------------------------
+
+;;; Get the (string) path to the file for the hourly emissions for the day.
+undef("get_daily_emis_fp") ; allow redefinition in console
+function get_daily_emis_fp(
+  month [1] : numeric, ; int index of month in year (1..12)
+  day [1] : numeric,   ; int index of day in month (1..31)
+  fp_template [1] : string,  ; template for file path
+  date_template [1] : string ; template for date portion of file path
+)
+local date_string, fp
+begin
+;   date_string = sprinti("%0.2i%0.2i", month, day) ; '%0.'==left pad zeros
+  ; NCL sprint* are so weak
+  date_string = sprinti("%0.2i", month)
+  date_string = date_string + sprinti("%0.2i", day)
+  fp = str_sub_str(fp_template, date_template, date_string)
+  return(fp)
+end ; function get_daily_emis_fp
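+
+; Illustrative call (hypothetical template values):
+;   fp = get_daily_emis_fp(7, 4, "./emis_MMDD.nc", "MMDD")
+; yields fp=="./emis_0704.nc"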

File get_input_areas.ncl

+;!/usr/bin/env ncl ; requires version >= ???
+;----------------------------------------------------------------------
+; Get areas of GFED/global gridcells from a sample file with the desired schema.
+
+;----------------------------------------------------------------------
+; libraries
+;----------------------------------------------------------------------
+
+load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_code.ncl"    ; all built-ins?
+;load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_csm.ncl"
+load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/contributed.ncl" ; for stat_dispersion
+
+;----------------------------------------------------------------------
+; functions
+;----------------------------------------------------------------------
+
+; Calculate area bounded by lons and lats on sphere, as defined by input
+; * variable with coord vars=(lat, lon) (i.e., with those names and that order)
+;   defining the _centers_ of a grid
+; * radius of sphere
+; Returns area as float(lat,lon), but assumes input lat,lon==double.
+undef("lats_lons_sphere_area")  ; allow redefinition in console
+function lats_lons_sphere_area(
+  lat_lon_var [*][*] : numeric, ; 2D
+  radius      [1]    : numeric
+)
+  local Pi, radius_2, sphere_area,\
+    lat_var, lat_size, lat_lines_size, lat_lines, lat_span, i_lat,\
+    lon_var, lon_size, lon_lines_size, lon_lines, lon_span, i_lon    
+begin
+
+  Pi = 3.14159265 ; why do I hafta define this?
+  radius_2 = radius ^ 2 ; will also use later
+  sphere_area = 4 * Pi * radius_2 ; for debugging only
+
+  ;;; get dimensions/coords of passed grid centers var
+  lat_var = lat_lon_var&lat
+  lat_size = dimsizes(lat_var)
+  lat_lines_size = lat_size + 1
+  lon_var = lat_lon_var&lon
+  lon_size = dimsizes(lon_var)
+  lon_lines_size = lon_size + 1
+
+  ;;; create dimensions/coords for grid _lines_ var
+  lat_lines=new( (/ lat_lines_size /), double)
+  lon_lines=new( (/ lon_lines_size /), double)
+  ; assume grid fully spans globe, and grid lines are evenly separated
+  lat_span = 180.0/lat_size ; latitudinal span of each gridcell
+  lon_span = 360.0/lon_size ; longitudinal span of each gridcell
+
+  ; assume `lat` points to grid centers, with "innermost" line a half span away from first center.
+  lat_lines(0) = lat_var(0) - (lat_span/2.0)
+  do i_lat = 1 , lat_size
+    lat_lines(i_lat) = lat_lines(i_lat - 1) + lat_span 
+;    print("lat_lines("+i_lat+")="+lat_lines(i_lat))
+  end do
+
+  ; assume `lon` points to grid centers, with "leftmost" line a half span away from first center.
+  lon_lines(0) = lon_var(0) - (lon_span/2.0)
+  do i_lon = 1 , lon_size
+    lon_lines(i_lon) = lon_lines(i_lon - 1) + lon_span 
+;    print("lon_lines("+i_lon+")="+lon_lines(i_lon))
+  end do
+
+;  print("lats_lons_sphere_area: calculating area array")
+  ; keep dimension order
+  area_arr = new( (/ lat_size, lon_size /), float)
+  do i_lat = 0 , lat_size - 1
+    do i_lon = 0 , lon_size - 1
+      ; `gc_qarea` is built-in!
+      area_arr(i_lat, i_lon) = tofloat(gc_qarea(\
+        (/ lat_lines(i_lat), lat_lines(i_lat+1), lat_lines(i_lat+1), lat_lines(i_lat)   /),\
+        (/ lon_lines(i_lon), lon_lines(i_lon),   lon_lines(i_lon+1), lon_lines(i_lon+1) /)\
+      ) * radius_2)
+    end do ; iterate lon
+  end do ; iterate lat
+
+;  ; start debugging
+;  print("sum(area_arr)="+sum(area_arr)+", sphere_area="+sphere_area)
+;  print("sum(area_arr)/sphere_area="+sum(area_arr)/sphere_area)
+;  ; (0)   sum(area_arr)=5.09889e+14, sphere_area=5.09904e+14
+;  ; (0)   sum(area_arr)/sphere_area=0.99997
+;  ;   end debugging
+
+  return(area_arr) ; as float
+end ; lats_lons_sphere_area
+
+;----------------------------------------------------------------------
+; code
+;----------------------------------------------------------------------
+
+begin ; skip if copy/paste-ing to console
+
+;----------------------------------------------------------------------
+; constants
+;----------------------------------------------------------------------
+
+  ;;; model/run constants
+  this_fn = getenv("GET_INPUT_AREAS_FN") ; for debugging
+  model_year = stringtoint(getenv("MODEL_YEAR"))
+  CMAQ_radius = stringtofloat(getenv("CMAQ_RADIUS"))
+
+  ;;; input constants
+  in_fp = getenv("GFED_AREAS_GLOBAL_SAMPLE_FP")
+  in_datavar_name = getenv("GFED_AREAS_GLOBAL_SAMPLE_DATAVAR_NAME")
+  in_dim_row_name = getenv("GFED_AREAS_GLOBAL_SAMPLE_DIM_ROW_NAME")
+  in_dim_col_name = getenv("GFED_AREAS_GLOBAL_SAMPLE_DIM_COL_NAME")
+
+  ;;; output constants
+  out_fp = getenv("GFED_AREAS_GLOBAL_FP")
+  out_history = getenv("GFED_AREAS_GLOBAL_HISTORY")
+  out_datavar_name = getenv("GFED_AREAS_GLOBAL_DATAVAR_NAME")
+  out_datavar_long_name = getenv("GFED_AREAS_GLOBAL_DATAVAR_LONG_NAME")
+  out_datavar_standard_name = getenv("GFED_AREAS_GLOBAL_DATAVAR_STANDARD_NAME")
+  out_datavar_units = getenv("GFED_AREAS_GLOBAL_DATAVAR_UNITS")
+  out_dim_row_name = getenv("GFED_AREAS_GLOBAL_DIM_ROW_NAME")
+  out_dim_col_name = getenv("GFED_AREAS_GLOBAL_DIM_COL_NAME")
+
+;----------------------------------------------------------------------
+; payload
+;----------------------------------------------------------------------
+
+  ;;; workaround if `source`ing driver? e.g.,
+  ; in_fh = addfile("."+in_fp, "r")  ; file handle
+  ; out_fh = addfile("."+out_fp, "c")  ; file handle
+
+  ;;; get sample input data
+  in_fh = addfile(in_fp, "r") ; use workaround if `source`ing driver
+  in_datavar = in_fh->$in_datavar_name$
+;  lat_lon_arr = in_datavar(:,:)
+;  sample_areas = lats_lons_sphere_area(lat_lon_arr, CMAQ_radius)
+  sample_areas = lats_lons_sphere_area(in_datavar, CMAQ_radius)
+
+  ; start debugging---------------------------------------------------
+  printVarSummary(sample_areas)
+  in_stats = stat_dispersion(sample_areas, False) ; False == don't stdout
+  print(\
+    str_get_nl()                                               + \
+    "gridcell areas (m^2) from " + in_fp + ":" + str_get_nl()  + \
+    "|cells|=" + in_stats(18)                   + str_get_nl() + \
+    "|obs|  =" + in_stats(19)                   + str_get_nl() + \
+    "min    =" + sprintf("%7.3e", in_stats(2))  + str_get_nl() + \
+    "q1     =" + sprintf("%7.3e", in_stats(6))  + str_get_nl() + \
+    "med    =" + sprintf("%7.3e", in_stats(8))  + str_get_nl() + \
+    "mean   =" + sprintf("%7.3e", in_stats(0))  + str_get_nl() + \
+    "q3     =" + sprintf("%7.3e", in_stats(10)) + str_get_nl() + \
+    "max    =" + sprintf("%7.3e", in_stats(14)) + str_get_nl() + \
+    str_get_nl() )
+  ;   end debugging---------------------------------------------------
+
+  ;;; create output data file
+  out_fh = addfile(out_fp, "c") ; use workaround if `source`ing driver
+
+  ;; define global attributes
+  out_fh_att = True ; assign file/global attributes
+  out_fh_att@history = out_history
+  out_fh_att@creation_date = systemfunc("date") ; the canonical way
+  
+  ;; create file data variable (datavar) and its coordinate variables
+  ; fill data
+  temp_var = sample_areas(:,:) ; pass-by-value, I hope
+  ; add datavar attributes
+  temp_var@long_name = out_datavar_long_name
+  temp_var@standard_name = out_datavar_standard_name
+  temp_var@units = out_datavar_units
+  ; name dimensions, provide coordinates
+  temp_var!0 = out_dim_row_name
+  temp_var&$out_dim_row_name$ = in_fh->$in_dim_row_name$
+  temp_var!1 = out_dim_col_name
+  temp_var&$out_dim_col_name$ = in_fh->$in_dim_col_name$
+  ; write datavar
+  out_fh->$out_datavar_name$ = temp_var
+  ; write coordinate attributes: must do this after writing datavar
+  out_fh->$out_dim_row_name$@standard_name = out_fh->$out_dim_row_name$@long_name
+  out_fh->$out_dim_col_name$@standard_name = out_fh->$out_dim_col_name$@long_name
+  
+  ;; write global attributes: can do this after writing datavar
+  fileattdef(out_fh, out_fh_att)
+
+  ; start debugging---------------------------------------------------
+  out_datavar = out_fh->$out_datavar_name$
+  printVarSummary(out_datavar)
+  out_stats = stat_dispersion(out_datavar, False) ; False == don't stdout
+  print(\
+    str_get_nl()                                                + \
+    "gridcell areas (m^2) from " + out_fp + ":" + str_get_nl()  + \
+    "|cells|=" + out_stats(18)                   + str_get_nl() + \
+    "|obs|  =" + out_stats(19)                   + str_get_nl() + \
+    "min    =" + sprintf("%7.3e", out_stats(2))  + str_get_nl() + \
+    "q1     =" + sprintf("%7.3e", out_stats(6))  + str_get_nl() + \
+    "med    =" + sprintf("%7.3e", out_stats(8))  + str_get_nl() + \
+    "mean   =" + sprintf("%7.3e", out_stats(0))  + str_get_nl() + \
+    "q3     =" + sprintf("%7.3e", out_stats(10)) + str_get_nl() + \
+    "max    =" + sprintf("%7.3e", out_stats(14)) + str_get_nl() + \
+    str_get_nl() )
+
+  printVarSummary(out_fh)
+  print(systemfunc("ncdump -h "+out_fp))
+  ;   end debugging---------------------------------------------------
+
+end ; get_input_areas.ncl

File get_output_areas.ncl

+;!/usr/bin/env ncl ; requires version >= ???
+;----------------------------------------------------------------------
+; Get and write areas of AQMEII-NA gridcells from provided map scale factors (MSF)
+; TODO: write coordinate variables
+
+;----------------------------------------------------------------------
+; libraries
+;----------------------------------------------------------------------
+
+load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_code.ncl"    ; all built-ins?
+;load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_csm.ncl"
+load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/contributed.ncl" ; for stat_dispersion
+
+;----------------------------------------------------------------------
+; constants
+;----------------------------------------------------------------------
+
+;;; model/run
+this_fn = getenv("GET_OUTPUT_AREAS_FN") ; for debugging
+model_year = stringtoint(getenv("MODEL_YEAR"))
+CMAQ_radius = stringtofloat(getenv("CMAQ_RADIUS"))
+nominal_area = stringtofloat(getenv("AQMEIINA_NOMINAL_GRIDCELL_AREA"))
+
+;;; input/MSF
+in_fp = getenv("GFED_AREAS_MSF_FP_NCL")
+in_datavar_name = getenv("GFED_AREAS_MSF_DATAVAR_NAME")
+in_datavar_dim_row_name = getenv("GFED_AREAS_MSF_DIM_ROW_NAME")
+in_datavar_dim_col_name = getenv("GFED_AREAS_MSF_DIM_COL_NAME")
+; ; positions of dimensions in dimension list
+; in_datavar_dim_row_posn = 2 ; C-style indexing ; stringtoint(getenv("GFED_AREAS_MSF_DIM_ROW_POSN"))
+; in_datavar_dim_col_posn = 3 ; C-style indexing ; stringtoint(getenv("GFED_AREAS_MSF_DIM_COL_POSN"))
+
+;;; output/areas
+out_fp = getenv("GFED_AREAS_AQMEIINA_FP")
+out_history = getenv("GFED_AREAS_AQMEIINA_HISTORY")
+out_datavar_name = getenv("GFED_AREAS_AQMEIINA_DATAVAR_NAME")
+out_datavar_long_name = getenv("GFED_AREAS_AQMEIINA_DATAVAR_LONG_NAME")
+out_datavar_standard_name = getenv("GFED_AREAS_AQMEIINA_DATAVAR_STANDARD_NAME")
+out_datavar_units = getenv("GFED_AREAS_AQMEIINA_DATAVAR_UNITS")
+out_datavar_dim_row_name = getenv("GFED_AREAS_AQMEIINA_DIM_ROW_NAME")
+out_datavar_dim_col_name = getenv("GFED_AREAS_AQMEIINA_DIM_COL_NAME")
+
+;----------------------------------------------------------------------
+; functions
+;----------------------------------------------------------------------
+
+;----------------------------------------------------------------------
+; code
+;----------------------------------------------------------------------
+
+begin ; skip if copy/paste-ing to console
+
+;----------------------------------------------------------------------
+; payload
+;----------------------------------------------------------------------
+
+  ;;; workaround if `source`ing driver? e.g.,
+  ; in_fh = addfile("."+in_fp, "r")  ; file handle
+  ; out_fh = addfile("."+out_fp, "c")  ; file handle
+
+  ;;; get MSFs
+  in_fh = addfile(in_fp, "r") ; use workaround if `source`ing driver
+  in_datavar = in_fh->$in_datavar_name$
+
+;   ; start debugging---------------------------------------------------
+;   printVarSummary(in_datavar)
+; ; Variable: in_datavar
+; ; Type: float
+; ; Total Size: 548964 bytes
+; ;             137241 values
+; ; Number of Dimensions: 4
+; ; Dimensions and sizes: [TSTEP | 1] x [LAY | 1] x [ROW | 299] x [COL | 459]
+; ; Coordinates: 
+; ; Number Of Attributes: 3
+; ;   long_name : MSFX2           
+; ;   units :     (M/M)**2        
+; ;   var_desc :  squared map-scale factor (CROSS)                                                
+;   ;   end debugging---------------------------------------------------
+
+  ;;; compute gridcell areas
+  MSF_arr = (/ in_datavar(0,0,:,:) /) ; only one TSTEP, LAY. TODO: parameterize
+  areas_arr = (/ nominal_area * MSF_arr /)
+
+;   ; start debugging---------------------------------------------------
+;   printVarSummary(areas_arr)
+; ; Variable: areas_arr
+; ; Type: float
+; ; Total Size: 548964 bytes
+; ;             137241 values
+; ; Number of Dimensions: 2
+; ; Dimensions and sizes: [299] x [459]
+
+;   in_stats = stat_dispersion(areas_arr, False) ; False == don't stdout
+;   print(\
+;     str_get_nl()                                               + \
+;     "gridcell areas (m^2):"                     + str_get_nl() + \
+;     "|cells|=" + in_stats(18)                   + str_get_nl() + \
+;     "|obs|  =" + in_stats(19)                   + str_get_nl() + \
+;     "min    =" + sprintf("%7.3e", in_stats(2))  + str_get_nl() + \
+;     "q1     =" + sprintf("%7.3e", in_stats(6))  + str_get_nl() + \
+;     "med    =" + sprintf("%7.3e", in_stats(8))  + str_get_nl() + \
+;     "mean   =" + sprintf("%7.3e", in_stats(0))  + str_get_nl() + \
+;     "q3     =" + sprintf("%7.3e", in_stats(10)) + str_get_nl() + \
+;     "max    =" + sprintf("%7.3e", in_stats(14)) + str_get_nl() + \
+;     str_get_nl() )
+; ; areas (m^2):
+; ; |cells|=137241
+; ; |obs|  =137241
+; ; min    =1.424e+08
+; ; q1     =1.431e+08
+; ; med    =1.452e+08
+; ; mean   =1.462e+08
+; ; q3     =1.487e+08
+; ; max    =1.579e+08
+;   ;   end debugging---------------------------------------------------
+
+  ;;; create output data file
+  out_fh = addfile(out_fp, "c") ; use workaround if `source`ing driver
+
+  ;; define global attributes
+  out_fh_att = True ; assign file/global attributes
+  out_fh_att@history = out_history
+  out_fh_att@creation_date = systemfunc("date") ; the canonical way
+  
+  ;; create file data variable (datavar)
+  ; fill data
+  temp_var = areas_arr(:,:) ; pass-by-value, I hope
+  ; add datavar attributes
+  temp_var@long_name = out_datavar_long_name
+  temp_var@standard_name = out_datavar_standard_name
+  temp_var@units = out_datavar_units
+  ; name dimensions
+  temp_var!0 = out_datavar_dim_row_name
+  temp_var!1 = out_datavar_dim_col_name
+  ; TODO: create coordinates
+;  temp_var&$out_datavar_dim_col_name$ = in_fh->$in_datavar_dim_col_name$
+;  temp_var&$out_datavar_dim_col_name$ = temp_var!1
+;  temp_var&$out_datavar_dim_col_name$ = in_datavar!3
+  ; write datavar
+  out_fh->$out_datavar_name$ = temp_var
+  ; TODO: write coordinate attributes: must do this after writing datavar
+;   out_fh->$out_datavar_dim_row_name$@standard_name = out_fh->$out_datavar_dim_row_name$@long_name
+;   out_fh->$out_datavar_dim_col_name$@standard_name = out_fh->$out_datavar_dim_col_name$@long_name
+  
+  ;; write global attributes: can do this after writing datavar
+  fileattdef(out_fh, out_fh_att)
+
+;   ; start debugging---------------------------------------------------
+;   out_datavar = out_fh->$out_datavar_name$
+;   printVarSummary(out_datavar)
+; ; Variable: out_datavar
+; ; Type: float
+; ; Total Size: 548964 bytes
+; ;             137241 values
+; ; Number of Dimensions: 2
+; ; Dimensions and sizes: [ROW | 299] x [COL | 459]
+; ; Coordinates: 
+; ; Number Of Attributes: 3
+; ;   units :     m2
+; ;   standard_name :     area
+; ;   long_name : area of grid cell
+
+;   out_stats = stat_dispersion(out_datavar, False) ; False == don't stdout
+;   print(\
+;     str_get_nl()                                                + \
+;     "gridcell areas (m^2) from " + out_fp + ":" + str_get_nl()  + \
+;     "|cells|=" + out_stats(18)                   + str_get_nl() + \
+;     "|obs|  =" + out_stats(19)                   + str_get_nl() + \
+;     "min    =" + sprintf("%7.3e", out_stats(2))  + str_get_nl() + \
+;     "q1     =" + sprintf("%7.3e", out_stats(6))  + str_get_nl() + \
+;     "med    =" + sprintf("%7.3e", out_stats(8))  + str_get_nl() + \
+;     "mean   =" + sprintf("%7.3e", out_stats(0))  + str_get_nl() + \
+;     "q3     =" + sprintf("%7.3e", out_stats(10)) + str_get_nl() + \
+;     "max    =" + sprintf("%7.3e", out_stats(14)) + str_get_nl() + \
+;     str_get_nl() )
+; ; gridcell areas (m^2) from ./areas_AQMEII-NA.nc:
+; ; |cells|=137241
+; ; |obs|  =137241
+; ; min    =1.424e+08
+; ; q1     =1.431e+08
+; ; med    =1.452e+08
+; ; mean   =1.462e+08
+; ; q3     =1.487e+08
+; ; max    =1.579e+08
+
+;   print(systemfunc("ncdump -h "+out_fp))
+; ; netcdf areas_AQMEII-NA {
+; ; dimensions:
+; ;       ROW = 299 ;
+; ;       COL = 459 ;
+; ; variables:
+; ;       float cell_area(ROW, COL) ;
+; ;               cell_area:units = "m2" ;
+; ;               cell_area:standard_name = "area" ;
+; ;               cell_area:long_name = "area of grid cell" ;
+; ; 
+; ; // global attributes:
+; ;               :creation_date = "Tue Apr 16 15:49:20 EDT 2013" ;
+; ;               :history = "see https://bitbucket.org/tlroche/gfed-3.1_global_to_aqmeii-na" ;
+; ; }
+;   ;   end debugging---------------------------------------------------
+
+end ; get_output_areas.ncl

File lats_lons_sphere_area.ncl

+;;; Calculate area bounded by lons and lats on sphere, as defined by input
+;;; * variable with coord vars=(lat, lon) (i.e., with those names and that order)
+;;;   defining the _centers_ of a grid
+;;; * radius of sphere
+;;; Returns area as float(lat,lon), but assumes input lat,lon==double.
+
+;;; ASSERT: assumes/requires the input (lat_lon_arr)
+;;; * (AFAIK, due to NCL limitations in querying dimensions) has coordinate variables named 'lat' and 'lon'
+;;; * has global extent
+
+undef("lats_lons_sphere_area")  ; allow redefinition in console
+function lats_lons_sphere_area(
+  lat_lon_arr [*][*] : numeric, ; 2D
+  radius      [1]    : numeric
+)
+  local Pi, radius_2, sphere_area,\
+    lat_vec, lat_size, lat_lines_size, lat_lines, lat_span, i_lat,\
+    lon_vec, lon_size, lon_lines_size, lon_lines, lon_span, i_lon    
+begin
+
+  Pi = 3.14159265 ; why do I hafta define this?
+  radius_2 = radius ^ 2 ; will also use later
+  sphere_area = 4 * Pi * radius_2 ; for debugging only
+
+  ;;; get dimensions/coords of passed grid centers var
+  lat_vec = lat_lon_arr&lat
+  lat_size = dimsizes(lat_vec)
+  lat_lines_size = lat_size + 1
+  lon_vec = lat_lon_arr&lon
+  lon_size = dimsizes(lon_vec)
+  lon_lines_size = lon_size + 1
+
+  ;;; create dimensions/coords for grid _lines_ var
+  lat_lines=new( (/ lat_lines_size /), double)
+  lon_lines=new( (/ lon_lines_size /), double)
+  ; assume grid fully spans globe, and grid lines are evenly separated
+  lat_span = 180.0/lat_size ; latitudinal span of each gridcell
+  lon_span = 360.0/lon_size ; longitudinal span of each gridcell
+
+  ; assume `lat` points to grid centers, with "innermost" line a half span away from first center.
+  lat_lines(0) = lat_vec(0) - (lat_span/2.0)
+  do i_lat = 1 , lat_size
+    lat_lines(i_lat) = lat_lines(i_lat - 1) + lat_span 
+;    print("lat_lines("+i_lat+")="+lat_lines(i_lat))
+  end do
+
+  ; assume `lon` points to grid centers, with "leftmost" line a half span away from first center.
+  lon_lines(0) = lon_vec(0) - (lon_span/2.0)
+  do i_lon = 1 , lon_size
+    lon_lines(i_lon) = lon_lines(i_lon - 1) + lon_span 
+;    print("lon_lines("+i_lon+")="+lon_lines(i_lon))
+  end do
+
+;  print("lats_lons_sphere_area: calculating area array")
+  ; keep dimension order
+  area_arr = new( (/ lat_size, lon_size /), float)
+  do i_lat = 0 , lat_size - 1
+    do i_lon = 0 , lon_size - 1
+      ; `gc_qarea` is built-in!
+      area_arr(i_lat, i_lon) = tofloat(gc_qarea(\
+        (/ lat_lines(i_lat), lat_lines(i_lat+1), lat_lines(i_lat+1), lat_lines(i_lat)   /),\
+        (/ lon_lines(i_lon), lon_lines(i_lon),   lon_lines(i_lon+1), lon_lines(i_lon+1) /)\
+      ) * radius_2)
+    end do ; iterate lon
+  end do ; iterate lat
+
+;  ; start debugging
+;  print("sum(area_arr)="+sum(area_arr)+", sphere_area="+sphere_area)
+;  print("sum(area_arr)/sphere_area="+sum(area_arr)/sphere_area)
+;  ; (0)   sum(area_arr)=5.09889e+14, sphere_area=5.09904e+14
+;  ; (0)   sum(area_arr)/sphere_area=0.99997
+;  ;   end debugging
+
+  return(area_arr) ; as float
+end ; lats_lons_sphere_area
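+
+; Illustrative call (hypothetical values): for a global 0.5-degree grid variable
+; var(lat,lon) with dimsizes (/ 360, 720 /), lat_span==0.5 and lon_span==0.5, and
+;   areas = lats_lons_sphere_area(var, 6370000.0)
+; returns a 360 x 720 float array of gridcell areas (in m^2 for radius in m).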

File netCDF.stats.to.stdout.r

+# R code to write simple stats (min, mean, quartiles, max) for an input file.
+# Run this like
+# $ Rscript ./netCDF.stats.to.stdout.r netcdf.fp=./GEIA_N2O_oceanic.nc data.var.name=emi_n2o
+# or
+# > source('./netCDF.stats.to.stdout.r')
+# > netCDF.stats.to.stdout(...)
+
+# ----------------------------------------------------------------------
+# constants
+# ----------------------------------------------------------------------
+
+this.fn <- 'netCDF.stats.to.stdout.r'      # TODO: get like $0
+
+# ----------------------------------------------------------------------
+# functions
+# ----------------------------------------------------------------------
+
+# syntactic sugar
+q1 <- function(vec) { quantile(vec, 0.25) } # first quartile
+q3 <- function(vec) { quantile(vec, 0.75) } # third quartile
+
+# the main event
+netCDF.stats.to.stdout <- function(
+  netcdf.fp, # /path/to/netcdf/file, can be relative or FQ
+  data.var.name,   # name of data variable (datavar) of interest
+  stats.precision=3 # sigdigs to use for min, median, max of obs  
+) {
+
+  # TODO: test arguments!
+
+#  # start debug
+#  cat(sprintf(
+#    '%s: netcdf.fp==%s, data.var.name==%s\n',
+#    this.fn, netcdf.fp, data.var.name))
+#  system(sprintf('ls -alth %s', netcdf.fp))
+#  system(sprintf('ncdump -h %s', netcdf.fp))
+#  #   end debug
+
+  # create strings for use in output below (far below)
+  # double-sprintf-ing to set precision by constant: cool or brittle?
+  stat.str.template <- sprintf('%%.%ig', stats.precision)
+  # use these in function=subtitle.stats as sprintf inputs
+  sum.str.template <- sprintf('sum=%s', stat.str.template)
+  max.str.template <- sprintf('max=%s', stat.str.template)
+  mea.str.template <- sprintf('mean=%s', stat.str.template)
+  med.str.template <- sprintf('med=%s', stat.str.template)  # median==2nd quartile
+  min.str.template <- sprintf('min=%s', stat.str.template)
+  q1.str.template <- sprintf('q1=%s', stat.str.template)    # 1st quartile
+  q3.str.template <- sprintf('q3=%s', stat.str.template)    # 3rd quartile
+
+  # needed to parse netCDF
+  library(ncdf4)
+
+  # open netCDF file, uncautiously
+  # NOTE: you must assign when you nc_open!
+  netcdf.file <- ncdf4::nc_open(
+    filename=netcdf.fp,
+    write=FALSE,    # will only read below
+    readunlim=TRUE) # it's a small file
+
+  # uncautiously get the data out of the datavar
+  data.var.data <- ncvar_get(
+    nc=netcdf.file,
+    varid=data.var.name)
+
+#  # start debug
+#  cat(sprintf('%s: data.var.name==%s has size==\n',
+#    this.fn, data.var.name))
+#  print(dim(data.var.data))
+#  #   end debug
+
+  if (is.numeric(data.var.data) && sum(!is.na(data.var.data))) {
+#    unsparse.data <- data.var.data[!is.na(data.var.data)]
+    # collapse its structure
+    unsparse.data <- c(data.var.data[!is.na(data.var.data)])
+    obs.n <- length(unsparse.data)
+    if (obs.n > 0) {
+      stats.to.stdout(
+        data.nonNA=unsparse.data,
+        data.nonNA.n=obs.n,
+        data.raw.n=length(data.var.data),
+        sig.digs=stats.precision,
+        title=sprintf('From %s datavar=%s:', netcdf.fp, data.var.name)
+      )
+    } else {
+      cat(sprintf('%s: %s var=%s has no non-NA data\n',
+        this.fn, netcdf.fp, data.var.name))
+    }
+  } else {
+    cat(sprintf('%s: %s var=%s has no numeric non-NA data\n',
+      this.fn, netcdf.fp, data.var.name))
+  }
+
+  # teardown
+  nc_close(netcdf.file)
+} # end function netCDF.stats.to.stdout
+
+stats.to.stdout <- function(
+  data.nonNA,    # numeric array minus NAs, no other requirements (?)
+  data.nonNA.n,  # its length
+  data.raw.n,    # length of data from which data.nonNA was derived
+  sig.digs=3,    # significant digits for numeric fields
+  title=''       # one line identifying data source
+) {
+  # TODO: test arguments!
+
+  # create strings for use in output
+  # double-sprintf-ing to set precision by constant: cool or brittle?
+  stat.template <- sprintf('%%.%ig', sig.digs)
+  # use these in function=subtitle.stats as sprintf inputs
+  sum.template <- sprintf('sum=%s', stat.template)
+  max.template <- sprintf('max=%s', stat.template)
+  mea.template <- sprintf('mean=%s', stat.template)
+  med.template <- sprintf('med=%s', stat.template)  # median==2nd quartile
+  min.template <- sprintf('min=%s', stat.template)
+  q1.template <- sprintf('q1=%s', stat.template)    # 1st quartile
+  q3.template <- sprintf('q3=%s', stat.template)    # 3rd quartile
+
+  cells.str <- sprintf('cells=%i', data.raw.n)
+  obs.str <- sprintf('obs=%i', data.nonNA.n)
+  min.str <- sprintf(min.template, min(data.nonNA))
+  q1.str <- sprintf(q1.template, q1(data.nonNA))
+  mea.str <- sprintf(mea.template, mean(data.nonNA))
+  med.str <- sprintf(med.template, median(data.nonNA))
+  q3.str <- sprintf(q3.template, q3(data.nonNA))
+  max.str <- sprintf(max.template, max(data.nonNA))
+  sum.str <- sprintf(sum.template, sum(data.nonNA))
+
+  # at last: output!
+  cat(sprintf('%s\n', title))
+  cat(sprintf('\t%s\n', cells.str))
+  cat(sprintf('\t%s\n', obs.str))
+  # 6-number summary
+  cat(sprintf('\t%s\n', min.str))
+  cat(sprintf('\t%s\n', q1.str))
+  cat(sprintf('\t%s\n', med.str))
+  cat(sprintf('\t%s\n', mea.str))
+  cat(sprintf('\t%s\n', q3.str))
+  cat(sprintf('\t%s\n', max.str))
+  cat(sprintf('\t%s\n', sum.str))
+} # end function stats.to.stdout
+
+netCDF.stats.to.stdout.by.timestep <- function(
+  netcdf.fp, # /path/to/netcdf/file, can be relative or FQ
+  data.var.name,   # name of data variable (datavar) of interest
+  time.dim.name='time', # of time dimension in datavar args
+  stats.precision=3 # sigdigs to use for min, median, max of obs
+) {
+
+  # TODO: test arguments!
+
+#  # start debug
+#  cat(sprintf(
+#    '%s: netcdf.fp==%s, data.var.name==%s, time.dim.name=%s\n',
+#    this.fn, netcdf.fp, data.var.name, time.dim.name))
+#  system(sprintf('ls -alth %s', netcdf.fp))
+#  system(sprintf('ncdump -h %s', netcdf.fp))
+#  #   end debug
+
+  # needed to parse netCDF
+  library(ncdf4)
+
+  # open netCDF file, uncautiously
+  # NOTE: you must assign when you nc_open!
+  netcdf.file <- ncdf4::nc_open(
+    filename=netcdf.fp,
+    write=FALSE,    # will only read below
+    readunlim=TRUE) # it's a small file? TODO: read by timestep!
+
+  # Find the index of the datavar of interest
+  # (in the list of all non-coordinate variables).
+  netcdf.file.vars.n <- netcdf.file$nvars
+  data.var.index <- -1 # an invalid index
+  for (i in 1:netcdf.file.vars.n) {
+    netcdf.file.var <- netcdf.file$var[[i]]
+    if (netcdf.file.var$name == data.var.name) {
+      data.var.index <- i
+    }
+  }
+  if (data.var.index == -1) {
+    stop(sprintf('%s: ERROR: failed to find data.var.index\n', this.fn))
+  }
+
+#  # start debug
+#  cat(sprintf('%s: data.var.name==%s has var posn=%i and size==\n',
+#    this.fn, data.var.name, data.var.index))
+#  print(dim(data.var.data))
+#  #   end debug
+
+  # can only compute n.timesteps once we have the datavar's dims ...
+  data.var <- netcdf.file$var[[data.var.index]]
+  data.var.dims <- data.var$size
+  data.var.dims.n <- data.var$ndims
+  # ... and particularly the position of dimension$name==time.dim.name
+  time.dim.index <- -1 # an invalid index
+  for (i in 1:data.var.dims.n) {
+    data.var.dim <- data.var$dim[[i]]
+    if (data.var.dim$name == time.dim.name) {
+      time.dim.index <- i
+    }
+  }
+  if (time.dim.index == -1) {
+    stop(sprintf("%s: ERROR: failed to find time.dim.index for time.dim.name='%s'\n",
+      this.fn, time.dim.name))
+  } # else
+  n.timesteps <- data.var.dims[time.dim.index]
+
+#  # start debug
+#  cat(sprintf('%s: time.dim.name==%s has dim posn=%i and size=%i\n',
+#    this.fn, time.dim.name, time.dim.index, n.timesteps))
+#  #   end debug
+
+# TODO: ncdf4 bug?
+#  # compute read vectors for use in `ncvar_get`: read all timestep values
+#  vec.start.template <- rep(1, data.var.dims.n)   # ASSERT: constant
+##  vec.count.template <- rep(-1, data.var.dims.n)  # ASSERT: constant
+#  # see bug below, try instead
+#  vec.count.template <- data.var.dims             # ASSERT: constant
+
+  # iterate over the timesteps
+  for (i in 1:n.timesteps) {
+
+#    vec.start <- vec.start.template
+#    vec.start[time.dim.index] <- i # only get the i'th timestep
+#    vec.count <- vec.count.template
+#    vec.count[time.dim.index] <- i # only get the i'th timestep
+#
+#    # start debug
+#    cat(sprintf('%s: vec.start==\n', this.fn))
+#    print(vec.start)
+#    cat(sprintf('%s: vec.count==\n', this.fn))
+#    print(vec.count)
+#    #   end debug
+
+    data.var.data <- ncdf4::ncvar_get(
+      nc=netcdf.file,
+      varid=data.var.name
+# TODO: {debug, bug report} why this fails
+#      varid=data.var.name,
+#      start=vec.start,
+#      count=vec.count
+    )
+# each loop call increases time dimension! e.g.,
+# > netCDF.stats.to.stdout.r: dim(data.var.data)==
+# > [1] 144  96
+# ...
+# > netCDF.stats.to.stdout.r: dim(data.var.data)==
+# > [1] 144  96   2
+# ...
+# > netCDF.stats.to.stdout.r: dim(data.var.data)==
+# > [1] 144  96   3
+    # workaround: read unlimited (above), then just take the slice of interest
+    # TODO: handle dimensions more generically
+    data.var.data.dim.len <- length(dim(data.var.data))
+    if        (data.var.data.dim.len == 3) {
+      data.var.timestep <- data.var.data[,,i]
+    } else if (data.var.data.dim.len == 4) {
+      data.var.timestep <- data.var.data[,,,i]
+    }
+                                  
+#    # start debug
+#    cat(sprintf('%s: dim(data.var.timestep)==\n', this.fn))
+#    print(dim(data.var.timestep))
+#    cat(sprintf('%s: summary(data.var.timestep)==\n', this.fn))
+#    print(summary(c(data.var.timestep))) # collapse its structure
+#    #   end debug
+
+    if (is.numeric(data.var.timestep) && sum(!is.na(data.var.timestep))) {
+#      unsparse.data <- data.var.timestep[!is.na(data.var.timestep)]
+      # collapse its structure
+      unsparse.data <- c(data.var.timestep[!is.na(data.var.timestep)])
+      obs.n <- length(unsparse.data)
+      if (obs.n > 0) {
+        stats.to.stdout(
+          data.nonNA=unsparse.data,
+          data.nonNA.n=obs.n,
+          data.raw.n=length(data.var.timestep),
+          sig.digs=stats.precision,
+          title=sprintf('For %s datavar=%s, timestep=%i of %i', netcdf.fp, data.var.name, i, n.timesteps)
+        )
+      } else {
+        cat(sprintf('%s: %s data.var=%s has no non-NA data\n',
+          this.fn, netcdf.fp, data.var.name))
+      }
+    } else {
+      cat(sprintf('%s: %s data.var=%s has no numeric non-NA data\n',
+        this.fn, netcdf.fp, data.var.name))
+    } # end if (is.numeric(data.var.timestep) && sum(!is.na(data.var.timestep)))
+
+    rm( # debugging why output is same for each timestep
+#      data.var.data,
+      data.var.timestep,
+      unsparse.data)
+
+  } # end for (i in 1:n.timesteps)
+
+  # teardown
+  ncdf4::nc_close(netcdf.file)
+  rm(this.fn) # hopefully prevents overwriting `this.fn` in prior namespaces
+} # end function netCDF.stats.to.stdout.by.timestep
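+
+# Illustrative call (hypothetical file and variable names):
+#   netCDF.stats.to.stdout.by.timestep(netcdf.fp='./emis_hourly.nc',
+#     data.var.name='emi_n2o', time.dim.name='time')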
+
+# ----------------------------------------------------------------------
+# code
+# ----------------------------------------------------------------------
+
+# if this is called as a script, provide a main(): see
+# https://stat.ethz.ch/pipermail/r-help/2012-September/323551.html
+# https://stat.ethz.ch/pipermail/r-help/2012-September/323559.html
+if (!interactive()) {
+
+# start debug
+#  cat(sprintf('%s: interactive()==TRUE\n', this.fn))
+#   end debug
+  
+  # TODO: fix `strsplit` regexp below to make this unnecessary
+  library(stringr)
+
+  # pass named arguments: var above separated by '='
+
+  args <- commandArgs(TRUE)
+  # args is now a list of character vectors
+  # First check to see if any arguments were passed, then evaluate each argument:
+  # assign val (RHS) to key (LHS) for arguments of the (required) form 'key=val'
+  if (length(args)==0) {
+    cat(sprintf('%s: no arguments supplied, exiting\n', this.fn))
+#    q(status=1) # KLUDGE:
+# Currently this is not seeing arguments when called from Rscript,
+# so this exit also exits the caller :-(    
+  } else {
+  # simple positional args work
+  # TODO: also support positional usage
+  #  netcdf.fp <- args[1]
+  #  data.var.name <- args[2]
+    # TODO: test arg length: 2 is required!
+
+# start debug
+#    cat(sprintf('%s: got length(args)==%i\n', this.fn, length(args)))
+#   end debug
+
+    for (i in 1:length(args)) {
+#       eval(parse(text=args[[i]]))
+      # `eval(parse())` is unsafe and requires awkward quoting:
+      # e.g., of the following (bash) commandlines
+
+      # - Rscript ./netCDF.stats.to.stdout.r netcdf.fp="GEIA_N2O_oceanic.nc" data.var.name="emi_n2o"
+      #   fails
+
+      # + Rscript ./netCDF.stats.to.stdout.r 'netcdf.fp="GEIA_N2O_oceanic.nc"' 'data.var.name="emi_n2o"'
+      #   succeeds
+
+      # so instead
+      # TODO: use package `optparse` or `getopt`
+      args.keyval.list <-
+        strsplit(as.character(parse(text=args[[i]])),
+          split='[[:blank:]]*<-|=[[:blank:]]*', fixed=FALSE)
+  #                            split='[ \t]*<-|=[ \t]*', fixed=FALSE)
+      args.keyval.vec <- unlist(args.keyval.list, recursive=FALSE, use.names=FALSE)
+      # TODO: test vector elements!
+      # Neither wants to remove all whitespace from around arguments :-( so
+      args.key <- str_trim(args.keyval.vec[1], side="both")
+      args.val <- str_trim(args.keyval.vec[2], side="both")
+
+# start debug
+#       cat(sprintf('%s: got\n', this.fn))
+#       cat('\targs.keyval.list==\n')
+#       print(args.keyval.list)
+#       cat('\targs.keyval.vec==\n')
+#       print(args.keyval.vec)
+#       cat(sprintf('\targs.key==%s\n', args.key))
+#       cat(sprintf('\targs.val==%s\n', args.val))
+#   end debug
+
+      # A real case statement would be nice to have
+      if        (args.key == 'netcdf.fp') {
+        netcdf.fp <- args.val
+      } else if (args.key == 'data.var.name') {
+        data.var.name <- args.val
+      } else {
+        stop(sprintf("unknown argument='%s'", args.key))
+        # TODO: show usage
+        q(status=1) # exit with error
+      }
+    } # end for loop over arguments
+
+    # payload!
+    netCDF.stats.to.stdout(netcdf.fp, data.var.name)
+
+  } # end if testing number of arguments
+} # end if (!interactive())

File plotLayersForTimestep.r

+# modified from plotLayersForTimestep.r.1
+# * move device control outside of these methods
+# * refactor code for plotting single dataset, for reuse
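+#
+# Illustrative call pattern (hypothetical driver code; device control stays with the caller):
+#   pdf(file='layers.pdf')          # open a device before plotting
+#   plot.layers.for.timestep(...)   # defined below
+#   dev.off()                       # close the device afterward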
+
+library(fields)
+library(raster)
+library(ncdf4) # for ncatt_get below, in case the caller has not already loaded it
+
+# double-sprintf-ing to set precision by constant: cool or brittle?
+stats.precision <- 3 # sigdigs to use for min, median, max of obs
+stat.str <- sprintf('%%.%ig', stats.precision)
+# use these in function=subtitle.stats as sprintf inputs
+max.str <- sprintf('max=%s', stat.str)
+med.str <- sprintf('med=%s', stat.str)
+min.str <- sprintf('min=%s', stat.str)
+
+plot.layers.for.timestep <- function(
+  datavar,          # data variable
+  datavar.name,     # string naming the datavar # TODO: get from datavar
+  datavar.parent,   # file object containing the datavar
+  i.timestep=1,     # index of timestep to plot
+  n.layers=0,       # max number of layers (in timestep) to plot
+  attrs.list,       # list of global attributes
+                    # TODO: handle when null!
+  q.vec=NULL,       # quantile bounds
+  l2d.fp=NULL,      # maps layer# to crop description
+  colors,
+  map
+) {
+  for (i.layer in 1:n.layers) {
+# debugging
+# i.layer <- 1
+
+    # get title string:
+    # minimally:
+    title <- sprintf('%s: layer#=%2i', datavar.name, i.layer)
+    if (!is.null(l2d.fp)) {      # TODO: test file readability
+      l2d.env <- readRDS(l2d.fp) # TODO: test me!
+      title <- sprintf('%s (%s)',
+        title, l2d.env[[as.character(i.layer)]])
+    } else {
+      cat(sprintf(
+        'ERROR: plot.layers.for.timestep: no file mapping layer#s to descriptions\n'))
+    }
+    attr.list <-
+      ncatt_get(datavar.parent, varid=datavar.name, attname="units")
+    if (attr.list$hasatt) {
+      title <- sprintf('%s, units=%s', title, attr.list$value)
+    } else {
+      cat(sprintf(
+        'ERROR: plot.layers.for.timestep: no units for var=%s\n',
+        datavar.name))
+    }
+
+    data <- datavar[,,i.layer]
+# start debugging for Doug Nychka Mon, 13 Feb 2012 21:33:36 -0700
+#    print(paste('class(data)==', class(data), sep=""))
+#   end debugging for Doug Nychka Mon, 13 Feb 2012 21:33:36 -0700
+
+    # get stats for subtitle
+    # to put under title, just create second line (thanks, Doug Nychka)
+    title <- sprintf('%s\n%s', title, subtitle.stats(data))
+
+    plot.layer(data,
+    title=title,
+    attrs.list=attrs.list,
+    q.vec=q.vec,
+    colors=colors,
+    map=map)
+  } # end iterating layers
+} # end function plot.layers.for.timestep
+
+subtitle.stats <- function(vec) {
+  return.str <- ""
+  # is it numeric, and not empty?
+  if (is.numeric(vec) && sum(!is.na(vec))) {
+#    unsparse.vec <- subset(vec, !is.na(vec)) # fail: intended for interactive use
+#    unsparse.vec <- na.omit(vec) # fail: omits all *rows* containing an NA!
+    grids <- length(vec)
+    grids.str <- sprintf('(of cells=%i)', grids)
+    unsparse.vec <- vec[!is.na(vec)]
+    obs <- length(unsparse.vec)
+    obs.str <- sprintf('obs=%i', obs)
+    # use constants defined above. TODO: compute these once!
+    max.str <- sprintf(max.str, max(unsparse.vec))
+    med.str <- sprintf(med.str, median(unsparse.vec))
+    min.str <- sprintf(min.str, min(unsparse.vec))
+    return.str <-
+      sprintf('%s %s: %s, %s, %s',
+              obs.str, grids.str, min.str, med.str, max.str)
+  } else {
+    return.str <-"no data"
+  }
+  return.str
+} # end function subtitle.stats
+
+plot.before.and.after.layers.for.timestep <- function(
+  source.datavar,   # source/unmodified data variable
+  target.datavar,   # target/modified data variable
+  datavar.name,     # string naming the datavar
+  i.timestep=1,     # index of timestep to plot
+  datavar.n.layers, # max number of layers (in timestep) to plot
+  attrs.list,       # list of global attributes
+  q.vec=NULL,       # for quantiles # TODO: handle when null!
+  colors,
+  map
+) {
+  for (i.layer in 1:datavar.n.layers) {
+# debugging
+# i.layer <- 1
+
+#    source.data <- source.datavar[,,i.layer,i.timestep]
+    source.data <- source.datavar[,,i.layer]
+#    target.data <- target.datavar[,,i.layer,i.timestep]
+    target.data <- target.datavar[,,i.layer]
+# start debugging for Doug Nychka Mon, 13 Feb 2012 21:33:36 -0700
+#    print(paste('class(source.data)==', class(source.data), sep=""))
+#    print(paste('class(target.data)==', class(target.data), sep=""))
+#   end debugging for Doug Nychka Mon, 13 Feb 2012 21:33:36 -0700
+
+    source.title <-
+      sprintf('%s original: layer#=%2i', datavar.name, i.layer)
+#    source.title <-
+#      sprintf('%s original: layer#=%2i (%s) # when we have the crop name
+    target.title <-
+      sprintf('%s modified: layer#=%2i', datavar.name, i.layer)
+#    target.title <-
+#      sprintf('%s modified: layer#=%2i (%s) # when we have the crop name
+    attr.list <-
+      ncatt_get(datavar.parent, varid=datavar.name, attname="units")
+    if (attr.list$hasatt) {
+      source.title <-
+        sprintf('%s, units=%s', source.title, attr.list$value)
+      target.title <-
+        sprintf('%s, units=%s', target.title, attr.list$value)
+    } else {
+      cat(sprintf(
+        'plot.layers.for.timestep: ERROR: no units for var=%s\n',
+        datavar.name))
+    }
+    plot.layer(source.data,
+      title=source.title,
+#      sub="subtitle",
+      sub=subtitle.stats(source.data),
+      attrs.list=attrs.list,
+      q.vec=q.vec,
+      colors=colors,
+      map=map)
+    plot.layer(target.data,
+      title=target.title,
+#      sub="subtitle",
+      sub=subtitle.stats(target.data),
+      attrs.list=attrs.list,
+      q.vec=q.vec,
+      colors=colors,
+      map=map)
+  } # end iterating layers
+} # end function plot.before.and.after.layers.for.timestep
+
+plot.layer <- function(
+  data,             # data to plot (required)
+  title,            # string for plot title (required?)
+                    # TODO: handle when null!
+  subtitle=NULL,    # string for plot subtitle
+  attrs.list=NULL,  # list of global attributes (used for plotting)
+  q.vec=NULL,       # for quantiles
+  colors,
+  map
+) {
+  x.centers <- attrs.list$x.cell.centers.km
+  y.centers <- attrs.list$y.cell.centers.km
+  plot.data(data, title, subtitle, x.centers, y.centers, q.vec, colors, map)
+} # end function plot.layer
+
+# return a vector of the centers of the columns of a Raster
+raster.centers.x <- function(raster) {
+  # indices of the cells in the first row==c(1:ncol(r))
+  # xyFromCell(r, c(1:ncol(r))) returns a matrix of the centers of those cells
+  # we want all, and only, the x values (y values are identical),
+  # i.e., column=1, 
+  # and I believe we want them in ascending order (default for `sort`)
+  return(sort(xyFromCell(raster, c(1:ncol(raster)) )[,1]))
+}
+
+# return a vector of the centers of the rows of a Raster
+raster.centers.y <- function(raster) {
+  # indices of the cells in the first col==seq(1, ncell(r), by=ncol(r))
+  # xyFromCell(r, seq(1, ncell(r), by=ncol(r))) returns a matrix of the centers of those cells
+  # we want all, and only, the y values (x values are identical),
+  # i.e., column=2, 
+  # and I believe we want them in ascending order (default for `sort`)
+  return(sort(xyFromCell(raster, seq(1, ncell(raster), by=ncol(raster)))[,2]))
+}
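+
+# Illustrative sketch, assuming package=raster is loaded:
+#   r <- raster::raster(nrows=2, ncols=3, xmn=0, xmx=3, ymn=0, ymx=2)
+#   raster.centers.x(r) # 0.5 1.5 2.5
+#   raster.centers.y(r) # 0.5 1.5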
+
+plot.raster <- function(
+  raster,           # data to plot (required)
+  title,            # string for plot title (required?)
+                    # TODO: handle when null!
+  subtitle=NULL,    # string for plot subtitle
+  q.vec=NULL,       # for quantiles
+  colors,
+  map
+) {
+  x.centers <- raster.centers.x(raster)
+  y.centers <- raster.centers.y(raster)
+  # package=fields needs data as matrix, not vector
+  data.mx <- getValues(raster)
+  dim(data.mx) <- c(length(x.centers), length(y.centers)) # cols, rows
+#  plot.data(data.mx, title, subtitle, x.centers, y.centers, q.vec, colors, map)
+  # Above fails: data is reversed relative to map ?!?
+#  plot.mx <- data.mx[nrow(data.mx):1,] # just change the row indices
+  # And reversing matrix rows vertically fails--
+  # it mirrors the plot horizontally/east-west!
+  # What works: reverse the _cols_ horizontally (not transpose)
+  plot.mx <- data.mx[,ncol(data.mx):1] # just change the column indices
+  plot.data(plot.mx, title, subtitle, x.centers, y.centers, q.vec, colors, map)
+} # end function plot.raster
+
+plot.data <- function(
+  data,             # data to plot (required)
+  title,            # string for plot title (required?)
+                    # TODO: handle when null!
+  subtitle=NULL,    # string for plot subtitle
+  x.centers,        # vector of centers of columns
+  y.centers,        # vector of centers of rows
+  q.vec=NULL,       # for quantiles
+  colors,
+  map
+) {
+  if (sum(!is.na(data)) && (!is.null(q.vec))) {
+    plot.list <- list(x=x.centers, y=y.centers, z=data)
+    quantiles <- quantile(c(data), q.vec, na.rm=TRUE)
+    quantiles.formatted <- format(as.numeric(quantiles), digits=3)
+# start debugging
+#      print(paste('Non-null image.plot for source layer==', i.layer, ', quantile bounds=='))
+#      print(quantiles)
+#   end debugging
+    if (is.null(subtitle)) {
+      image.plot(plot.list, xlab="", ylab="", axes=F, col=colors(100),
+        axis.args=list(at=quantiles, labels=quantiles.formatted),
+        main=title)
+    } else {
+      image.plot(plot.list, xlab="", ylab="", axes=F, col=colors(100),
+        axis.args=list(at=quantiles, labels=quantiles.formatted),
+        main=title, sub=subtitle)
+    }
+    lines(map)
+  } else {
+# debugging
+#      print(paste('Null image.plot for source layer=', i.layer))
+    if (is.null(subtitle)) {
+      plot(0, type="n", axes=F, xlab="", ylab="",
+        xlim=range(x.centers), ylim=range(y.centers),
+        main=title)
+    } else {
+      plot(0, type="n", axes=F, xlab="", ylab="",
+        xlim=range(x.centers), ylim=range(y.centers),
+        main=title, sub=subtitle)
+    }
+    lines(map)
+  } # end testing data
+} # end function plot.data

File string.ncl

+;;; NCL helpers for string processing.
+;;; * Formatters require NCL built-in str_insert(...)
+;;;   probably pretty crude, but should be "close enough"
+;;; * Converters should be replaced by built-ins in version >= 6.2.0
+;;; TODO: unit testing!
+
+;;; If lowercase(input)=="true", return NCL boolean==True. (else not)
+;;; TODO: handle missing input
+;;; adapted from http://www.ncl.ucar.edu/Support/talk_archives/2009/2751.html
+undef("stringtological") ; allow redefinition in console
+function stringtological(
+  str [*] : string ; vector of strings
+)
+begin
+  str_n = dimsizes(str)
+  str_lc = str_lower(str)
+;  log = new(str_n, logical) ; 'log' is reserved?
+  bool = new(str_n, logical)
+  do i = 0, str_n - 1
+    if (str_lc(i) .eq. "true") then
+      bool(i) = True
+    else
+      bool(i) = False
+    end if
+  end do
+  return(bool) 
+end ; stringtological
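+
+;;; Illustrative example (case-insensitive, elementwise):
+;;;   stringtological((/"True", "false", "TRUE", "yes"/)) ; -> (/True, False, True, False/)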
+
+;;; If len < 0, blanks pad before `str`.
+;;; If len > 0, blanks pad after `str`.
+;;; TODO: handle corners, e.g. len(str) > len, len==0
+undef("pad_blanks_to_length") ; allow redefinition in console
+function pad_blanks_to_length(
+  str [1] : string,  ; string to pad
+  len [1] : numeric  ; field width
+)
+begin
+  ; relies on odd behavior of built-in==`str_insert`
+  ; TODO: test length(str) <= len
+  return(str_insert(str, "", len))
+end ; pad_blanks_to_length
+
+undef("left_pad_blanks") ; allow redefinition in console
+function left_pad_blanks(
+  str [1] : string,  ; string to pad
+  len [1] : numeric  ; field width
+)
+begin
+  if (len > 0) then
+    len = -len
+  end if
+  return(pad_blanks_to_length(str, len))
+end ; left_pad_blanks
+
+undef("right_pad_blanks") ; allow redefinition in console
+function right_pad_blanks(
+  str [1] : string,  ; string to pad
+  len [1] : numeric  ; field width
+)
+begin
+  if (len < 0) then
+    len = -len
+  end if
+  return(pad_blanks_to_length(str, len))
+end ; right_pad_blanks
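+
+;;; Illustrative examples, per the sign convention documented above
+;;; (assuming str_insert pads with blanks to the requested field width):
+;;;   left_pad_blanks("42", 5)  ; -> "   42"
+;;;   right_pad_blanks("42", 5) ; -> "42   "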

File summarize.ncl

+
+load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/contributed.ncl" ; for stat_dispersion
+load "./time.ncl" ; for seconds_in_year
+
+undef("summarize_monthly_3d") ; allow redefinition in console
+procedure summarize_monthly_3d(
+  ; array with 3 dimensions: [month | 12] x ...
+  m3d_arr [*][*][*] : numeric
+)
+; solely for side-effect==printing
+local i_month, month, month_arr
+begin
+  do i_month=0 , 11 ; remember: NCL indexing is {C-style,0-based}
+    month_arr = m3d_arr(i_month,:,:)
+    month = i_month + 1
+    print("month=" + sprinti("%2.1i", month) + ": " +     \
+      "min=" + sprintf("%7.3e", min(month_arr)) + ", "  + \
+      "avg=" + sprintf("%7.3e", avg(month_arr)) + ", "  + \
+      "max=" + sprintf("%7.3e", max(month_arr)) )
+  end do
+end ; procedure summarize_monthly_3d
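+
+;;; Illustrative example: for a [month | 12] x [lat] x [lon] array `emis`,
+;;;   summarize_monthly_3d(emis)
+;;; prints one "month=NN: min=..., avg=..., max=..." line per month.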
+
+undef("summarize_monthly_4d") ; allow redefinition in console
+; TODO: prototype arg dimensionality
+procedure summarize_monthly_4d(
+  ; array with 4 dimensions: (TSTEP | 25, LAY | 1, ROW | :, COL | :)
+  m4d_arr [*][*][*][*] : numeric,
+  month [1] : numeric
+)
+; solely for side-effect==printing
+local i_month
+begin
+  do i_tstep=0 , 24 ; remember: NCL indexing is {C-style,0-based}
+    print("month=" + sprinti("%2.1i", month) + ": " +     \
+      "min=" + sprintf("%7.3e", min(m4d_arr)) + ", "  + \
+      "avg=" + sprintf("%7.3e", avg(m4d_arr)) + ", "  + \
+      "max=" + sprintf("%7.3e", max(m4d_arr)) )
+  end do
+end ; procedure summarize_monthly_4d
+
+undef("summarize_annual_4d") ; allow redefinition in console
+procedure summarize_annual_4d(
+  ; array with 4 dimensions: (TSTEP | 25, LAY | 1, ROW | :, COL | :)
+  a4d_arr [*][*][*][*]: numeric
+)
+; solely for side-effect==printing
+local i_tstep
+begin
+  do i_tstep=0 , 24 ; remember: NCL indexing is {C-style,0-based}
+    print(\
+      "min=" + sprintf("%7.3e", min(a4d_arr)) + ", "  + \
+      "avg=" + sprintf("%7.3e", avg(a4d_arr)) + ", "  + \
+      "max=" + sprintf("%7.3e", max(a4d_arr)) )
+  end do
+end ; procedure summarize_annual_4d
+
+undef("summarize_annual_3d") ; allow redefinition in console
+; TODO: prototype arg dimensionality
+procedure summarize_annual_3d(
+  ; array with 4 dimensions: (TSTEP | 25, LAY | 1, ROW | :, COL | :)
+  a4d_arr [*][*][*][*]: numeric
+  ; ... 2 of which we ignore
+)
+local stats ; opt
+; solely for side-effect==printing
+begin
+  ; see http://www.ncl.ucar.edu/Document/Functions/Contributed/stat_dispersion.shtml
+;  opt = True
+;  opt@PrintStat = True
+;  stats = stat_dispersion(a4d_arr, opt)
+  stats = stat_dispersion(a4d_arr, False) ; False == don't print to stdout
+;    "cells=" + sprintf("%7.3e", stats(18)) + str_get_nl()  + \
+;    "obs  =" + sprintf("%7.3e", stats(19)) + str_get_nl()  + \
+  print(\
+    str_get_nl()                                           + \
+    "cells=" + stats(18)                   + str_get_nl()  + \
+    "obs  =" + stats(19)                   + str_get_nl()  + \
+    "min  =" + sprintf("%7.3e", stats(2))  + str_get_nl()  + \
+    "q1   =" + sprintf("%7.3e", stats(6))  + str_get_nl()  + \
+    "mean =" + sprintf("%7.3e", stats(0))  + str_get_nl()  + \
+    "med  =" + sprintf("%7.3e", stats(8))  + str_get_nl()  + \
+    "q3   =" + sprintf("%7.3e", stats(10)) + str_get_nl()  + \
+    "max  =" + sprintf("%7.3e", stats(14)) + str_get_nl()  + \
+    "sum  =" + sprintf("%7.3e", sum(a4d_arr)) )
+end ; procedure summarize_annual_3d
+
+undef("compare_EDGAR_sums") ; allow redefinition in console
+;;; compare to value from EDGAR datavar
+;;; solely for side-effect==printing
+procedure compare_EDGAR_sums(
+  tag [1]           : string,   ; just a bit of text to ID our subject
+  datavar [*][*]    : numeric,  ; EDGAR data source
+  candidate_sum [1] : numeric,  ; units=kg/s
+  year [1]          : numeric   ; of data
+)
+local edgar_sum_string_raw, edgar_sum_string, edgar_sum, calc_sum
+begin
+  edgar_sum_string_raw = str_left_strip(datavar@total_emi_n2o)
+;  print("edgar_sum_string_raw='"+edgar_sum_string_raw+"'") ; debugging
+  ;;; remove units (and intervening space) from numeric part
+  edgar_sum_string = str_get_field(edgar_sum_string_raw, 1, " ")
+;  print("edgar_sum_string='"+edgar_sum_string+"'") ; debugging
+  edgar_sum = stringtofloat(edgar_sum_string)       ; kg/yr
+  calc_sum = seconds_in_year(year) * candidate_sum  ; (kg/s) * (s/yr)
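+  ; e.g. for year=2008 (a leap year), seconds_in_year(2008) = 366*86400
+  ; = 3.16224e7 s, so candidate_sum=1.0 kg/s -> calc_sum=3.16224e7 kg/yr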
+
+  print(\
+    str_get_nl()+tag+": "+ \
+    "sum (kg/yr) from EDGAR="+edgar_sum+", "+ \
+    "calculated sum="+calc_sum+", "+ \
+    "EDGAR/calc="+(edgar_sum/calc_sum)+str_get_nl() \
+  )
+end ; procedure compare_EDGAR_sums

File time.ncl

+;;; NCL helpers for time calculations.
+;;; requires NCL built-in days_in_month(...)
+;;; probably pretty crude, but should be "close enough"
+;;; TODO: unit testing!
+
+undef("hours_in_month") ; allow redefinition in console
+function hours_in_month(
+  year [1] : integer,
+  month [1] : integer
+)
+begin
+  return(days_in_month(year, month) * 24)
+end ; function hours_in_month
+
+undef("seconds_in_month") ; allow redefinition in console
+function seconds_in_month(
+  year [1] : integer,
+  month [1] : integer
+)
+begin
+  return(hours_in_month(year, month) * 3600) ; 60*60
+end ; function seconds_in_month
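+
+;;; Illustrative example: seconds_in_month(2008, 2) = 29 * 24 * 3600 = 2505600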
+
+undef("hours_in_year") ; allow redefinition in console
+function hours_in_year(
+  year [*] : integer ; vector of years as ints
+)
+local hours, i_month
+begin
+  hours = 0
+  do i_month=0 , 11 ; remember: NCL indexing is {C-style,0-based}
+    hours = hours + hours_in_month(year, i_month+1)
+  end do
+  return(hours)
+end ; function hours_in_year
+
+undef("seconds_in_year") ; allow redefinition in console
+function seconds_in_year(
+  year [*] : integer ; vector of years as ints
+)
+begin
+  return(hours_in_year(year) * 3600) ; 60*60
+end ; function seconds_in_year
+
+undef("days_in_year") ; allow redefinition in console
+function days_in_year(
+  year [*] : integer ; vector of years as ints
+)
+begin
+  ; ASSERT: 31 Dec is last day in any year
+;  ; start debugging---------------------------------------------------
+;  print("time.ncl::days_in_year: year='"+year+"'")
+;  print("time.ncl::days_in_year: return='"+toint(day_of_year(year, 12, 31))+"'")
+;  ;   end debugging---------------------------------------------------
+  return(toint(day_of_year(year, 12, 31)))
+end ; function days_in_year
+
+undef("first_day_of_month") ; allow redefinition in console
+function first_day_of_month(
+  year [1] : integer,
+  month [1] : integer ; index of month in year
+)
+begin
+  return(toint(day_of_year(year, month, 1)))
+end ; function first_day_of_month
+
+undef("last_day_of_month") ; allow redefinition in console
+function last_day_of_month(
+  year [1] : integer,
+  month [1] : integer ; index of month in year
+)
+begin
+  return(toint(day_of_year(year, month, days_in_month(year, month))))
+end ; function last_day_of_month
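+
+;;; Illustrative examples:
+;;;   first_day_of_month(2008, 3) ; -> 61 (31 + 29 + 1)
+;;;   last_day_of_month(2008, 2)  ; -> 60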

File visualization.r

+# R code for depicting Raster*, whether single- or multi-layer.
+
+# REQUIRES netCDF.stats.to.stdout.r, e.g.,
+# source(stat_funcs_fp)
+# TODO: package my code! see howto, e.g., Wickham project=devtools
+
+# ----------------------------------------------------------------------
+
+# database: Geographical database to use.  Choices include "state"
+#           (default), "world", "worldHires", "canusamex", etc.  Use
+#           "canusamex" to get the national boundaries of the Canada, the
+#           USA, and Mexico, along with the boundaries of the states.
+#           The other choices ("state", "world", etc.) are the names of
+#           databases included with the ‘maps’ and ‘mapdata’ packages.
+
+project.M3.boundaries.for.CMAQ <- function(
+  database='state', # see `?M3::get.map.lines.M3.proj`
+  units='m',        # or 'km': see `?M3::get.map.lines.M3.proj`
+  extents.fp,       # path to extents file
+  # TODO: calculate extents from extents.fp if extents==null
+  extents,          # raster::extent object
+  # TODO: calculate LCC.parallels from extents.fp if LCC.parallels==null
+  LCC.parallels=c(33,45), # LCC standard parallels: see https://github.com/TomRoche/cornbeltN2O/wiki/AQMEII-North-American-domain#wiki-EPA
+  CRS               # see `sp::CRS`
+) {
+
+  library(M3)
+  ## Will replace raw LCC map's coordinates with:
+  metadata.coords.IOAPI.list <- M3::get.grid.info.M3(extents.fp)
+  metadata.coords.IOAPI.x.orig <- metadata.coords.IOAPI.list$x.orig
+  metadata.coords.IOAPI.y.orig <- metadata.coords.IOAPI.list$y.orig
+  metadata.coords.IOAPI.x.cell.width <- metadata.coords.IOAPI.list$x.cell.width
+  metadata.coords.IOAPI.y.cell.width <- metadata.coords.IOAPI.list$y.cell.width
+
+  library(maps)
+  map.lines <- M3::get.map.lines.M3.proj(
+    file=extents.fp, database=database, units="m")
+  # dimensions are in meters, not cells. TODO: take argument
+  map.lines.coords.IOAPI.x <-
+    (map.lines$coords[,1] - metadata.coords.IOAPI.x.orig)
+  map.lines.coords.IOAPI.y <-
+    (map.lines$coords[,2] - metadata.coords.IOAPI.y.orig)
+  map.lines.coords.IOAPI <- 
+    cbind(map.lines.coords.IOAPI.x, map.lines.coords.IOAPI.y)
+
+  # # start debugging
+  # class(map.lines.coords.IOAPI)
+  # # [1] "matrix"
+  # summary(map.lines.coords.IOAPI)
+  # #  map.lines.coords.IOAPI.x map.lines.coords.IOAPI.y
+  # #  Min.   : 283762                Min.   : 160844               
+  # #  1st Qu.:2650244                1st Qu.:1054047               
+  # #  Median :3469204                Median :1701052               
+  # #  Mean   :3245997                Mean   :1643356               
+  # #  3rd Qu.:4300969                3rd Qu.:2252531               
+  # #  Max.   :4878260                Max.   :2993778               
+  # #  NA's   :168                    NA's   :168             
+  # #   end debugging
+
+  # Note above is not zero-centered, like our extents:
+  # extent : -2556000, 2952000, -1728000, 1860000  (xmin, xmax, ymin, ymax)
+  # So gotta add (xmin, ymin) below.
+
+  ## Get LCC state map
+  # see http://stackoverflow.com/questions/14865507/how-to-display-a-projected-map-on-an-rlatticelayerplot
+  map.IOAPI <- maps::map(
+    database="state", projection="lambert", par=LCC.parallels, plot=FALSE)
+  #                  parameters to lambert: ^^^^^^^^^^^^^^^^^
+  #                  see mapproj::mapproject
+  map.IOAPI$x <- map.lines.coords.IOAPI.x + raster::xmin(extents)
+  map.IOAPI$y <- map.lines.coords.IOAPI.y + raster::ymin(extents)
+  map.IOAPI$range <- c(
+    min(map.IOAPI$x),
+    max(map.IOAPI$x),
+    min(map.IOAPI$y),
+    max(map.IOAPI$y))
+  map.IOAPI.shp <-
+    maptools::map2SpatialLines(map.IOAPI, proj4string=CRS)
+
+  # # start debugging
+  # class(map.IOAPI.shp)
+  # # [1] "SpatialLines"
+  # # attr(,"package")
+  # # [1] "sp"
+  # bbox(map.IOAPI.shp)
+  # #        min     max
+  # # x -2272238 2322260
+  # # y -1567156 1265778
+  # # compare to (above)
+  # # > template.extents
+  # # class       : Extent 
+  # # xmin        : -2556000 
+  # # xmax        : 2952000 
+  # # ymin        : -1728000 
+  # # ymax        : 1860000 
+  # #   end debugging
+
+  return(map.IOAPI.shp)
+} # end project.M3.boundaries.for.CMAQ <- function(
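+
+# Hypothetical usage sketch (the path and projection string below are
+# placeholders; the extent matches the AQMEII-NA example above):
+#   library(raster) ; library(sp)
+#   extents.fp <- '/path/to/any_IOAPI_file_on_the_grid.ncf'
+#   template.extents <- raster::extent(-2556000, 2952000, -1728000, 1860000)
+#   lcc.crs <- sp::CRS('+proj=lcc +lat_1=33 +lat_2=45 +lat_0=40 +lon_0=-97 +units=m')
+#   map.shp <- project.M3.boundaries.for.CMAQ(
+#     database='canusamex', units='m',
+#     extents.fp=extents.fp, extents=template.extents,
+#     LCC.parallels=c(33,45), CRS=lcc.crs)
+#   # map.shp is a SpatialLines object usable in the map.list args below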
+
+# ----------------------------------------------------------------------
+
+# see `project.M3.boundaries.for.CMAQ` above
+project.US.state.boundaries.for.CMAQ <- function(
+  units='m',
+  extents.fp,
+  extents,
+  LCC.parallels=c(33,45),
+  CRS
+) {
+  return(
+    project.M3.boundaries.for.CMAQ(
+      database='state',
+      units,
+      extents.fp,
+      extents,
+      LCC.parallels,
+      CRS)
+  )
+} # end project.US.state.boundaries.for.CMAQ <- function(
+
+# ----------------------------------------------------------------------
+
+# see `project.M3.boundaries.for.CMAQ` above
+project.NorAm.boundaries.for.CMAQ <- function(
+  units='m',
+  extents.fp,
+  extents,
+  LCC.parallels=c(33,45),
+  CRS
+) {
+  return(
+    project.M3.boundaries.for.CMAQ(
+      database='canusamex',
+      units,
+      extents.fp,
+      extents,
+      LCC.parallels,
+      CRS)
+  )
+} # end project.NorAm.boundaries.for.CMAQ <- function(
+
+# ----------------------------------------------------------------------
+
+nonplot.vis.layer <- function(
+  nc.fp,         # path to netCDF datasource ...
+  datavar.name,  # name of the netCDF data variable
+  sigdigs=3      # precision to use for stats
+) {
+  # ncdump the file
+  cmd <- sprintf('ncdump -h %s', nc.fp)
+  cat(sprintf('%s==\n', cmd))
+  system(cmd)
+  # get stats on it
+  netCDF.stats.to.stdout(
+    netcdf.fp=nc.fp,
+    data.var.name=datavar.name,
+    stats.precision=sigdigs)
+} # end nonplot.vis.layer <- function
+
+# ----------------------------------------------------------------------
+
+plot.vis.layer <- function(
+  layer,         # ... for data (a RasterLayer)
+  map.list,      # maps to overlay on data: list of SpatialLines or objects castable thereto 
+  pdf.fp,        # path to PDF output
+  pdf.height,
+  pdf.width
+) {
+  # start plot driver
+  cat(sprintf(
+    '%s: plotting %s (may take awhile! TODO: add progress control)\n',
+    'plot.vis.layer', pdf.fp))
+  pdf(file=pdf.fp, width=pdf.width, height=pdf.height)
+
+  library(rasterVis)
+
+  # I want to overlay the data with each map in the list, iteratively.
+  # But the following does not work: only the last map in the list shows.
+#  plots <- rasterVis::levelplot(layer,
+#  #  layers,
+#    margin=FALSE,
+#  #  names.attr=names(global.df),
+#    layout=c(1,length(names(layer))))
+#  for (i.map in 1:length(map.list)) {
+#    plots <- plots + latticeExtra::layer(
+#      # why does this fail if map.shp is local? see 'KLUDGE' in callers :-(
+#      sp::sp.lines(
+#        as(map.list[[i.map]], "SpatialLines"),
+#        lwd=0.8, col='darkgray'))
+#  } # end for (i.map in 1:length(map.list))
+#  plot(plots)
+
+  # For now, kludge :-( handle lists of length 1 and 2 separately:
+  if        (length(map.list) == 1) {
+
+    plot(
+      rasterVis::levelplot(layer,
+        margin=FALSE,
+#        layout=c(1,length(names(layer)))
+      ) + latticeExtra::layer(
+        sp::sp.lines(
+          as(map.list[[1]], "SpatialLines"),
+          lwd=0.8, col='darkgray')
+      )
+    )
+
+  } else if (length(map.list) == 2) {
+
+    plot(
+      rasterVis::levelplot(layer,
+        margin=FALSE,
+#        layout=c(1,length(names(layer)))
+      ) + latticeExtra::layer(
+        sp::sp.lines(
+          as(map.list[[1]], "SpatialLines"),
+          lwd=0.8, col='darkgray')
+      ) + latticeExtra::layer(
+        sp::sp.lines(
+          as(map.list[[2]], "SpatialLines"),
+          lwd=0.8, col='darkgray')
+      )
+    )
+
+  } else {
+    stop(sprintf('plot.vis.layer: ERROR: cannot handle length(map.list)==%i', length(map.list)))
+  }
+  
+  dev.off()
+
+} # end plot.vis.layer <- function
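+
+# A possible generalization of the 1-or-2-map kludge above (untested sketch):
+# latticeExtra::layer() captures its expression unevaluated, so a bare loop
+# variable is only looked up at plot time (hence "only the last map shows").
+# Passing each map through layer()'s `data` argument pins it per overlay:
+#   plots <- rasterVis::levelplot(layer, margin=FALSE)
+#   for (i.map in seq_along(map.list)) {
+#     plots <- plots + latticeExtra::layer(
+#       sp::sp.lines(as(m, "SpatialLines"), lwd=0.8, col='darkgray'),
+#       data=list(m=map.list[[i.map]]))
+#   }
+#   plot(plots)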
+
+# ----------------------------------------------------------------------
+
+visualize.layer <- function(
+  nc.fp,         # path to netCDF datasource ...
+  datavar.name,  # name of the netCDF data variable
+  sigdigs=3,     # precision to use for stats
+  layer,         # ... for data (a RasterLayer)
+  map.list,      # maps to overlay on data: list of SpatialLines or objects castable thereto 
+  pdf.fp,        # path to PDF output
+  pdf.height,
+  pdf.width
+) {
+  nonplot.vis.layer(nc.fp, datavar.name, sigdigs)
+  plot.vis.layer(layer, map.list, pdf.fp, pdf.height, pdf.width)
+} # end visualize.layer <- function
+
+# ----------------------------------------------------------------------
+
+nonplot.vis.layers <- function(
+  nc.fp,           # path to netCDF datasource ...
+  datavar.name,    # name of the netCDF data variable
+  layer.dim.name,  # name of the datavar dimension indexing the layers (e.g., 'time')
+  sigdigs=3        # precision to use for stats
+) {
+  # ncdump the file
+  cmd <- sprintf('ncdump -h %s', nc.fp)
+  cat(sprintf('%s==\n', cmd))
+  system(cmd)
+  # get stats on it
+  # until netCDF.stats.to.stdout.by.timestep supports range restriction, just get all timesteps
+  netCDF.stats.to.stdout.by.timestep(
+    netcdf.fp=nc.fp,
+    data.var.name=datavar.name,
+    time.dim.name=layer.dim.name,
+    stats.precision=sigdigs)
+
+#  # TODO: make these work as if evaluated @ top level
+#  brick          # show it
+#  summary(brick) # compare to netCDF.stats above
+} # end nonplot.vis.layers <- function
+
+# ----------------------------------------------------------------------
+
+plot.vis.layers <- function(
+  brick,    # ... for data (a RasterBrick)
+  map.list, # maps to overlay on data: list of SpatialLines or objects castable thereto 
+  pdf.fp,   # path to PDF output
+  pdf.height,
+  pdf.width
+) {
+  # start plot driver
+  cat(sprintf(
+    '%s: plotting %s (may take awhile! TODO: add progress control)\n',
+    'plot.vis.layers', pdf.fp))
+  pdf(file=pdf.fp, width=pdf.width, height=pdf.height)
+
+  library(rasterVis)
+
+  # I want to overlay the data with each map in the list, iteratively.
+  # But the following does not work: only the last map in the list shows.
+#  plots <- rasterVis::levelplot(brick,
+#  #  layers,
+#    margin=FALSE,
+#  #  names.attr=names(global.df),
+#    layout=c(1,length(names(brick))))
+#  for (i.map in 1:length(map.list)) {
+#    plots <- plots + latticeExtra::layer(
+#      # why does this fail if map.shp is local? see 'KLUDGE' in callers :-(
+#      sp::sp.lines(
+#        as(map.list[[i.map]], "SpatialLines"),
+#        lwd=0.8, col='darkgray'))
+#  } # end for (i.map in 1:length(map.list))
+#  plot(plots)
+
+  # For now, kludge :-( handle lists of length 1 and 2 separately:
+  if        (length(map.list) == 1) {
+
+    plot(
+      rasterVis::levelplot(brick,
+        margin=FALSE,
+        layout=c(1,length(names(brick)))
+      ) + latticeExtra::layer(
+        sp::sp.lines(
+          as(map.list[[1]], "SpatialLines"),
+          lwd=0.8, col='darkgray')
+      )
+    )
+
+  } else if (length(map.list) == 2) {
+
+    plot(
+      rasterVis::levelplot(brick,
+        margin=FALSE,
+        layout=c(1,length(names(brick)))
+      ) + latticeExtra::layer(
+        sp::sp.lines(
+          as(map.list[[1]], "SpatialLines"),
+          lwd=0.8, col='darkgray')
+      ) + latticeExtra::layer(
+        sp::sp.lines(
+          as(map.list[[2]], "SpatialLines"),
+          lwd=0.8, col='darkgray')
+      )
+    )
+
+  } else {
+    stop(sprintf('plot.vis.layers: ERROR: cannot handle length(map.list)==%i', length(map.list)))
+  }
+  
+  dev.off()
+
+} # end plot.vis.layers <- function
+
+# ----------------------------------------------------------------------
+
+visualize.layers <- function(
+  nc.fp,           # path to netCDF datasource ...
+  datavar.name,    # name of the netCDF data variable
+  layer.dim.name,  # name of the datavar dimension indexing the layers (e.g., 'time')
+  sigdigs=3,       # precision to use for stats
+  brick,           # ... for data (a RasterBrick)
+  map.list, # maps to overlay on data: list of SpatialLines or objects castable thereto 
+  pdf.fp,          # path to PDF output
+  pdf.height,
+  pdf.width
+) {
+  nonplot.vis.layers(nc.fp, datavar.name, layer.dim.name, sigdigs)
+  plot.vis.layers(brick, map.list, pdf.fp, pdf.height, pdf.width)
+} # end visualize.layers <- function