Source

GEIA_regrid / regrid_global_to_AQMEII.r

# R code to 2D-regrid GEIA global/unprojected netCDF data to AQMEII-NA, an LCC-projected subdomain.
# If running manually in R console, remember to run setup actions: `source ./regrid_GEIA_netCDF.sh`

## library(ncdf4)
## library(fields)
## # run if rgeos not available
## # gpclibPermit()
## 

# ----------------------------------------------------------------------
# constants
# ----------------------------------------------------------------------

this.fn <- Sys.getenv('CALL_REGRID_FN')

# all the following env vars must be set and exported in driver script
work.dir <- Sys.getenv('WORK_DIR')
pdf.fp <- Sys.getenv('GEIA_REGRID_PDF_FP')
pdf.er <- Sys.getenv('PDF_VIEWER')
sigdigs <- as.numeric(Sys.getenv('OUTPUT_SIGNIFICANT_DIGITS'))
in.fp <- Sys.getenv('GEIA_REFORMAT_FP')
in.band <- Sys.getenv('GEIA_REFORMAT_BAND')
data.var.name <- Sys.getenv('GEIA_REFORMAT_DATAVAR_NAME')
data.var.longname <- Sys.getenv('GEIA_REFORMAT_DATAVAR_LONGNAME')
data.var.unit <- Sys.getenv('GEIA_REFORMAT_DATAVAR_UNIT')
data.var.na <- as.numeric(Sys.getenv('GEIA_REFORMAT_DATAVAR_NA')) # must convert from string
template.var.name <- Sys.getenv('TEMPLATE_VAR_NAME')
template.in.fp <- Sys.getenv('TEMPLATE_INPUT_FP')
template.band <- Sys.getenv('TEMPLATE_INPUT_BAND')
out.fp <- Sys.getenv('GEIA_REGRID_FP')
x.var.name <- Sys.getenv('GEIA_REGRID_X_VAR_NAME')
y.var.name <- Sys.getenv('GEIA_REGRID_Y_VAR_NAME')

# ----------------------------------------------------------------------
# functions
# ----------------------------------------------------------------------

# ----------------------------------------------------------------------
# payload
# ----------------------------------------------------------------------

# ----------------------------------------------------------------------
# setup
# ----------------------------------------------------------------------

# coordinate reference system:
# use package=M3 to get CRS from template file
library(M3)
out.proj4 <- M3::get.proj.info.M3(template.in.fp)
cat(sprintf('out.proj4=%s\n', out.proj4)) # debugging
# out.proj4=+proj=lcc +lat_1=33 +lat_2=45 +lat_0=40 +lon_0=-97 +a=6370000 +b=6370000

stat.script.fp <- Sys.getenv('STAT_SCRIPT_FP')
source(stat.script.fp) # produces errant error=
#> netCDF.stats.to.stdout.r: no arguments supplied, exiting
plot.script.fp <- Sys.getenv('PLOT_SCRIPT_FP')
source(plot.script.fp)

# if any of the above failed, check how you ran (e.g., `source`d) driver script
# plot constants below

# check input

system(sprintf('ncdump -h %s', in.fp))
# netcdf GEIA_N2O_oceanic {
# dimensions:
#   lon = 360 ;
#   lat = 180 ;
#   time = UNLIMITED ; // (1 currently)
# variables:
#   double lon(lon) ;
# ...
#   double lat(lat) ;
# ...
#   double time(time) ;
# ...
#   double emi_n2o(time, lat, lon) ;
#     emi_n2o:units = "ton N2O-N/yr" ;
#     emi_n2o:missing_value = -999. ;
#     emi_n2o:long_name = "N2O emissions" ;
#     emi_n2o:_FillValue = -999.f ;

# Note missing/fill values: more below.

netCDF.stats.to.stdout(netcdf.fp=in.fp, var.name=data.var.name)
# For /tmp/projectRasterTest/GEIA_N2O_oceanic.nc var=emi_n2o
#       cells=64800
#       obs=36143
#       min=5.96e-08
#       q1=30.4
#       med=67.7
#       mean=99.5
#       q3=140
#       max=1.17e+03
# Note min and max of the input, and that min > 0.

# make input raster

library(raster)
in.raster <- raster::raster(in.fp, varname=data.var.name) #, band=in.band)
in.raster
# class       : RasterLayer 
# dimensions  : 180, 360, 64800  (nrow, ncol, ncell)
# resolution  : 1, 1  (x, y)
# extent      : -180, 180, -90, 90  (xmin, xmax, ymin, ymax)
# coord. ref. : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 
# values      : /tmp/projectRasterTest/GEIA_N2O_oceanic.nc 
# layer name  : N2O.emissions 
# z-value     : 0 
# zvar        : emi_n2o 

# make template raster to format output.
# Note below that one should be able to projectRaster(...) with a resolution (i.e.,
# from=in.raster, res=c(12e3, 12e3), crs=out.proj4,
# ) as well as a template (i.e.,
# from=in.raster, to=template.raster, crs=out.proj4,
# ). So why create a template? Because just giving a resolution hangs.

# look @ input (one of the PHASE AQ inputs) to template/extents raster:
# truncate 'ncdump' to avoid IOAPI cruft
system(sprintf('ncdump -h %s | head -n 13', template.in.fp))
# netcdf emis_mole_all_20080101_12US1_cmaq_cb05_soa_2008ab_08c.EXTENTS_INPUT {
# dimensions:
#   TSTEP = UNLIMITED ; // (25 currently)
#   LAY = 1 ;
#   ROW = 299 ;
#   COL = 459 ;
# variables:
#   float emi_n2o(TSTEP, LAY, ROW, COL) ;
#     emi_n2o:long_name = "XYL             " ;
#     emi_n2o:units = "moles/s         " ;
#     emi_n2o:var_desc = "Model species XYL                                                               " ;

# use M3 to get extents from template file (thanks CGN!)
extents.info <- M3::get.grid.info.M3(template.in.fp)
extents.xmin <- extents.info$x.orig
extents.xmax <- max(
  M3::get.coord.for.dimension(
    file=template.in.fp, dimension="col", position="upper", units="m")$coords)
extents.ymin <- extents.info$y.orig
extents.ymax <- max(
  M3::get.coord.for.dimension(
    file=template.in.fp, dimension="row", position="upper", units="m")$coords)
grid.res <- c(extents.info$x.cell.width, extents.info$y.cell.width) # units=m

template.extents <-
  raster::extent(extents.xmin, extents.xmax, extents.ymin, extents.ymax)
template.extents

template.in.raster <-
  raster::raster(template.in.fp, varname=template.var.name, band=template.band)
template.raster <- raster::projectExtent(template.in.raster, crs=out.proj4)
#> Warning message:
#> In projectExtent(template.in.raster, out.proj4) :
#>   158 projected point(s) not finite
# is that "projected point(s) not finite" warning important? Probably not, per Hijmans
template.raster@extent <- template.extents
# should resemble the domain specification @
# https://github.com/TomRoche/cornbeltN2O/wiki/AQMEII-North-American-domain#wiki-EPA
template.raster
# class       : RasterLayer 
# dimensions  : 299, 459, 137241  (nrow, ncol, ncell)
# resolution  : 12000, 12000  (x, y)
# extent      : -2556000, 2952000, -1728000, 1860000  (xmin, xmax, ymin, ymax)
# coord. ref. : +proj=lcc +lat_1=33 +lat_2=45 +lat_0=40 +lon_0=-97 +a=6370000 +b=6370000 

# start debug---------------------------------------------------------
# regrid.start.time <- system('date', intern=TRUE)
# regrid.start.str <- sprintf('start regrid @ %s', regrid.start.time)
# cat(sprintf('%s\n', regrid.start.str))
#   end debug---------------------------------------------------------

# at last: do the regridding
out.raster <-
  raster::projectRaster(
    # give a template with extents--fast, but gotta calculate extents
    from=in.raster, to=template.raster, crs=out.proj4,
    # give a resolution instead of a template? no, that hangs
#    from=in.raster, res=grid.res, crs=out.proj4,
    method='bilinear', overwrite=TRUE, format='CDF',
    # args from writeRaster
    NAflag=data.var.na,
    varname=data.var.name, 
    varunit=data.var.unit,
    longname=data.var.longname,
    xname=x.var.name,
    yname=y.var.name,
    filename=out.fp)
# above fails to set CRS, so
library(sp)
out.raster@crs <- sp::CRS(out.proj4)
out.raster
# class       : RasterLayer 
# dimensions  : 299, 459, 137241  (nrow, ncol, ncell)
# resolution  : 12000, 12000  (x, y)
# extent      : -2556000, 2952000, -1728000, 1860000  (xmin, xmax, ymin, ymax)
# coord. ref. : +proj=lcc +lat_1=33 +lat_2=45 +lat_0=40 +lon_0=-97 +a=6370000 +b=6370000 
# data source : /tmp/projectRasterTest/GEIA_N2O_oceanic_regrid.nc 
# names       : N2O.emissions 
# zvar        : emi_n2o 

# start debug---------------------------------------------------------
# regrid.end.time <- system('date', intern=TRUE)
# cat(sprintf('  end regrid @ %s\n', regrid.end.time))
# cat(sprintf('%s\n', regrid.start.time))
#   end debug---------------------------------------------------------

system(sprintf('ls -alht %s', work.dir))
system(sprintf('ncdump -h %s', out.fp))
# netcdf GEIA_N2O_oceanic_regrid {
# dimensions:
#   COL = 459 ;
#   ROW = 299 ;
# variables:
#   double COL(COL) ;
#     COL:units = "meter" ;
#     COL:long_name = "COL" ;
#   double ROW(ROW) ;
#     ROW:units = "meter" ;
#     ROW:long_name = "ROW" ;
#   float emi_n2o(ROW, COL) ;
#     emi_n2o:units = "ton N2O-N/yr" ;
#     emi_n2o:_FillValue = -999. ;
#     emi_n2o:missing_value = -999. ;
#     emi_n2o:long_name = "N2O emissions" ;
#     emi_n2o:projection = "+proj=lcc +lat_1=33 +lat_2=45 +lat_0=40 +lon_0=-97 +a=6370000 +b=6370000" ;
#     emi_n2o:projection_format = "PROJ.4" ;
#     emi_n2o:min = 0.83728 ;
#     emi_n2o:max = 522.693774638276 ;
# ...

netCDF.stats.to.stdout(netcdf.fp=out.fp, var.name=data.var.name)
# For ./GEIA_N2O_oceanic_regrid.nc var=emi_n2o
#   cells=137241
#   obs=46473
#   min=0.837
#   q1=11.9
#   med=54.1
#   mean=71.9
#   q3=85.5
#   max=523

# plot to PDF

# plot constants

library(maptools)
data(wrld_simpl) # from maptools
# map.us.unproj <- wrld_simpl[wrld_simpl$ISO3 == 'USA', ]  # unprojected
library(rgdal)
# map.us.proj <- rgdal::spTransform(map.us.unproj, sp::CRS(out.proj4))  # projected
# do North America instead
map.us.unproj <- wrld_simpl[wrld_simpl$ISO3 %in% c('CAN', 'MEX', 'USA'),]
map.us.proj <-
  rgdal::spTransform(map.us.unproj, sp::CRS(out.proj4)) # projected

title <- sprintf('%s\n%s',
  Sys.getenv('GEIA_REGRID_PDF_TITLE'), Sys.getenv('GEIA_REGRID_DATAVAR_UNIT'))

out.data.vec <- getValues(out.raster) # raster data as vector
subtitle <- subtitle.stats(out.data.vec)

# package=M3 map for fields::image.plot
map.table.fp <- Sys.getenv('MAP_TABLE_FP')
map.cmaq <- read.table(map.table.fp, sep=",")
palette.vec <- c(
# original from KMF, 3 colors added to get deciles in probabilities.vec
  #              R color
  #                code
  "grey",         # 260
  "purple",       # 547
  "deepskyblue2", # 123  
  "green",        # 254
  "greenyellow",  # 259
  "yellow",       # 652
  "orange",       # 498
  "orangered",    # 503
  "red",          # 552
  "red4",         # 556
  "brown"         #  32
)
colors <- colorRampPalette(palette.vec)
# used for quantiling legend
probabilities.vec <- seq(0, 1, 1.0/(length(palette.vec) - 1))
# probabilities.vec
# [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0

x.centers <- raster.centers.x(out.raster)
y.centers <- raster.centers.y(out.raster)
quantiles <- quantile(out.data.vec, probabilities.vec, na.rm=TRUE)
quantiles.formatted <- format(as.numeric(quantiles), digits=sigdigs)

# repeat for each plot file
pdf(file=pdf.fp, width=5.5, height=4.25)

# plot page 1: raster::plot-------------------------------------------
# plot(out.raster, main=title, sub=subtitle) # works
plot(out.raster, # remaining args from image.plot
  main=title, sub=subtitle,
  xlab='', ylab='', axes=F, col=colors(100),
  axis.args=list(at=quantiles, labels=quantiles.formatted))
# add a projected CONUS map
plot(map.us.proj, add=TRUE)

# plot page 2: fields::image.plot-------------------------------------

plot.raster(
  raster=out.raster,
  title=title, 
  subtitle=subtitle,
  q.vec=probabilities.vec,
  colors,
  map.cmaq
)

# step through plot.raster:
# package=fields needs data as matrix, not vector
# out.data.mx <- out.data.vec
# dim(out.data.mx) <- c(length(x.centers), length(y.centers)) # cols, rows
# plot.data(out.data.mx, title, subtitle, x.centers, y.centers, probabilities.vec, colors, map.cmaq)

# step through plot.data
# plot.list <- list(x=x.centers, y=y.centers, z=out.data.mx)
# image.plot(plot.list, xlab='', ylab='', axes=F, col=colors(100),
#   axis.args=list(at=quantiles, labels=quantiles.formatted),
#   main=title, sub=subtitle)
# lines(map.cmaq)

#   end image.plot----------------------------------------------------

# flush the plot device
dev.off()