Source

MOZART-global to AQMEII-NA / geographicalize_IOAPI.r

# R code, driven by geographicalize_IOAPI.sh, to
# convert data from IOAPI-gridded netCDF datavar with dimensions including (ROW, COL) to instead use (lat, lon).
# TODO: pass parameters via commandline, not environment variables

# constants-----------------------------------------------------------

# all the following env vars must be set and exported in driver script (TODO: fail if not)
this.fn <- Sys.getenv('CALL_R_FN')  # FIXME: gets overidden by `source` below
# cat(sprintf('this.fn=%s\n', this.fn)) # debugging

# local constants, from environment-----------------------------------
# taking from Rscript call failed, annoyingly and mysteriously :-(

# cat(sprintf('%s: Setting constants from environment ...\n', this.fn)) # debugging

# source grid for this script, intermediate grid from driver perspective
sg.netcdf.fp <- Sys.getenv('IG_NETCDF_FP')
# sg.netcdf.fp <- './ICON_CB05AE5_US12_2007356_pruned.nc'
# sg.csv.fp <- Sys.getenv('IG_CSV_FP')
# sg.csv.fp <- './ICON_CB05AE5_US12_2007356_pruned.txt'
# sg.datavar.grid.name <- Sys.getenv('SG_DATAVAR_GRID_NAME')
# sg.datavar.grid.names.vec <- Sys.getenv('SG_DATAVAR_GRID_NAMES_VEC')
# sg.datavar.grid.names.vec <- c('ROW', 'COL', 'LAY', 'TSTEP', 'CONC')
sg.datavar.grid.units <- Sys.getenv('SG_DATAVAR_GRID_UNITS')
# sg.levels.hsp.raw == direct from ICON_CB05AE5_US12_2007356::VGLVLS
# convert to numeric array
# sg.levels.hsp.raw <- Sys.getenv('SG_LEVELS_HSP_RAW')
sg.levels.hsp.raw <-
  as.numeric(unlist(strsplit(Sys.getenv('SG_LEVELS_HSP_RAW'), split=',', fixed=TRUE)))
# # start debugging
# cat(sprintf('sg.levels.hsp.raw==%s\n', sg.levels.hsp.raw))
# cat(sprintf('class(sg.levels.hsp.raw)==%s\n', class(sg.levels.hsp.raw)))
# cat(sprintf('length(sg.levels.hsp.raw)==%s\n', length(sg.levels.hsp.raw)))
# sg.levels.hsp.raw <- as.numeric(x=sg.levels.hsp.raw, length=length(sg.levels.hsp.raw))
# #   end debugging

# target grid for this script
tg.csv.fp <- Sys.getenv('TG_CSV_FP') # path to output CSV file
# tg.csv.fp <- './ICON_CB05AE5_US12_2007356_pruned_geoged.txt'

# start debugging
# TODO: check paths
cat(sprintf('sg.netcdf.fp==%s\n', sg.netcdf.fp))
cat(sprintf('sg.datavar.grid.units==%s\n', sg.datavar.grid.units))
# cat(sprintf('sg.levels.hsp.raw==%s\n', sg.levels.hsp.raw)) # need format string
print('sg.levels.hsp.raw==')
print(sg.levels.hsp.raw)
cat(sprintf('tg.csv.fp==%s\n', tg.csv.fp))
# #   end debugging

# functions-----------------------------------------------------------

# code----------------------------------------------------------------

cat(sprintf('%s: Loading package=M3 ...\n', this.fn)) # debugging
library(M3) # for get.coord.for.dimension, project.M3.to.lonlat

# 1. Get grid node coordinates from the source netCDF grid.

## COL
## M3.pdf
## > If dimension is "column" or "col", return as element coords a vector containing the x-coordinates
## > of the centers ("ctr"), left ("lower"), or right ("upper") edges of each row

### Get bottom cell node coordinates.

sg.datavar.grid.nodes.x.less <-
  M3::get.coord.for.dimension(
    file=sg.netcdf.fp, dimension="col", position = "lower", units=sg.datavar.grid.units)$coords
# sg.datavar.grid.nodes.x.len <-length(sg.datavar.grid.nodes.x.less)
# No, that's the length of the _centers_, not the nodes.
sg.datavar.grid.nodes.x.len <-length(sg.datavar.grid.nodes.x.less) + 1
# # start debugging
# cat(sprintf('%s: sg.datavar.grid.nodes.x.less:\n', this.fn))
# cat(sprintf('\tlength==%i\n', sg.datavar.grid.nodes.x.len))
# cat(sprintf('\thead==\n'))
# print(head(sg.datavar.grid.nodes.x.less))
# cat(sprintf('\ttail==\n'))
# print(tail(sg.datavar.grid.nodes.x.less))
# #   end debugging

### Get top cell node coordinates.

sg.datavar.grid.nodes.x.more <-
  M3::get.coord.for.dimension(
    file=sg.netcdf.fp, dimension="col", position = "upper", units=sg.datavar.grid.units)$coords
# # start debugging
# cat(sprintf('%s: sg.datavar.grid.nodes.x.more:\n', this.fn))
# cat(sprintf('\tlength==%i\n', length(sg.datavar.grid.nodes.x.more)))
# cat(sprintf('\thead==\n'))
# print(head(sg.datavar.grid.nodes.x.more))
# cat(sprintf('\ttail==\n'))
# print(tail(sg.datavar.grid.nodes.x.more))
# #   end debugging

### Add the outermost node to the bottom vector.

sg.datavar.grid.nodes.x <-
  c(sg.datavar.grid.nodes.x.less,
    # R indices are 1-based
    sg.datavar.grid.nodes.x.more[sg.datavar.grid.nodes.x.len - 1])
# # start debugging
# cat(sprintf('%s: sg.datavar.grid.nodes.x:\n', this.fn))
# cat(sprintf('\tlength==%i\n', length(sg.datavar.grid.nodes.x)))
# cat(sprintf('\thead==\n'))
# print(head(sg.datavar.grid.nodes.x))
# cat(sprintf('\ttail==\n'))
# print(tail(sg.datavar.grid.nodes.x))
# summary(sg.datavar.grid.nodes.x)
# #   end debugging
# > cat(sprintf('\tlength==%i\n', length(sg.datavar.grid.nodes.x)))
#       length==460
# > print(head(sg.datavar.grid.nodes.x))
# [1] -2556000 -2544000 -2532000 -2520000 -2508000 -2496000
# > print(tail(sg.datavar.grid.nodes.x))
# [1] 2892000 2904000 2916000 2928000 2940000 2952000
# > summary(sg.datavar.grid.nodes.x)
#     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
# -2556000 -1179000   198000   198000  1575000  2952000 
# vs https://github.com/TomRoche/cornbeltN2O/wiki/AQMEII-North-American-domain#wiki-EPA
# > domain origin (from projection center, in km): -2556 (W), -1728 (S)
# > horizontal grid spacing: 12 km
# > horizontal grid count (x,y): 459, 299 

## ROW
## M3.pdf
## > If dimension is "row", return as element coords a vector containing the y-coordinates
## > of the centers ("ctr"), left ("lower"), or right ("upper") edges of each row

### Get left cell node coordinates.

sg.datavar.grid.nodes.y.less <-
  M3::get.coord.for.dimension(
    file=sg.netcdf.fp, dimension="row", position = "lower", units=sg.datavar.grid.units)$coords
# sg.datavar.grid.nodes.y.len <-length(sg.datavar.grid.nodes.y.less)
sg.datavar.grid.nodes.y.len <-length(sg.datavar.grid.nodes.y.less) + 1
# # start debugging
# cat(sprintf('%s: sg.datavar.grid.nodes.y.less:\n', this.fn))
# cat(sprintf('\tlength==%i\n', sg.datavar.grid.nodes.y.len))
# cat(sprintf('\thead==\n'))
# print(head(sg.datavar.grid.nodes.y.less))
# cat(sprintf('\ttail==\n'))
# print(tail(sg.datavar.grid.nodes.y.less))
# #   end debugging

### Get right cell node coordinates.

sg.datavar.grid.nodes.y.more <-
  M3::get.coord.for.dimension(
    file=sg.netcdf.fp, dimension="row", position = "upper", units=sg.datavar.grid.units)$coords
# # start debugging
# cat(sprintf('%s: sg.datavar.grid.nodes.y.more:\n', this.fn))
# cat(sprintf('\tlength==%i\n', length(sg.datavar.grid.nodes.y.more)))
# cat(sprintf('\thead==\n'))
# print(head(sg.datavar.grid.nodes.y.more))
# cat(sprintf('\ttail==\n'))
# print(tail(sg.datavar.grid.nodes.y.more))
# #   end debugging

### Add the outermost node to the left vector.

sg.datavar.grid.nodes.y <-
  c(sg.datavar.grid.nodes.y.less,
    # R indices are 1-based
    sg.datavar.grid.nodes.y.more[sg.datavar.grid.nodes.y.len - 1])
# # start debugging
# cat(sprintf('%s: sg.datavar.grid.nodes.y:\n', this.fn))
# cat(sprintf('\tlength==%i\n', length(sg.datavar.grid.nodes.y)))
# cat(sprintf('\thead==\n'))
# print(head(sg.datavar.grid.nodes.y))
# cat(sprintf('\ttail==\n'))
# print(tail(sg.datavar.grid.nodes.y))
# # check for monotonicity, which can be a problem, at least with M3::project.M3.to.lonlat
# summary(sg.datavar.grid.nodes.y)
# #   end debugging
# > cat(sprintf('\tlength==%i\n', length(sg.datavar.grid.nodes.y)))
#       length==300
# > print(head(sg.datavar.grid.nodes.y))
# [1] -1728000 -1716000 -1704000 -1692000 -1680000 -1668000
# > print(tail(sg.datavar.grid.nodes.y))
# [1] 1800000 1812000 1824000 1836000 1848000 1860000
# > summary(sg.datavar.grid.nodes.y)
#     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
# -1728000  -831000    66000    66000   963000  1860000 
# vs https://github.com/TomRoche/cornbeltN2O/wiki/AQMEII-North-American-domain#wiki-EPA
# > domain origin (from projection center, in km): -2556 (W), -1728 (S)
# > horizontal grid spacing: 12 km
# > horizontal grid count (x,y): 459, 299 

# 2. Project (ROW, COL) to (lon, lat).

lon.lat.list <- M3::project.M3.to.lonlat(
  x=sg.datavar.grid.nodes.x, y=sg.datavar.grid.nodes.y,
  file=sg.netcdf.fp, units=sg.datavar.grid.units)
## M3.pdf
## > [returns a] list containing the elements coords and units.
## > The element coords contains a matrix of coordinates
##                                 ^^^^^^
# Note: gets
# > Warning message:
# > In cbind(x, y) :
# >   number of rows of result is not a multiple of vector length
# why? because we're projecting grid nodes instead of grid centers?

datavar.grid.nodes.lon <- lon.lat.list$coords[,"longitude"]
# start debugging
cat(sprintf('%s: datavar.grid.nodes.lon:\n', this.fn))
cat(sprintf('\tlength==%i\n', length(datavar.grid.nodes.lon)))
cat(sprintf('\thead==\n'))
print(head(datavar.grid.nodes.lon))
cat(sprintf('\ttail==\n'))
print(tail(datavar.grid.nodes.lon))
summary(datavar.grid.nodes.lon)
#   end debugging
#       length==460
# > print(head(datavar.grid.nodes.lon))
# [1] -121.0633 -120.9847 -120.9058 -120.8266 -120.7472 -120.6676
# > print(tail(datavar.grid.nodes.lon))
# [1] -63.86990 -63.69620 -63.52222 -63.34793 -63.17335 -62.99848
# > summary(datavar.grid.nodes.lon)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
# -121.10 -110.20  -94.30  -94.38  -81.07  -63.00 
# vs https://github.com/TomRoche/cornbeltN2O/wiki/AQMEII-North-American-domain#wiki-EPA
# > longitudes: -130° to -59.5° (W)

# datavar.grid.nodes.lat <- lon.lat.list$coords[,"latitude"]
## Note: datavar.grid.nodes.lat is not monotonic! @ index=300
# > datavar.grid.nodes.lat
#   [1] 21.55726 21.68468 21.81211 21.93955 22.06701 22.19449 22.32197 22.44947
# ...
# [289] 54.80393 54.89120 54.97815 55.06479 55.15112 55.23713 55.32282 55.40820
# [297] 55.49326 55.57799 55.66241 55.74650 24.01844 24.11121 24.20385 24.29637
# [305] 24.38875 24.48101 24.57314 24.66514 24.75700 24.84874 24.94033 25.03180
# ...
## This is apparently because M3::project.M3.to.lonlat returns a _matrix_,
## so it's padding out lat, which is the shorter vector.
## I.e., M3::project.M3.to.lonlat is using `cbind` or `rbind`, which 'recycle' values.
datavar.grid.nodes.lat <- lon.lat.list$coords[,"latitude"][1:sg.datavar.grid.nodes.y.len]
# start debugging
cat(sprintf('%s: datavar.grid.nodes.lat:\n', this.fn))
cat(sprintf('\tlength==%i\n', length(datavar.grid.nodes.lat)))
cat(sprintf('\thead==\n'))
print(head(datavar.grid.nodes.lat))
cat(sprintf('\ttail==\n'))
print(tail(datavar.grid.nodes.lat))
summary(datavar.grid.nodes.lat)
#   end debugging
#       length==300
# > print(head(datavar.grid.nodes.lat))
# [1] 21.55726 21.68468 21.81211 21.93955 22.06701 22.19449
# > print(tail(datavar.grid.nodes.lat))
# [1] 55.32282 55.40820 55.49326 55.57799 55.66241 55.74650
# > summary(datavar.grid.nodes.lat)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#   21.56   31.06   40.25   39.72   48.65   55.75 
# vs https://github.com/TomRoche/cornbeltN2O/wiki/AQMEII-North-American-domain#wiki-EPA
# > latitudes: 23.5° to 58.5° (N)

# 3. Create 3D array of grid nodes=(lon, lat, hσp)
#    lon and lat values come from vectors above.
#    hσp values come from the global attribute="VGLVLS", per (but note misspelling)
# http://www.nco.ncep.noaa.gov/pmb/codes/nwprod/lib/sorc/aqm_ioapi/ioapi/binio.html
# > REAL VGLEVELS[0:NLAYS+1]: array of vertical coordinate level values;
# > level 1 of the grid goes from vertical coordinate VGLEVELS[0] to VGLEVELS[1], etc.
# http://ofmpub.epa.gov/rsig/rsigserver?regridding.html
# > CMAQ vertical grid description parameters NLAYS, VGTOP, VGLVLS (sigma pressures)

# sg.levels.hsp.MOZART == reversed (bottom layer is vector[-1]) and units==mb
sg.levels.hsp.MOZART <- rev(sg.levels.hsp.raw * 1000.0)

hsp.lat.lon <- as.matrix(
  expand.grid(sg.levels.hsp.MOZART, datavar.grid.nodes.lat, datavar.grid.nodes.lon),
  dimnames=list(NULL,      # for rows
    c("hsp", "lat", "lon") # for cols
  )
)
# start debugging
cat(sprintf('%s: hsp.lat.lon:\n', this.fn))
cat(sprintf('\tlength==%i\n', length(hsp.lat.lon)))
cat(sprintf('\thead==\n'))
print(head(hsp.lat.lon))
cat(sprintf('\ttail==\n'))
print(tail(hsp.lat.lon))
summary(hsp.lat.lon)
#   end debugging

# 4. Write (lon, lat, hσp) to CSV for consumption by NCL.
cat(sprintf('%s: write.table-ing hsp.lat.lon to %s\n', this.fn, tg.csv.fp))
# cat(sprintf('%s: start=%s\n', this.fn, system('date', intern=TRUE)))
write.table(x=hsp.lat.lon, file=tg.csv.fp,
  sep=',', append=FALSE, quote=FALSE, row.names=FALSE, col.names=FALSE)
# cat(sprintf('%s:   end=%s\n', this.fn, system('date', intern=TRUE)))
# # start debugging
# ls.cmd <- sprintf('ls -alh %s', tg.csv.fp)
# cat('\n') # separator
# cat(sprintf('%s: %s==\n', this.fn, ls.cmd))
# system(ls.cmd)
# cat('\n') # separator
# cat(sprintf('%s: `head %s`==\n', this.fn, tg.csv.fp))
# system(sprintf('head %s', tg.csv.fp))
# cat('\n') # separator
# cat(sprintf('%s: `tail %s`==\n', this.fn, tg.csv.fp))
# system(sprintf('tail %s', tg.csv.fp))
# #   end debugging
# > system(sprintf('head %s', tg.csv.fp))
# 500,21.5572634001491,-121.063324105307
# 550,21.5572634001491,-121.063324105307
# 600,21.5572634001491,-121.063324105307
# 650,21.5572634001491,-121.063324105307
# 700,21.5572634001491,-121.063324105307
# 740,21.5572634001491,-121.063324105307
# 770,21.5572634001491,-121.063324105307
# 800,21.5572634001491,-121.063324105307
# 820,21.5572634001491,-121.063324105307
# 840,21.5572634001491,-121.063324105307
# > system(sprintf('tail %s', tg.csv.fp))
# 930,55.7464959233389,-62.9984802608885
# 940,55.7464959233389,-62.9984802608885
# 950,55.7464959233389,-62.9984802608885
# 960,55.7464959233389,-62.9984802608885
# 970,55.7464959233389,-62.9984802608885
# 980,55.7464959233389,-62.9984802608885
# 985,55.7464959233389,-62.9984802608885
# 990,55.7464959233389,-62.9984802608885
# 995,55.7464959233389,-62.9984802608885
# 1000,55.7464959233389,-62.9984802608885