Source

MOZART-global to AQMEII-NA / prune_IOAPI.ncl

Full commit
;!/usr/bin/env ncl
;----------------------------------------------------------------------

; Driven by geographicalize_IOAPI.sh, merely to convert netCDF IOAPI file with numerous datavars into one with a single (non-IOAPI) datavar:
; it's just much easier to write netCDF with NCL than with R::ncdf4.

; TODO: pass parameters via commandline, not environment variables

;----------------------------------------------------------------------
; libraries
;----------------------------------------------------------------------

load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_code.ncl" ; all built-ins?
;load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_csm.ncl"
load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/contributed.ncl" ; for *_VarAtts

;----------------------------------------------------------------------
; functions
;----------------------------------------------------------------------

; function add_att_string(
;   fh,          ; file handle
;   att_name,    ; (possible) name of the attribute
;   att_string,  ; value to assign
;   add_mode     ; "pre"=prepend
; )

function prepend_att_string(
  fh,          ; file handle                
  fh_atts,     ; attributes intended for fh
  att_name,    ; (possible) name of the attribute
  att_string   ; value to assign
)
  ; returns possibly-augmented fh_atts
  local att_name_lc, att_name_uc, att_name_pc, valid_att_names, valid_att_names_len, valid_att_names_log, van_i, valid_att_name, valid_att_val_old, valid_att_val_new
begin
  att_name_lc = str_lower(att_name)
  att_name_uc = str_upper(att_name)
  att_name_pc = str_capital(att_name) ; proper-case
  valid_att_names = (/ att_name_lc, att_name_uc, att_name_pc /)
  valid_att_names_len = dimsizes(valid_att_names) - 1 ; scalar int
  valid_att_names_log = isatt(fh, valid_att_names)    ; array of boolean

  if (any(valid_att_names_log)) then
    ; gotta figure out which one--take only the first
    do van_i = 0, valid_att_names_len
      if (valid_att_names_log(van_i)) then
        ; there is a previously existing global att name=valid_att_names(van_i)
        valid_att_name = valid_att_names(van_i)
        valid_att_val_old = fh@$valid_att_name$
        ; prepend to it
        valid_att_val_new = att_string + " ; " + valid_att_val_old
        fh_atts@$valid_att_name$ = valid_att_val_new
        ; delete the old (which we pledge to replace)
        delete_VarAtts(fh, valid_att_name)
        return(fh_atts)
      end if
    end do
  else
    ; just add att_name
    fh_atts@$att_name$ = att_string
    return(fh_atts)
  end if
  
end ; function prepend_att_string

;----------------------------------------------------------------------
; payload
;----------------------------------------------------------------------

begin ; skip if copy/paste-ing to console
  ; constants--------------------------------------------------------

  ;; all the following env vars must be set and exported in driver script (TODO: fail if not)
  this_fn = getenv("CALL_NCL_FN")
;  print("this_fn=="+this_fn) ; debugging

  ;; source grid
  sg_fp = getenv("SG_FP")
  sg_fh = addfile(sg_fp, "r") ; read-only

  ;;; source datavars names
  sg_datavar_grid_name = getenv("SG_DATAVAR_GRID_NAME")
  sg_datavar_ioapi_name = getenv("SG_DATAVAR_IOAPI_NAME")

  ;; target grids: intermediate grids from driver perspective

  ;;; target netCDF gridfile
  tg_netcdf_fp = getenv("IG_NETCDF_FP")

  ;;; target CSV gridfile
  tg_csv_fp = getenv("IG_CSV_FP")
  tg_csv_na = stringtofloat(getenv("IG_CSV_NA"))
  tg_csv_float_f = getenv("IG_CSV_FLOAT_F")
  tg_csv_integ_f = getenv("IG_CSV_INTEG_F")

  ; code--------------------------------------------------------------

  ;; open target netCDF file
  ; gotta delete it! (in driver)
  tg_netcdf_fh = addfile(tg_netcdf_fp, "c") ; create
  setfileoption(tg_netcdf_fh, "DefineMode", True) ; enter define mode

  ;; get source datavars: need these to set global attributes!
  sg_datavar_grid = sg_fh->$sg_datavar_grid_name$
  sg_datavar_ioapi = sg_fh->$sg_datavar_ioapi_name$
  sg_datavar_grid_longname = sg_datavar_grid@long_name

  ;; set global attributes
  ; copy global attributes sg -> tg per http://www.ncl.ucar.edu/Support/talk_archives/2009/2675.html
  fileattdef(tg_netcdf_fh, sg_fh)

  ;;; rewrite pruned global atts VAR-LIST, NVARS
  varlist = "VAR-LIST" 
  delete_VarAtts(tg_netcdf_fh, varlist)
  delete_VarAtts(tg_netcdf_fh, "NVARS")

  ;;; create additional global attributes
  ; TODO: take these from commandline or envvar
  tg_netcdf_fh_att = True ; assign file attributes
  tg_netcdf_fh_att@$varlist$ = sg_datavar_grid_longname ; a peculiarity of IOAPI. TODO: doc me!
  tg_netcdf_fh_att@NVARS = 1
  tg_netcdf_fh_att = prepend_att_string(tg_netcdf_fh, tg_netcdf_fh_att, "title", "initial conditions for 2008 PHASE run over AQMEII-NA")
  tg_netcdf_fh_att = prepend_att_string(tg_netcdf_fh, tg_netcdf_fh_att, "source_file", sg_fp)
  tg_netcdf_fh_att = prepend_att_string(tg_netcdf_fh, tg_netcdf_fh_att, "Conventions", "IOAPI-3.0")
  tg_netcdf_fh_att = prepend_att_string(tg_netcdf_fh, tg_netcdf_fh_att, "history", "see https://bitbucket.org/tlroche/mozart-global-to-aqmeii-na")
  tg_netcdf_fh_att = prepend_att_string(tg_netcdf_fh, tg_netcdf_fh_att, "creation_date", systemfunc("date"))

  fileattdef(tg_netcdf_fh, tg_netcdf_fh_att) ; copy file attributes   

  ;; coordinate vars: none

  ;; dimensions: how to iterate?

  ;;; grid dimensions
  sg_datavar_grid_dim_n = dimsizes(sg_datavar_grid)
  sg_datavar_grid_TSTEP_n = sg_datavar_grid_dim_n(0)
  sg_datavar_grid_LAY_n = sg_datavar_grid_dim_n(1)
  sg_datavar_grid_ROW_n = sg_datavar_grid_dim_n(2)
  sg_datavar_grid_COL_n = sg_datavar_grid_dim_n(3)

  ;;; IOAPI dimensions
  sg_datavar_ioapi_dim_n = dimsizes(sg_datavar_ioapi)
  sg_datavar_ioapi_TSTEP_n = sg_datavar_ioapi_dim_n(0)
  ;;;;; gotta prune VAR in target grid
  sg_datavar_ioapi_VAR_n = sg_datavar_ioapi_dim_n(1)
  sg_datavar_ioapi_DATETIME_n = sg_datavar_ioapi_dim_n(2)

;  copy_VarCoords(sg_fh, tg_netcdf_fh) ; fails
;  > fatal:Dimension (0) is out of range for file (ICON_CB05AE5_US12_2007356_pruned)
;  > fatal:Execute: Error occurred at or near line 193 in file $NCARG_ROOT/lib/ncarg/nclscripts/csm/contributed.ncl

  ;;; set global dimensions
  tg_datavar_ioapi_VAR_n = 1 ; prune the output dimension
  ;             TSTEP  DATE-TIME      LAY      VAR      ROW      COL
  dimNames = (/ sg_fh!0, sg_fh!1, sg_fh!2, sg_fh!3, sg_fh!4, sg_fh!5 /) ;
  dimSizes = (/ sg_datavar_grid_TSTEP_n, \
                sg_datavar_ioapi_DATETIME_n, \
                sg_datavar_grid_LAY_n, \
;                sg_datavar_ioapi_VAR_n, \
                tg_datavar_ioapi_VAR_n, \
                sg_datavar_grid_ROW_n, \
                sg_datavar_grid_COL_n /)

;  ; start debugging (note final comment will fail in shell)
;  print(this_fn+": dimNames=="+dimNames) ; debugging
;  print(this_fn+": dimSizes=="+dimSizes) ; debugging
;  ;   end debugging

  ; no unlimited dims in tg
  dimUnlim = (/ False, False, False, False, False, False /)
  filedimdef(tg_netcdf_fh, dimNames, dimSizes, dimUnlim)

;  ; start debugging
;  print("tg_netcdf_fh with attributes only==")
;  printVarSummary(tg_netcdf_fh)
;  print(tg_netcdf_fh)
;  ;   end debugging

  ;; define variables
  filevardef(tg_netcdf_fh, sg_datavar_grid_name, typeof(sg_datavar_grid), getvardims(sg_datavar_grid))
  filevardef(tg_netcdf_fh, sg_datavar_ioapi_name, typeof(sg_datavar_ioapi), getvardims(sg_datavar_ioapi))

  ;;; define variables' attributes
  ; copy all var atts
  filevarattdef(tg_netcdf_fh, sg_datavar_grid_name, sg_datavar_grid)
  filevarattdef(tg_netcdf_fh, sg_datavar_ioapi_name, sg_datavar_ioapi)

  ;; write data to target netCDF grid

  setfileoption(tg_netcdf_fh, "DefineMode", False) ; exit define mode
  ; write data to defined file
  tg_netcdf_fh->$sg_datavar_grid_name$ = (/ sg_datavar_grid /)
;    tg_netcdf_fh->$sg_datavar_ioapi_name$ = (/ sg_datavar_ioapi /)
  ; gotta only write 1 VAR: fortunately, TFLAG is homogenous:
  ; $ ncdump -v TFLAG ICON_CB05AE5_US12_2007356.nc | tail -n 91
  ; TFLAG =
  ;   2007356, 0,
  ;   2007356, 0,
  ;   2007356, 0,
  ; ...
  ;   2007356, 0,
  ;   2007356, 0,
  ;   2007356, 0 ;
  ; }
  ; can't make named subscripting work
  ; sg_datavar_ioapi_pruned = sg_datavar_ioapi(TSTEP | :, VAR | 0, DATE\-TIME | :)
  ; with spaces, NCL still thinks it's named subscripting
  ; sg_datavar_ioapi_pruned = sg_datavar_ioapi(:, 0, :)
  sg_datavar_ioapi_pruned = sg_datavar_ioapi(:,0,:)
  tg_netcdf_fh->$sg_datavar_ioapi_name$ = (/ sg_datavar_ioapi_pruned /)

;  ; start debugging
;  print("tg_netcdf_fh with variables and attributes only==")
;  printVarSummary(tg_netcdf_fh)
;  print(tg_netcdf_fh)
;  ;   end debugging

;   ;; write sg_datavar_grid for ease of reading from R-----------------
;   ;;; same code as hsp_to_gh_amsl.ncl::"; write z_int_geo_var to ASCII table"
;   ;;; TODO: refactor!

;   ; wish I could use
;   ; write_table(tg_csv_fp, "w",  ; overwrite tg_csv_fp
;   ;   ; list of values to write
;   ;   ; format for writing values
;   ; )
;   ; but don't see how to go from NO2(TSTEP, LAY, ROW, COL) to NCL-style list.

;   ; vector=lines holds strings of form [TSTEP, LAY, ROW, COL, dataval]
;   lines_i = 0 ; index into vector=lines

;   ; coordinate dimensions and indices (used later for iteration)
; ;  TSTEP_var = sg_fh->TSTEP
; ; error: "variable (TSTEP) is not in file (sg_fh)" ... because TSTEP==1?
; ;  TSTEP_var = (/ 1 /)
; ;  TSTEP_var = sg_fh!0
; ; Since there are no coordinate variables, there is no real variable: TSTEP_var == "TSTEP" (the string)
;   TSTEP_min = 0                 ; C-style, min array index
;   TSTEP_max = sg_datavar_grid_TSTEP_n - 1       ; max array index

;   LAY_min = 0
;   LAY_max = sg_datavar_grid_LAY_n - 1

;   ROW_min = 0
;   ROW_max = sg_datavar_grid_ROW_n - 1

;   COL_min = 0
;   COL_max = sg_datavar_grid_COL_n - 1

;   ; lines_n == |records| in vector=lines == length(sg_datavar_grid)
;   lines_n = sg_datavar_grid_TSTEP_n * sg_datavar_grid_LAY_n * sg_datavar_grid_COL_n * sg_datavar_grid_ROW_n
;   lines_max = lines_n - 1
;   lines = new( (/ lines_n /), string)
;   lines@_FillValue = tg_csv_na

;   ; Loop order (hopefully) facilitates reading vector @ each horizontal.
;   do r=ROW_min, ROW_max

;     ; poor man's progress
;     print(this_fn+": processing ROW=="+sprinti("%3i",r)+"/"+ROW_max+" @ time=="+systemfunc("date +%H:%M:%S"))

;     do c=COL_min, COL_max
;       do l=LAY_min, LAY_max
;         do t=TSTEP_min, TSTEP_max
; ;          lines(lines_i) = sprintf(tg_csv_line_format, COL_var(c), row_var(r), sg_datavar_grid(i,r,c))
; ;          lines(lines_i) = sprintf(tg_csv_line_format, (/ COL_var(c), row_var(r), sg_datavar_grid(i,r,c) /) )
;           ; that would be too easy !-(
; ;          line = sprinti(tg_csv_integ_f, ROW_var(r)) + "," + \
;           ; Since there are no coordinate variables,
;           ; * there is no real variable: ROW_var == "ROW" (the string)
;           ; * we can assume dimension values are indices (no?)

; ;          ; start debugging-------------------------------------------
; ;          print("  ROW=="+r)
; ;          print("  COL=="+c)
; ;          print("  LAY=="+l)
; ;          print("TSTEP=="+t)
; ;          ;   end debugging-------------------------------------------

;           line = sprinti(tg_csv_integ_f, r) + "," + \
;                  sprinti(tg_csv_integ_f, c) + "," + \
;                  sprinti(tg_csv_integ_f, l) + "," + \
;                  sprinti(tg_csv_integ_f, t) + "," + \
;                  sprintf(tg_csv_float_f, sg_datavar_grid(t,l,r,c))
;                  ; sg_datavar_grid dims=(TSTEP, LAY, ROW, COL)
;           lines(lines_i) = line
;           lines_i = lines_i + 1
;         end do ; t=TSTEP_min, TSTEP_max
;       end do ; l=LAY_min, LAY_max
;     end do ; c=COL_min, COL_max
;   end do ; r=row_min, row_max

; ; start debugging---------------------------------------
;   printVarSummary(lines)
;   print("head(lines)==")
;   print(lines(0:10))
;   print("tail(lines)==")
;   print(lines((lines_max - 10):))
; ;   end debugging---------------------------------------

;   asciiwrite (tg_csv_fp, lines)

; ;; start debugging---------------------------------------
; ;  print("head("+tg_csv_fp+")==")
; ;  system("head "+tg_csv_fp) 
; ;  print("tail("+tg_csv_fp+")==")
; ;  system("tail "+tg_csv_fp) 
; ;;   end debugging---------------------------------------

end ; prune_IOAPI.ncl