Source

CLM_CN_global_to_AQMEII-NA / check_conservation.ncl

Full commit
;!/usr/bin/env ncl ; requires version >= ???
;----------------------------------------------------------------------
; Check conservation of mass in output of regrid/retemp/reunit-ed CLM-CN data relative to input, using input units.

;----------------------------------------------------------------------
; libraries
;----------------------------------------------------------------------

load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_code.ncl"    ; all built-ins?
;load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_csm.ncl"
load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/contributed.ncl" ; for int2flt

;----------------------------------------------------------------------
; functions
;----------------------------------------------------------------------

; can't call `getenv` before `begin`?

; Calculate area bounded by lons and lats on sphere, as defined by input
; * variable with coord vars=(lat, lon) (i.e., with those names and that order)
;   defining the _centers_ of a grid
; * radius of sphere
; Returns area as float(lat,lon), but assumes input lat,lon==double.
undef("lats_lons_sphere_area")  ; allow redefinition in console
function lats_lons_sphere_area(
  lat_lon_var [*][*] : numeric, ; 2D
  radius      [1]    : numeric
)
  local Pi, radius_2, sphere_area,\
    lat_var, lat_size, lat_lines_size, lat_lines, lat_span, i_lat,\
    lon_var, lon_size, lon_lines_size, lon_lines, lon_span, i_lon    
begin

  Pi = 3.14159265 ; why do I hafta define this?
  radius_2 = radius ^ 2 ; will also use later
  sphere_area = 4 * Pi * radius_2 ; for debugging only

  ;;; get dimensions/coords of passed grid centers var
  lat_var = lat_lon_var&lat
  lat_size = dimsizes(lat_var)
  lat_lines_size = lat_size + 1
  lon_var = lat_lon_var&lon
  lon_size = dimsizes(lon_var)
  lon_lines_size = lon_size + 1

  ;;; create dimensions/coords for grid _lines_ var
  lat_lines=new( (/ lat_lines_size /), double)
  lon_lines=new( (/ lon_lines_size /), double)
  ; assume grid fully spans globe, and grid lines are evenly separated
  lat_span = 180.0/lat_size ; latitudinal span of each gridcell
  lon_span = 360.0/lon_size ; longitudinal span of each gridcell

  ; assume `lat` points to grid centers, with "innermost" line a half span away from first center.
  lat_lines(0) = lat_var(0) - (lat_span/2.0)
  do i_lat = 1 , lat_size
    lat_lines(i_lat) = lat_lines(i_lat - 1) + lat_span 
;    print("lat_lines("+i_lat+")="+lat_lines(i_lat))
  end do

  ; assume `lon` points to grid centers, with "leftmost" line a half span away from first center.
  lon_lines(0) = lon_var(0) - (lon_span/2.0)
  do i_lon = 1 , lon_size
    lon_lines(i_lon) = lon_lines(i_lon - 1) + lon_span 
;    print("lon_lines("+i_lon+")="+lon_lines(i_lon))
  end do

;  print("lats_lons_sphere_area: calculating area array")
  ; keep dimension order
  area_arr = new( (/ lat_size, lon_size /), float)
  do i_lat = 0 , lat_size - 1
    do i_lon = 0 , lon_size - 1
      ; `gc_qarea` is built-in!
      area_arr(i_lat, i_lon) = tofloat(gc_qarea(\
        (/ lat_lines(i_lat), lat_lines(i_lat+1), lat_lines(i_lat+1), lat_lines(i_lat)   /),\
        (/ lon_lines(i_lon), lon_lines(i_lon),   lon_lines(i_lon+1), lon_lines(i_lon+1) /)\
      ) * radius_2)
    end do ; iterate lon
  end do ; iterate lat

;  ; start debugging
;  print("sum(area_arr)="+sum(area_arr)+", sphere_area="+sphere_area)
;  print("sum(area_arr)/sphere_area="+sum(area_arr)/sphere_area)
;  ; (0)   sum(area_arr)=5.09889e+14, sphere_area=5.09904e+14
;  ; (0)   sum(area_arr)/sphere_area=0.99997
;  ;   end debugging

  return(area_arr) ; as float
end ; lats_lons_sphere_area

; If len < 0, blanks pad before `str`.
; If len > 0, blanks pad after `str`.
undef("pad_blanks_to_length") ; allow redefinition in console
; TODO: prototype arg dimensionality
function pad_blanks_to_length(
  str : string, ; can be array
  len : numeric ; must be scalar
)
begin
  ; relies on odd behavior of built-in==`str_insert`
  ; TODO: test length(str) <= len
  return(str_insert(str, "", len))
end ; pad_blanks_to_length
  
;----------------------------------------------------------------------
; code
;----------------------------------------------------------------------

begin ; skip if copy/paste-ing to console

;----------------------------------------------------------------------
; constants
;----------------------------------------------------------------------

  this_fn = getenv("CALL_CONSERV_FN") ; for debugging

  model_year = stringtoint(getenv("MODEL_YEAR"))
  nominal_gridcell_area = stringtofloat(getenv("NOMINAL_GRIDCELL_AREA"))
  molar_mass_N2O = stringtofloat(getenv("MOLAR_MASS_N2O"))
  nitrogen_mass_N2O = stringtofloat(getenv("NITROGEN_MASS_N2O"))
  seconds_per_hour = stringtofloat(getenv("SECONDS_PER_HOUR"))
  ; output gridcell/timestep N2O (moles/s/mo/gridcell)
  ;   * hours_per_month (to be computed) * seconds_per_hour
  ;   * nitrogen_mass_N2O / molar_mass_N2O
  ; = input gridcell/timestep flux (mgN/m^2/mo/gridcell)
  ;   * gridcell area (m^2, to be computed)
  ; Note:
  ; input timestep==monthly
  ; output timestep==hourly
  output_conv_coeff = seconds_per_hour * nitrogen_mass_N2O / molar_mass_N2O

  raw_fp = getenv("CLM_CN_RAW_FP")
  raw_datavar_name = getenv("CLM_CN_RAW_DATAVAR_NAME")
  CMAQ_radius = stringtofloat(getenv("CMAQ_RADIUS"))

  ;; load matrix of gridcell map scale factors (MSFs) from GRIDCRO2D_<date/>
  MSF_fp = getenv("MSF_FP") ; path to container netCDF file
  MSF_var_name = getenv("MSF_VAR_NAME")

  output_fp_template_var_name = getenv("CLM_CN_TARGET_VAR_NAME")
  ;;; workaround fact that NCL does not support *.ncf :-(
  output_fp_template_CMAQ = getenv("CLM_CN_TARGET_FP_TEMPLATE_CMAQ")
  output_fp_template_NCL = getenv("CLM_CN_TARGET_FP_TEMPLATE_NCL")
  ; replace output_fp_template_str with <year/><month/> to create "real" filepaths
  output_fp_template_str = getenv("CLM_CN_TARGET_FN_TEMPLATE_STR")

  ; conservation report constants
  sigdigs = stringtoint(getenv("OUTPUT_SIGNIFICANT_DIGITS"))
  report_field_width = stringtoint(getenv("CONSERV_REPORT_FIELD_WIDTH"))
  report_col_sep = getenv("CONSERV_REPORT_COLUMN_SEPARATOR")
  report_title = getenv("CONSERV_REPORT_TITLE")
  report_subtitle = getenv("CONSERV_REPORT_SUBTITLE")
  report_int_format = getenv("CONSERV_REPORT_INT_FORMAT")
  report_flt_format = getenv("CONSERV_REPORT_FLOAT_FORMAT")

;----------------------------------------------------------------------
; payload
;----------------------------------------------------------------------

  ;;; get (global) raw input data
  raw_fh = addfile(raw_fp, "r")  ; file handle
  raw_datavar = raw_fh->$raw_datavar_name$
  ;; ignore bogus "months" 1,14
  ; remember: NCL indexing is {C-style,0-based}
  raw_datavar_arr = raw_datavar(1:12,:,:)
  lat_lon_arr = raw_datavar(0,:,:) ; since only a template, can use any month
  raw_areas = lats_lons_sphere_area(lat_lon_arr, CMAQ_radius)
;  printVarSummary(raw_areas) ; debugging

  ;;; get output map scale factors (MSFs)
  MSF_fh = addfile(MSF_fp,"r") ; handle
  MSF_var = MSF_fh->$MSF_var_name$
  ; ignore the unitary layer and timestep
  MSF_arr = MSF_var(0,0,:,:)
;  out_sizes = dimsizes(MSF_arr); output "dimension sizes" (row x col)

  ;;; create containers for monthly summaries
  ;; use type=float like n2oemissions(...)
  in_sum = new( (/ 12 /), float)  ; emission sums across domain=global
  in_nNA =  new( (/ 12 /), float) ; count NAs
  out_sum = new( (/ 12 /), float) ; emission sums across domain=AQMEII-NA
  out_nNA =  new( (/ 12 /), float)
  ;; initialize them
  do i_month=0 , 11
    in_sum(i_month) = 0
    in_nNA(i_month) = 0
    out_sum(i_month) = 0
    out_nNA(i_month) = 0
  end do

  print("") ; newline
  print(this_fn+": about to iterate over monthly data") ; TODO: progress

  ;;; fill the data containers
  do i_month=0 , 11 ; iterate months
    month = i_month + 1
    hours_per_month = int2flt(days_in_month(model_year, month) * 24)

    ;;; process raw input: relatively straightforward, since no unit conversion
    ; units for in_data, in_sum: (mgN/m^2) * m^2
    ; Note multiplication with operator=* is *not* linear-algebra-style!
    ; To do linear-algebra-style matrix multiplication, use operator=#
    in_data = raw_datavar_arr(i_month,:,:) * raw_areas
    in_sum(i_month) = sum(in_data)
    ; count NAs in raw-input month
    in_nNA(i_month) = num(ismissing(in_data))

    ;;; process final outputs
    ;; Gotta convert output units (moles/s/gridcell) to input mass (mgN)
    ;; with different gridcells.

    ;; open monthly output file, noting NCL does not support *.ncf :-(
    time_str = sprinti("%4i", model_year) + sprinti("%0.2i", month)
    output_fp_monthly_NCL = \
      str_sub_str(output_fp_template_NCL, output_fp_template_str, time_str)
    output_fp_monthly_CMAQ = \
      str_sub_str(output_fp_template_CMAQ, output_fp_template_str, time_str)
    ; 
    system("cp "+output_fp_monthly_CMAQ+" "+output_fp_monthly_NCL)
    output_fp_monthly_NCL_fh = addfile(output_fp_monthly_NCL, "r")

    ;; accumulate stats for output
    out_hourly = output_fp_monthly_NCL_fh->$output_fp_template_var_name$
    ;; ASSERT: all output hourly emissions in a month are equal
;    for i_hour = 0 , 23 ; ignore CMAQ 25th hour
      ; TODO: check that dimensions match, MSF vs datavar
      out_hourly_raw = out_hourly(0,0,:,:)
      out_hourly_converted = \
        out_hourly_raw * hours_per_month * output_conv_coeff
      out_sum(i_month) = sum(out_hourly_converted)
      out_nNA(i_month) = num(ismissing(out_hourly_raw))
;    end do ; iterate hours
      
    ;;; cleanup
    ;; "close" monthly file
    system("rm "+output_fp_monthly_NCL)
    delete(month)
    delete(time_str)
    delete(output_fp_monthly_NCL)
    delete(output_fp_monthly_CMAQ)
    delete(output_fp_monthly_NCL_fh)
    delete(out_hourly)
    delete(out_hourly_converted)

  end do ; iterate months

  ;;; conservation report, with unit conversion
  print("") ; newline
;  print(report_title) ; prints variable information!
  print(report_title+"")
;  print(report_subtitle)
  print(report_subtitle+"")
  print(\  ; header1
    pad_blanks_to_length("", -report_field_width) + report_col_sep +\
    pad_blanks_to_length("input", -report_field_width) + report_col_sep +\
    pad_blanks_to_length("input", -report_field_width) + report_col_sep +\
    pad_blanks_to_length("output", -report_field_width) + report_col_sep +\
    pad_blanks_to_length("output", -report_field_width) + report_col_sep +\
    pad_blanks_to_length("", -report_field_width) )
  print(\  ; header2
    pad_blanks_to_length("month", -report_field_width) + report_col_sep +\
    pad_blanks_to_length("global", -report_field_width) + report_col_sep +\
    pad_blanks_to_length("NAs", -report_field_width) + report_col_sep +\
    pad_blanks_to_length("AQMEII-NA", -report_field_width) + report_col_sep +\
    pad_blanks_to_length("NAs", -report_field_width) + report_col_sep +\
    pad_blanks_to_length("out/in", -report_field_width) )
  
;  ; start debugging
;  print("report_int_format="+report_int_format)
;  print("report_flt_format="+report_flt_format)
;  ;   end debugging
  do i_month=0 , 11 ; iterate months
    month = i_month + 1
    input_emi = in_sum(i_month)
    output_emi = out_sum(i_month)
    input_NAs = in_nNA(i_month)
    output_NAs = out_nNA(i_month)
    print(\
      sprinti(report_int_format, month)      + report_col_sep +\
      sprintf(report_flt_format, input_emi)  + report_col_sep +\
      sprintf(report_int_format, input_NAs)  + report_col_sep +\
      sprintf(report_flt_format, output_emi) + report_col_sep +\
      sprintf(report_int_format, output_NAs) + report_col_sep +\
      sprintf(report_flt_format, output_emi/input_emi) )
  end do ; iterate months

; ; start debug---------------------------------------------------------

; ;   end debug---------------------------------------------------------

end ; check_conservation.ncl