Commits

rsvp committed 166abb4 Draft

Add jason.sh to pretty print json for key-value lines.

Comments (0)

Files changed (2)

 README for gists
 ================
 
-Introduction :: last update : 2012-04-22
+Introduction :: last update : 2012-06-16
 ========================================
 
 "Gists" are useful code snippets or short scripts, where documentation
 Short descriptions below are in reverse chronological order:
 
 
+jason.sh
+========
+
+pretty print json for key-value processing line-by-line.
+A json file is NOT easily readable especially in 
+compact form without spaces and without newlines.
+The goal is a readable and grepable output, 
+which can be further refined by awk.
+
+
 sto.sh
 ======
 
+#!/usr/bin/env bash
+#              bash 4.1.5(1)     Linux Ubuntu 10.04           Date : 2012-06-15
+#
+# _______________|  jason : pretty print json for key-value processing by line.
+#
+#           Usage:  jason  file  [indent=5]  [delim='__:']
+#
+#        Examples:  % jason foo.json  0 ': '
+#                   #                    ^typical json key-value separator.
+#                   #                 ^indent of 0 for just newline.
+#                   % jason foo.json
+#                   #       ^try this first for readability and grep.
+#
+#    Dependencies:  Python json module
+
+
+#  CHANGE LOG  LATEST version available:   https://bitbucket.org/rsvp/gists/src
+#
+#  2012-06-15  Start using Python json module instead of awk.
+#                 Note that json module since 2.6 is simplejson.
+#
+#  2012-02-26  Handle errors and those dangling braces.
+#  2012-02-25  First experimental version to parse json file.
+#                 It will be IMPERFECT because the structure of a 
+#                 particular json file will generally vary.
+#
+#  A json file is NOT easily readable especially in 
+#  compact form without spaces and without newlines.
+#
+#  The goal is a readable and grepable output, 
+#  which can be further refined by awk.
+
+
+#           _____ PREAMBLE_v2: settings, variables, and error handling.
+#
+LC_ALL=POSIX
+#      locale means "ASCII, US English, no special rules, 
+#      output per ISO and RFC standards." 
+#      Esp. use ASCII encoding for glob and sorting characters. 
+shopt -s   extglob
+#     ^set extended glob for pattern matching.
+set -e
+#   ^errors checked: immediate exit if a command has non-zero status. 
+set -u
+#   ^unassigned variables shall be errors.
+#    Example of default VARIABLE ASSIGNMENT:  arg1=${1:-'foo'}
+
+jsonf="$1"
+indent=${2:-'5'}
+delim=${3:-'__:'}
+
+
+program=${0##*/}   #  similar to using basename
+memf=$( mktemp /dev/shm/88_${program}_tmp.XXXXXXXXXX )
+
+
+cleanup () {
+     #  Delete temporary file, then optionally exit given status.
+     #  Usage: cleanup [status]       default status is 0, 
+     #                                but status -1 prevents the exit.
+     local status=${1:-'0'}
+     rm -f -- "$memf"
+     #        ^quoted so that spaces in the path cannot split it.
+     [ "$status" = '-1' ] ||  exit "$status"      #  thus -1 prevents exit.
+} #--------------------------------------------------------------------
+warn () {
+     #  Message with basename to stderr.          Usage: warn "message"
+     #  printf shows the message verbatim, whereas echo -e would 
+     #  interpret any backslash within it (e.g. part of a file name).
+     printf '\n !!  %s: %s \n'  "$program"  "$1"  >&2
+} #--------------------------------------------------------------------
+die () {
+     #  Exit with status of most recent command or custom status, after
+     #  cleanup and warn.      Usage: command || die "message" [status]
+     local status=${2:-"$?"}
+     #              ^$? is expanded before local itself runs, 
+     #               so it is still the status of the failed command.
+     cleanup -1    ||  true
+     warn "$1"     ||  true
+     #  Steps are NOT chained with &&, so a failing cleanup or warn 
+     #  (e.g. closed stderr) can never prevent the exit below.
+     exit "$status"
+} #--------------------------------------------------------------------
+trap "die 'SIG disruption, but cleanup finished.' 114" 1 2 3 15
+#    Cleanup after INTERRUPT: 1=SIGHUP, 2=SIGINT, 3=SIGQUIT, 15=SIGTERM
+trap 'rm -f -- "$memf"' EXIT
+#    Also remove the temporary file on ANY exit, e.g. when set -e aborts 
+#    the script.  The exit status is left untouched, and repeating 
+#    rm -f after a regular cleanup is harmless.
+#
+# _______________     ::  BEGIN  Script ::::::::::::::::::::::::::::::::::::::::
+
+
+[ -e "$jsonf" ] || die "non-existent file: $jsonf" 113
+
+
+{ python <<EOHereDoc
+import json
+
+#  >>> #  2012-04-30  PRETTY PRINT using Python's json module:
+#  >>> print json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=5)
+#       {
+#            "4": 5,
+#            "6": 7
+#       }
+
+with open( "$jsonf", 'rb' ) as f:
+     #            ^binary 
+     data = f.read()
+     data = json.JSONDecoder().decode( data )
+     #           ^convert json string into its Python representation.
+     #  print data
+     print json.dumps( data, sort_keys=True, indent = $indent )
+
+     #  If indent is a non-negative integer (it is None by default), 
+     #  then JSON array elements and object members will be 
+     #  pretty-printed with that indent level. An indent level of 0 
+     #  will only insert newlines. None is the most compact representation.
+
+EOHereDoc
+}         | sed -e 's/, *$//'  -e 's/{$//' > $memf
+#                                 ^delete leading brace, but not the line.
+#                  ^delete trailing commas
+#                   since newline will become field separator.
+
+#  #    sample output with indent set at 5 before sed:
+#  {
+#       "130.12.1.34": {
+#            "area_code": 0, 
+#            "country_code": "CA", 
+#            "country_code3": "CAN", 
+#            "country_name": "Canada", 
+#            "dma_code": 0, 
+#            "latitude": 44.233299255371101, 
+#            "locality": "Kingston", 
+#            "longitude": -76.483299255371094, 
+#            "postal_code": "", 
+#            "region": "ON"
+#       }, 
+#       "67.169.73.113": {
+#            "area_code": 415, 
+#            "country_code": "US", 
+#            "country_code3": "USA", 
+#            "country_name": "United States", 
+#            "dma_code": 807, 
+#            "latitude": 37.758701324462898, 
+#            "locality": "San Francisco", 
+#            "longitude": -122.438102722168, 
+#            "postal_code": "94114", 
+#            "region": "CA"
+#       }
+#  }
+
+
+#  Elaborate way to eliminate first open brace, and last closing brace:
+lines=$( cat $memf | wc -l )
+line1=$(( lines - 1 ))
+line2=$(( lines - 2 ))
+tail -n $line1 $memf | head -n $line2  \
+     |  sed -e 's/"//'  -e "s/\": /$delim/"
+     #  sed unquotes the key, and then substitutes delim.
+
+
+cleanup
+# _______________ EOS ::  END of Script ::::::::::::::::::::::::::::::::::::::::
+
+
+
+#  # _______________ 2012-06-15  DEPRECATE AWK CODE
+#  
+#  #   Two applications of sed are necessary to introduce a newline 
+#  #   as a record separator replacing } and then eliminating the prefix {" 
+#  #
+#  sed -e 's/}, *{/\n{/g' "$1" | sed -e 's/^{ *"//' -e 's/}[]} ]*$//' > $memf
+#  #
+#  #      The third directive serves to eliminate any dangling end braces 
+#  #      (esp. if the json file was not originally a long single line).
+#  #         Obscure: the close bracket ] is not escaped but rather must 
+#  #         be the first character of a bracketed set.
+#  
+#  
+#  #   Warn if there seems to be sub-records:
+#  nrecords=$( cat $memf | wc -l )
+#  nbraces=$( egrep '(\}|\{)' $memf | wc -l )
+#  #                ^check for presence of more braces.
+#  (( $nbraces > 0 )) && \
+#       warn "$nbraces records possibly have sub-records (out of $nrecords)."
+#  
+#  
+#  #   sed within the process substitution will NORMALIZE each record such that
+#  #            - every odd-numbered field is a key, for which
+#  #            - corresponding value is every adjacent even-numbered field.
+#  #
+#  { awk -f - <(sed -e 's/, *"/": /g' $memf) <<EOHereDoc
+#       BEGIN { FS = "\": *" }
+#       {    #==================================================MAIN======== 
+#  
+#  
+#            #  EXPLORE:  print out each KEY-VALUE COMBO on a single line
+#            #            making it easier to later grep a json file:
+#            for ( i = 1 ; i < NF ; i += 2 ) {
+#                 j = i + 1
+#                 key   = \$i
+#                 value = \$j
+#                 print key, "@@", value
+#                 #     ^key could contain spaces, so distinguish
+#                 #      key from value using @@.
+#            }
+#            print "===@==="
+#            #     ^ad hoc RECORD SEPARATOR.
+#            #
+#            #     ___ATTN___ Be sure to examine the very top of file
+#            #                and very bottom for dangling braces.
+#            #                Those could be edited out manually.
+#            #
+#            #     Tip: use awk's pattern matching feature on keys.
+#            #          Remember that keys are not necessarily positional; 
+#            #          they are not necessarily in some sorted order. 
+#  
+#  
+#        }   #==================================================END========
+#  EOHereDoc
+#  }         ||  die "bad awk within here_doc." 113
+
+
+
+#  vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=sh :