Source

cnda_scripts / archive_cleanup / uncompress_dicom.sh

Full commit
#!/bin/bash
# James Ransford - 12/29/2011
# Pipe in a list of projects (one per line) and this script will
# uncompress all DICOM files whose transfer syntax is JPEG (e.g. JPEG lossless).

# Script requires dcmdump and dcmdjpeg from
# DCMTK - http://dicom.offis.de/dcmtk

# Path to CNDA archive.
ARCHIVE=/data/CNDA/archive

# dcm_uncompress/ directory is created here (the directory the script is
# started from).
LOG_DIR=$(pwd)

# Path to the Grassroots DICOM bin.
DCMTK_BIN=/nrgpackages/tools.release/dcmtk-3.6.1_20111208-install/bin

# Create the log area below directory $1 if it is not there yet:
#   $1/dcm_uncompress/                     directory holding all log files
#   $1/dcm_uncompress/sessions_completed   list of sessions already handled
# Existing files are left untouched.  Every path is quoted so that a
# directory name containing spaces works.
# Returns non-zero when the directory or the file cannot be created.
init_log_area() {
   local log_root=$1/dcm_uncompress

   if [[ ! -d $log_root ]]; then
      echo "Creating directory $log_root"
      mkdir -p -- "$log_root" || return 1
   fi

   # Create the sessions completed file.
   if [[ ! -e $log_root/sessions_completed ]]; then
      touch -- "$log_root/sessions_completed" || return 1
   fi
}

# Without the log area the completed-session bookkeeping cannot work, so stop
# here instead of continuing with a half-initialised run.
if ! init_log_area "$LOG_DIR"; then
   echo "[ERROR] >> Unable to set up $LOG_DIR/dcm_uncompress" >&2
   exit 1
fi

# Set the delimiter to \n so that unquoted expansions are split on newlines
# only and file names with spaces stay in one piece.
IFS=$'\n'

# Don't look at files with the following in the name.
# This is just to narrow down the number of files we run the file command on.
# We know that files that have the following in their name will never be DICOM.
# The value is an extended regular expression.  "/" is an ordinary character
# in an ERE and is therefore written without a backslash; an escaped "\/" is
# unspecified and can make grep print a "stray \" warning.
FILES_WITHOUT='((/PROCESSED/)|(/ASSESSORS/)|(\.4dfp\.)|(/SNAPSHOTS/)|((\.xml|\.gif|\.log|\.err|\.gz|\.rec|\.dat|\.conc|\.lst|\.m3z|\.mgz)$))'

# Succeed when session directory $1 is already recorded in the
# sessions_completed log.  The path is matched as a fixed string, so
# characters such as "." or "[" in a directory name are taken literally.
session_is_done() {
   grep -qF -- "$1 - Done." "$LOG_DIR/dcm_uncompress/sessions_completed"
}

# Uncompress every JPEG-encoded DICOM file below one session directory.
#   $1 - full path of the session directory
#   $2 - short "arc/session" label used in progress messages
# Successfully rewritten files are appended to files_modified.log, failures to
# failed.log.  The session is appended to sessions_completed afterwards, even
# when some files failed; those are only listed in failed.log.
uncompress_session() {
   local session=$1 label=$2
   local count=0 file
   local -a candidates=()

   echo "" && echo "|> Checking Session:  $label"

   # Take a snapshot of the file list before anything is modified.  The list
   # is NUL-delimited so that any character in a file name is handled.
   while IFS= read -r -d '' file; do
      candidates+=("$file")
   done < <(find "$session" -type f -print0)

   for file in "${candidates[@]}"; do
      # Cheap name filter first; it saves running the file command on paths
      # that are known never to be DICOM.
      [[ $file =~ $FILES_WITHOUT ]] && continue
      file -b "$file" | grep -q 'DICOM medical imaging data' || continue
      # (0002,0010) holds the transfer syntax; only JPEG ones are converted.
      "$DCMTK_BIN/dcmdump" --search '0002,0010' "$file" | grep -q 'JPEG' || continue

      # Uncompress the dicom file
      echo "|DICOM|> $file "
      # The original is only replaced when both the conversion and the rename
      # succeed.
      if "$DCMTK_BIN/dcmdjpeg" "$file" "$file.out.dcm" \
         && mv -- "$file.out.dcm" "$file"; then
         count=$((count + 1))
         printf '%s\n' "$file" >> "$LOG_DIR/dcm_uncompress/files_modified.log"
      else
         # Do not leave a partial output file behind in the archive.
         rm -f -- "$file.out.dcm"
         printf '%s\n' "$file" >> "$LOG_DIR/dcm_uncompress/failed.log"
      fi
   done # for file in

   echo "|SESSION|> $label - Done. $count files uncompressed."
   echo "$session - Done. $count files uncompressed.  $(date)" >> "$LOG_DIR/dcm_uncompress/sessions_completed"
}

# Process all sessions of project $1 (a directory name below $ARCHIVE).
# Returns 1 without recording the project as completed when its directory
# does not exist.
process_project() {
   local project=$1
   local arc session label

   echo "" && echo "[INFO] >> Project: $project"
   if [[ ! -d $ARCHIVE/$project ]]; then
      echo "[WARN] >> Project directory $ARCHIVE/$project not found.  Skipping." >&2
      return 1
   fi

   # Look for the arc directories for this project (ex. arc001, arc002)
   for arc in "$ARCHIVE/$project"/arc[0-9][0-9][0-9]; do
      # Also skips the literal pattern that is left when nothing matches.
      [[ -d $arc ]] || continue
      # Check for session directories inside this arc
      for session in "$arc"/*; do
         [[ -d $session ]] || continue
         label="${arc##*/}/${session##*/}"
         if session_is_done "$session"; then
            echo "|SESSION|> $label has already been uncompressed.  Skipping..."
         else
            uncompress_session "$session" "$label"
         fi
      done # for session in
   done # for arc in

   echo "[INFO] >> Project: $project --> DONE."
   echo "$project dicom files uncompressed on $(date)" >> "$LOG_DIR/dcm_uncompress/projects_completed"
}

# Read one project name per line from stdin.  The "|| [[ -n ... ]]" part keeps
# a last line that has no trailing newline; blank lines are ignored.
while IFS= read -r project || [[ -n $project ]]; do
   [[ -n $project ]] || continue
   process_project "$project"
done # while

# Reset the delimiter (an unset IFS behaves like the default one)
unset IFS